X-Git-Url: http://bbs.cooldavid.org/git/?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=0c6258bd1ba37ff62884f914dc77b4c057dbb28c;hb=2f9e825d3e0e2b407ae8f082de5c00afcf7378fb;hp=3d2111a2223633d817ccbd6714fcd994509f1ca8;hpb=de75d60d5ea235e6e09f4962ab22541ce0fe176a;p=net-next-2.6.git diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3d2111a2223..0c6258bd1ba 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -808,6 +808,41 @@ void __init page_writeback_init(void) prop_descriptor_init(&vm_dirties, shift); } +/** + * tag_pages_for_writeback - tag pages to be written by write_cache_pages + * @mapping: address space structure to write + * @start: starting page index + * @end: ending page index (inclusive) + * + * This function scans the page range from @start to @end (inclusive) and tags + * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is + * that write_cache_pages (or whoever calls this function) will then use + * TOWRITE tag to identify pages eligible for writeback. This mechanism is + * used to avoid livelocking of writeback by a process steadily creating new + * dirty pages in the file (thus it is important for this function to be quick + * so that it can tag pages faster than a dirtying process can create them). + */ +/* + * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency. + */ +#define WRITEBACK_TAG_BATCH 4096 +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end) +{ + unsigned long tagged; + + do { + spin_lock_irq(&mapping->tree_lock); + tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree, + &start, end, WRITEBACK_TAG_BATCH, + PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); + spin_unlock_irq(&mapping->tree_lock); + WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); + cond_resched(); + } while (tagged >= WRITEBACK_TAG_BATCH); +} +EXPORT_SYMBOL(tag_pages_for_writeback); + /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write @@ -822,6 +857,13 @@ void __init page_writeback_init(void) * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * To avoid livelocks (when other process dirties new pages), we first tag + * pages which should be written back with TOWRITE tag and only then start + * writing them. For data-integrity sync we have to be careful so that we do + * not miss some pages (e.g., because some other process has cleared TOWRITE + * tag we set). The rule we follow is that TOWRITE tag can be cleared only + * by the process clearing the DIRTY tag (and submitting the page for IO). */ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, @@ -837,6 +879,7 @@ int write_cache_pages(struct address_space *mapping, pgoff_t done_index; int cycled; int range_whole = 0; + int tag; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { @@ -853,29 +896,19 @@ int write_cache_pages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; cycled = 1; /* ignore range_cyclic tests */ - - /* - * If this is a data integrity sync, cap the writeback to the - * current end of file. Any extension to the file that occurs - * after this is a new write and we don't need to write those - * pages out to fulfil our data integrity requirements. If we - * try to write them out, we can get stuck in this scan until - * the concurrent writer stops adding dirty pages and extending - * EOF. - */ - if (wbc->sync_mode == WB_SYNC_ALL && - wbc->range_end == LLONG_MAX) { - end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT; - } } - + if (wbc->sync_mode == WB_SYNC_ALL) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; retry: + if (wbc->sync_mode == WB_SYNC_ALL) + tag_pages_for_writeback(mapping, index, end); done_index = index; while (!done && (index <= end)) { int i; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break; @@ -1332,6 +1365,9 @@ int test_set_page_writeback(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_TOWRITE); spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestSetPageWriteback(page);