]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - mm/page-writeback.c
mm: fix fatal kernel-doc error
[net-next-2.6.git] / mm / page-writeback.c
index 37498ef6154836943f3478304bf7440ab255669a..ea0b7cb4a8c762c592b867988588f476dff617e2 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * threshold otherwise wait until the disk writes catch
                 * up.
                 */
+               trace_wbc_balance_dirty_start(&wbc, bdi);
                if (bdi_nr_reclaimable > bdi_thresh) {
                        writeback_inodes_wb(&bdi->wb, &wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
+                       trace_wbc_balance_dirty_written(&wbc, bdi);
                }
 
                /*
@@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
+               trace_wbc_balance_dirty_wait(&wbc, bdi);
                __set_current_state(TASK_INTERRUPTIBLE);
                io_schedule_timeout(pause);
 
@@ -804,6 +808,41 @@ void __init page_writeback_init(void)
        prop_descriptor_init(&vm_dirties, shift);
 }
 
+/**
+ * tag_pages_for_writeback - tag pages to be written by write_cache_pages
+ * @mapping: address space structure to write
+ * @start: starting page index
+ * @end: ending page index (inclusive)
+ *
+ * This function scans the page range from @start to @end (inclusive) and tags
+ * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
+ * that write_cache_pages (or whoever calls this function) will then use
+ * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
+ * used to avoid livelocking of writeback by a process steadily creating new
+ * dirty pages in the file (thus it is important for this function to be quick
+ * so that it can tag pages faster than a dirtying process can create them).
+ */
+/*
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ */
+void tag_pages_for_writeback(struct address_space *mapping,
+                            pgoff_t start, pgoff_t end)
+{
+#define WRITEBACK_TAG_BATCH 4096
+       unsigned long tagged;
+
+       do {
+               spin_lock_irq(&mapping->tree_lock);
+               tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
+                               &start, end, WRITEBACK_TAG_BATCH,
+                               PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+               spin_unlock_irq(&mapping->tree_lock);
+               WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
+               cond_resched();
+       } while (tagged >= WRITEBACK_TAG_BATCH);
+}
+EXPORT_SYMBOL(tag_pages_for_writeback);
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -818,6 +857,13 @@ void __init page_writeback_init(void)
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * To avoid livelocks (when other process dirties new pages), we first tag
+ * pages which should be written back with TOWRITE tag and only then start
+ * writing them. For data-integrity sync we have to be careful so that we do
+ * not miss some pages (e.g., because some other process has cleared TOWRITE
+ * tag we set). The rule we follow is that TOWRITE tag can be cleared only
+ * by the process clearing the DIRTY tag (and submitting the page for IO).
  */
 int write_cache_pages(struct address_space *mapping,
                      struct writeback_control *wbc, writepage_t writepage,
@@ -833,6 +879,7 @@ int write_cache_pages(struct address_space *mapping,
        pgoff_t done_index;
        int cycled;
        int range_whole = 0;
+       int tag;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -849,29 +896,19 @@ int write_cache_pages(struct address_space *mapping,
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
                cycled = 1; /* ignore range_cyclic tests */
-
-               /*
-                * If this is a data integrity sync, cap the writeback to the
-                * current end of file. Any extension to the file that occurs
-                * after this is a new write and we don't need to write those
-                * pages out to fulfil our data integrity requirements. If we
-                * try to write them out, we can get stuck in this scan until
-                * the concurrent writer stops adding dirty pages and extending
-                * EOF.
-                */
-               if (wbc->sync_mode == WB_SYNC_ALL &&
-                   wbc->range_end == LLONG_MAX) {
-                       end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT;
-               }
        }
-
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
 retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        done_index = index;
        while (!done && (index <= end)) {
                int i;
 
-               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY,
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
                        break;
@@ -929,6 +966,7 @@ continue_unlock:
                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;
 
+                       trace_wbc_writepage(wbc, mapping->backing_dev_info);
                        ret = (*writepage)(page, wbc, data);
                        if (unlikely(ret)) {
                                if (ret == AOP_WRITEPAGE_ACTIVATE) {
@@ -1327,6 +1365,9 @@ int test_set_page_writeback(struct page *page)
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
+               radix_tree_tag_clear(&mapping->page_tree,
+                                    page_index(page),
+                                    PAGECACHE_TAG_TOWRITE);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);