about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-09-18 16:03:16 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-18 16:08:46 -0400
commitf85d7d6c8f2ad4a86a1f4f4e3791f36dede2fa76 (patch)
tree40538328090b5eb2f3f95a9bc5a290c83384ff43 /fs
parent11833d66be94b514652466802100378046c16b72 (diff)
Btrfs: properly honor wbc->nr_to_write changes
When btrfs fills a delayed allocation, it tries to increase the wbc nr_to_write to cover a big part of allocation. The theory is that we're doing contiguous IO and writing a few more blocks will save seeks overall at a very low cost.

The problem is that extent_write_cache_pages could ignore the new higher nr_to_write if nr_to_write had already gone down to zero. We fix that by rechecking the nr_to_write for every page that is processed in the pagevec.

This updates the math around bumping the nr_to_write value to make sure we don't leave a tiny amount of IO hanging around for the very end of a new extent.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c38
1 files changed, 27 insertions, 11 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a102422cd92e..7e16c6d8153f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2182,7 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	delalloc_end = 0;
 	page_started = 0;
 	if (!epd->extent_locked) {
-		u64 delalloc_to_write;
+		u64 delalloc_to_write = 0;
 		/*
 		 * make sure the wbc mapping index is at least updated
 		 * to this page.
@@ -2202,16 +2202,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			tree->ops->fill_delalloc(inode, page, delalloc_start,
 						 delalloc_end, &page_started,
 						 &nr_written);
-			delalloc_to_write = (delalloc_end -
-					max_t(u64, page_offset(page),
-					      delalloc_start) + 1) >>
-					      PAGE_CACHE_SHIFT;
-			if (wbc->nr_to_write < delalloc_to_write) {
-				wbc->nr_to_write = min_t(long, 8192,
-						 delalloc_to_write);
-			}
+			/*
+			 * delalloc_end is already one less than the total
+			 * length, so we don't subtract one from
+			 * PAGE_CACHE_SIZE
+			 */
+			delalloc_to_write += (delalloc_end - delalloc_start +
+					      PAGE_CACHE_SIZE) >>
+					      PAGE_CACHE_SHIFT;
 			delalloc_start = delalloc_end + 1;
 		}
+		if (wbc->nr_to_write < delalloc_to_write) {
+			int thresh = 8192;
+
+			if (delalloc_to_write < thresh * 2)
+				thresh = delalloc_to_write;
+			wbc->nr_to_write = min_t(u64, delalloc_to_write,
+						 thresh);
+		}
 
 		/* did the fill delalloc function already unlock and start
 		 * the IO?
@@ -2388,6 +2396,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 {
 	int ret = 0;
 	int done = 0;
+	int nr_to_write_done = 0;
 	struct pagevec pvec;
 	int nr_pages;
 	pgoff_t index;
@@ -2407,7 +2416,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 		scanned = 1;
 	}
 retry:
-	while (!done && (index <= end) &&
+	while (!done && !nr_to_write_done && (index <= end) &&
 	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 		      PAGECACHE_TAG_DIRTY, min(end - index,
 		      (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2458,8 +2467,15 @@ retry:
 				unlock_page(page);
 				ret = 0;
 			}
-			if (ret || wbc->nr_to_write <= 0)
+			if (ret)
 				done = 1;
+
+			/*
+			 * the filesystem may choose to bump up nr_to_write.
+			 * We have to make sure to honor the new nr_to_write
+			 * at any time
+			 */
+			nr_to_write_done = wbc->nr_to_write <= 0;
 		}
 		pagevec_release(&pvec);
 		cond_resched();