diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-09-18 16:03:16 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-09-18 16:08:46 -0400 |
commit | f85d7d6c8f2ad4a86a1f4f4e3791f36dede2fa76 (patch) | |
tree | 40538328090b5eb2f3f95a9bc5a290c83384ff43 | |
parent | 11833d66be94b514652466802100378046c16b72 (diff) |
Btrfs: properly honor wbc->nr_to_write changes
When btrfs fills a delayed allocation, it tries to increase
the wbc nr_to_write to cover a big part of the allocation. The
theory is that we're doing contiguous IO and writing a few
more blocks will save seeks overall at a very low cost.
The problem is that extent_write_cache_pages could ignore
the new higher nr_to_write if nr_to_write had already gone
down to zero. We fix that by rechecking the nr_to_write
for every page that is processed in the pagevec.
This also updates the math around bumping the nr_to_write value
to make sure we don't leave a tiny amount of IO hanging
around for the very end of a new extent.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/extent_io.c | 38 |
1 files changed, 27 insertions, 11 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a102422cd92e..7e16c6d8153f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2182,7 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2182 | delalloc_end = 0; | 2182 | delalloc_end = 0; |
2183 | page_started = 0; | 2183 | page_started = 0; |
2184 | if (!epd->extent_locked) { | 2184 | if (!epd->extent_locked) { |
2185 | u64 delalloc_to_write; | 2185 | u64 delalloc_to_write = 0; |
2186 | /* | 2186 | /* |
2187 | * make sure the wbc mapping index is at least updated | 2187 | * make sure the wbc mapping index is at least updated |
2188 | * to this page. | 2188 | * to this page. |
@@ -2202,16 +2202,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2202 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2202 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
2203 | delalloc_end, &page_started, | 2203 | delalloc_end, &page_started, |
2204 | &nr_written); | 2204 | &nr_written); |
2205 | delalloc_to_write = (delalloc_end - | 2205 | /* |
2206 | max_t(u64, page_offset(page), | 2206 | * delalloc_end is already one less than the total |
2207 | delalloc_start) + 1) >> | 2207 | * length, so we don't subtract one from |
2208 | PAGE_CACHE_SHIFT; | 2208 | * PAGE_CACHE_SIZE |
2209 | if (wbc->nr_to_write < delalloc_to_write) { | 2209 | */ |
2210 | wbc->nr_to_write = min_t(long, 8192, | 2210 | delalloc_to_write += (delalloc_end - delalloc_start + |
2211 | delalloc_to_write); | 2211 | PAGE_CACHE_SIZE) >> |
2212 | } | 2212 | PAGE_CACHE_SHIFT; |
2213 | delalloc_start = delalloc_end + 1; | 2213 | delalloc_start = delalloc_end + 1; |
2214 | } | 2214 | } |
2215 | if (wbc->nr_to_write < delalloc_to_write) { | ||
2216 | int thresh = 8192; | ||
2217 | |||
2218 | if (delalloc_to_write < thresh * 2) | ||
2219 | thresh = delalloc_to_write; | ||
2220 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
2221 | thresh); | ||
2222 | } | ||
2215 | 2223 | ||
2216 | /* did the fill delalloc function already unlock and start | 2224 | /* did the fill delalloc function already unlock and start |
2217 | * the IO? | 2225 | * the IO? |
@@ -2388,6 +2396,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2388 | { | 2396 | { |
2389 | int ret = 0; | 2397 | int ret = 0; |
2390 | int done = 0; | 2398 | int done = 0; |
2399 | int nr_to_write_done = 0; | ||
2391 | struct pagevec pvec; | 2400 | struct pagevec pvec; |
2392 | int nr_pages; | 2401 | int nr_pages; |
2393 | pgoff_t index; | 2402 | pgoff_t index; |
@@ -2407,7 +2416,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2407 | scanned = 1; | 2416 | scanned = 1; |
2408 | } | 2417 | } |
2409 | retry: | 2418 | retry: |
2410 | while (!done && (index <= end) && | 2419 | while (!done && !nr_to_write_done && (index <= end) && |
2411 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2420 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
2412 | PAGECACHE_TAG_DIRTY, min(end - index, | 2421 | PAGECACHE_TAG_DIRTY, min(end - index, |
2413 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 2422 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
@@ -2458,8 +2467,15 @@ retry: | |||
2458 | unlock_page(page); | 2467 | unlock_page(page); |
2459 | ret = 0; | 2468 | ret = 0; |
2460 | } | 2469 | } |
2461 | if (ret || wbc->nr_to_write <= 0) | 2470 | if (ret) |
2462 | done = 1; | 2471 | done = 1; |
2472 | |||
2473 | /* | ||
2474 | * the filesystem may choose to bump up nr_to_write. | ||
2475 | * We have to make sure to honor the new nr_to_write | ||
2476 | * at any time | ||
2477 | */ | ||
2478 | nr_to_write_done = wbc->nr_to_write <= 0; | ||
2463 | } | 2479 | } |
2464 | pagevec_release(&pvec); | 2480 | pagevec_release(&pvec); |
2465 | cond_resched(); | 2481 | cond_resched(); |