diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-11-19 12:44:22 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-11-19 12:44:22 -0500 |
commit | d2c3f4f695edac4d75c1b3eb01a1d16072de63bb (patch) | |
tree | 14a8dd519d067adbe16e8adb7342343529eb5c75 | |
parent | 105d931d482b7d1b1b2dd4b0ea30365db8630b9f (diff) |
Btrfs: Avoid writeback stalls
While building large bios in writepages, btrfs may end up waiting
for other page writeback to finish if WB_SYNC_ALL is used.
While it is waiting, the bio it is building has a number of pages with the
writeback bit set and they aren't getting to the disk any time soon. This
lowers the latencies of writeback in general by sending down the bio being
built before waiting for other pages.
The bio submission code tries to limit the total number of async bios in
flight by waiting when we're over a certain number of async bios. But,
the waits are happening while writepages is building bios, and this can easily
lead to stalls and other problems for people calling wait_on_page_writeback.
The current fix is to let the congestion tests take care of waiting.
sync() and others make sure to drain the current async requests to make
sure that everything that was pending when the sync was started really gets
to disk. The code would drain pending requests both before and after
submitting a new request.
But, if one of the requests is waiting for page writeback to finish,
the draining waits might block that page writeback. This changes the
draining code to only wait after submitting the bio being processed.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/disk-io.c | 12 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 24 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 7 |
3 files changed, 23 insertions, 20 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e8d31274c92..8d03e4a3c4e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -538,15 +538,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
538 | async->work.flags = 0; | 538 | async->work.flags = 0; |
539 | async->bio_flags = bio_flags; | 539 | async->bio_flags = bio_flags; |
540 | 540 | ||
541 | while(atomic_read(&fs_info->async_submit_draining) && | ||
542 | atomic_read(&fs_info->nr_async_submits)) { | ||
543 | wait_event(fs_info->async_submit_wait, | ||
544 | (atomic_read(&fs_info->nr_async_submits) == 0)); | ||
545 | } | ||
546 | |||
547 | atomic_inc(&fs_info->nr_async_submits); | 541 | atomic_inc(&fs_info->nr_async_submits); |
548 | btrfs_queue_worker(&fs_info->workers, &async->work); | 542 | btrfs_queue_worker(&fs_info->workers, &async->work); |
549 | 543 | #if 0 | |
550 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | 544 | if (atomic_read(&fs_info->nr_async_submits) > limit) { |
551 | wait_event_timeout(fs_info->async_submit_wait, | 545 | wait_event_timeout(fs_info->async_submit_wait, |
552 | (atomic_read(&fs_info->nr_async_submits) < limit), | 546 | (atomic_read(&fs_info->nr_async_submits) < limit), |
@@ -556,7 +550,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
556 | (atomic_read(&fs_info->nr_async_bios) < limit), | 550 | (atomic_read(&fs_info->nr_async_bios) < limit), |
557 | HZ/10); | 551 | HZ/10); |
558 | } | 552 | } |
559 | 553 | #endif | |
560 | while(atomic_read(&fs_info->async_submit_draining) && | 554 | while(atomic_read(&fs_info->async_submit_draining) && |
561 | atomic_read(&fs_info->nr_async_submits)) { | 555 | atomic_read(&fs_info->nr_async_submits)) { |
562 | wait_event(fs_info->async_submit_wait, | 556 | wait_event(fs_info->async_submit_wait, |
@@ -1765,11 +1759,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1765 | ret = btrfs_cleanup_reloc_trees(tree_root); | 1759 | ret = btrfs_cleanup_reloc_trees(tree_root); |
1766 | BUG_ON(ret); | 1760 | BUG_ON(ret); |
1767 | 1761 | ||
1762 | read_fs_root: | ||
1768 | location.objectid = BTRFS_FS_TREE_OBJECTID; | 1763 | location.objectid = BTRFS_FS_TREE_OBJECTID; |
1769 | location.type = BTRFS_ROOT_ITEM_KEY; | 1764 | location.type = BTRFS_ROOT_ITEM_KEY; |
1770 | location.offset = (u64)-1; | 1765 | location.offset = (u64)-1; |
1771 | 1766 | ||
1772 | read_fs_root: | ||
1773 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 1767 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
1774 | if (!fs_info->fs_root) | 1768 | if (!fs_info->fs_root) |
1775 | goto fail_cleaner; | 1769 | goto fail_cleaner; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 54d013c3bb88..a0f3804efe4f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2398,7 +2398,8 @@ update_nr_written: | |||
2398 | int extent_write_cache_pages(struct extent_io_tree *tree, | 2398 | int extent_write_cache_pages(struct extent_io_tree *tree, |
2399 | struct address_space *mapping, | 2399 | struct address_space *mapping, |
2400 | struct writeback_control *wbc, | 2400 | struct writeback_control *wbc, |
2401 | writepage_t writepage, void *data) | 2401 | writepage_t writepage, void *data, |
2402 | void (*flush_fn)(void *)) | ||
2402 | { | 2403 | { |
2403 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 2404 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
2404 | int ret = 0; | 2405 | int ret = 0; |
@@ -2460,8 +2461,10 @@ retry: | |||
2460 | continue; | 2461 | continue; |
2461 | } | 2462 | } |
2462 | 2463 | ||
2463 | if (wbc->sync_mode != WB_SYNC_NONE) | 2464 | if (wbc->sync_mode != WB_SYNC_NONE) { |
2465 | flush_fn(data); | ||
2464 | wait_on_page_writeback(page); | 2466 | wait_on_page_writeback(page); |
2467 | } | ||
2465 | 2468 | ||
2466 | if (PageWriteback(page) || | 2469 | if (PageWriteback(page) || |
2467 | !clear_page_dirty_for_io(page)) { | 2470 | !clear_page_dirty_for_io(page)) { |
@@ -2498,6 +2501,15 @@ retry: | |||
2498 | } | 2501 | } |
2499 | EXPORT_SYMBOL(extent_write_cache_pages); | 2502 | EXPORT_SYMBOL(extent_write_cache_pages); |
2500 | 2503 | ||
2504 | static noinline void flush_write_bio(void *data) | ||
2505 | { | ||
2506 | struct extent_page_data *epd = data; | ||
2507 | if (epd->bio) { | ||
2508 | submit_one_bio(WRITE, epd->bio, 0, 0); | ||
2509 | epd->bio = NULL; | ||
2510 | } | ||
2511 | } | ||
2512 | |||
2501 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | 2513 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, |
2502 | get_extent_t *get_extent, | 2514 | get_extent_t *get_extent, |
2503 | struct writeback_control *wbc) | 2515 | struct writeback_control *wbc) |
@@ -2523,7 +2535,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2523 | ret = __extent_writepage(page, wbc, &epd); | 2535 | ret = __extent_writepage(page, wbc, &epd); |
2524 | 2536 | ||
2525 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2537 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
2526 | __extent_writepage, &epd); | 2538 | __extent_writepage, &epd, flush_write_bio); |
2527 | if (epd.bio) { | 2539 | if (epd.bio) { |
2528 | submit_one_bio(WRITE, epd.bio, 0, 0); | 2540 | submit_one_bio(WRITE, epd.bio, 0, 0); |
2529 | } | 2541 | } |
@@ -2592,7 +2604,8 @@ int extent_writepages(struct extent_io_tree *tree, | |||
2592 | }; | 2604 | }; |
2593 | 2605 | ||
2594 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2606 | ret = extent_write_cache_pages(tree, mapping, wbc, |
2595 | __extent_writepage, &epd); | 2607 | __extent_writepage, &epd, |
2608 | flush_write_bio); | ||
2596 | if (epd.bio) { | 2609 | if (epd.bio) { |
2597 | submit_one_bio(WRITE, epd.bio, 0, 0); | 2610 | submit_one_bio(WRITE, epd.bio, 0, 0); |
2598 | } | 2611 | } |
@@ -3087,6 +3100,9 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3087 | 3100 | ||
3088 | for (i = 0; i < num_pages; i++) { | 3101 | for (i = 0; i < num_pages; i++) { |
3089 | page = extent_buffer_page(eb, i); | 3102 | page = extent_buffer_page(eb, i); |
3103 | if (!set && !PageDirty(page)) | ||
3104 | continue; | ||
3105 | |||
3090 | lock_page(page); | 3106 | lock_page(page); |
3091 | if (i == 0) | 3107 | if (i == 0) |
3092 | set_page_extent_head(page, eb->len); | 3108 | set_page_extent_head(page, eb->len); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6854bf41856a..806caacff86c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -883,13 +883,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
883 | async_cow->work.ordered_free = async_cow_free; | 883 | async_cow->work.ordered_free = async_cow_free; |
884 | async_cow->work.flags = 0; | 884 | async_cow->work.flags = 0; |
885 | 885 | ||
886 | while(atomic_read(&root->fs_info->async_submit_draining) && | ||
887 | atomic_read(&root->fs_info->async_delalloc_pages)) { | ||
888 | wait_event(root->fs_info->async_submit_wait, | ||
889 | (atomic_read(&root->fs_info->async_delalloc_pages) | ||
890 | == 0)); | ||
891 | } | ||
892 | |||
893 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 886 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
894 | PAGE_CACHE_SHIFT; | 887 | PAGE_CACHE_SHIFT; |
895 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 888 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |