aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-11-19 12:44:22 -0500
committer Chris Mason <chris.mason@oracle.com> 2008-11-19 12:44:22 -0500
commit d2c3f4f695edac4d75c1b3eb01a1d16072de63bb (patch)
tree 14a8dd519d067adbe16e8adb7342343529eb5c75
parent 105d931d482b7d1b1b2dd4b0ea30365db8630b9f (diff)
Btrfs: Avoid writeback stalls
While building large bios in writepages, btrfs may end up waiting for other page writeback to finish if WB_SYNC_ALL is used. While it is waiting, the bio it is building has a number of pages with the writeback bit set and they aren't getting to the disk any time soon. This patch lowers the latencies of writeback in general by sending down the bio being built before waiting for other pages. The bio submission code tries to limit the total number of async bios in flight by waiting when we're over a certain number of async bios. But the waits are happening while writepages is building bios, and this can easily lead to stalls and other problems for people calling wait_on_page_writeback. The current fix is to let the congestion tests take care of waiting. sync() and others make sure to drain the current async requests to make sure that everything that was pending when the sync was started really gets to disk. The code would drain pending requests both before and after submitting a new request. But if one of the requests is waiting for page writeback to finish, the draining waits might block that page writeback. This patch changes the draining code to only wait after submitting the bio being processed. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/disk-io.c 12
-rw-r--r-- fs/btrfs/extent_io.c 24
-rw-r--r-- fs/btrfs/inode.c 7
3 files changed, 23 insertions, 20 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0e8d31274c92..8d03e4a3c4e9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -538,15 +538,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
538 async->work.flags = 0; 538 async->work.flags = 0;
539 async->bio_flags = bio_flags; 539 async->bio_flags = bio_flags;
540 540
541 while(atomic_read(&fs_info->async_submit_draining) &&
542 atomic_read(&fs_info->nr_async_submits)) {
543 wait_event(fs_info->async_submit_wait,
544 (atomic_read(&fs_info->nr_async_submits) == 0));
545 }
546
547 atomic_inc(&fs_info->nr_async_submits); 541 atomic_inc(&fs_info->nr_async_submits);
548 btrfs_queue_worker(&fs_info->workers, &async->work); 542 btrfs_queue_worker(&fs_info->workers, &async->work);
549 543#if 0
550 if (atomic_read(&fs_info->nr_async_submits) > limit) { 544 if (atomic_read(&fs_info->nr_async_submits) > limit) {
551 wait_event_timeout(fs_info->async_submit_wait, 545 wait_event_timeout(fs_info->async_submit_wait,
552 (atomic_read(&fs_info->nr_async_submits) < limit), 546 (atomic_read(&fs_info->nr_async_submits) < limit),
@@ -556,7 +550,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
556 (atomic_read(&fs_info->nr_async_bios) < limit), 550 (atomic_read(&fs_info->nr_async_bios) < limit),
557 HZ/10); 551 HZ/10);
558 } 552 }
559 553#endif
560 while(atomic_read(&fs_info->async_submit_draining) && 554 while(atomic_read(&fs_info->async_submit_draining) &&
561 atomic_read(&fs_info->nr_async_submits)) { 555 atomic_read(&fs_info->nr_async_submits)) {
562 wait_event(fs_info->async_submit_wait, 556 wait_event(fs_info->async_submit_wait,
@@ -1765,11 +1759,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1765 ret = btrfs_cleanup_reloc_trees(tree_root); 1759 ret = btrfs_cleanup_reloc_trees(tree_root);
1766 BUG_ON(ret); 1760 BUG_ON(ret);
1767 1761
1762read_fs_root:
1768 location.objectid = BTRFS_FS_TREE_OBJECTID; 1763 location.objectid = BTRFS_FS_TREE_OBJECTID;
1769 location.type = BTRFS_ROOT_ITEM_KEY; 1764 location.type = BTRFS_ROOT_ITEM_KEY;
1770 location.offset = (u64)-1; 1765 location.offset = (u64)-1;
1771 1766
1772read_fs_root:
1773 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 1767 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
1774 if (!fs_info->fs_root) 1768 if (!fs_info->fs_root)
1775 goto fail_cleaner; 1769 goto fail_cleaner;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 54d013c3bb88..a0f3804efe4f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2398,7 +2398,8 @@ update_nr_written:
2398int extent_write_cache_pages(struct extent_io_tree *tree, 2398int extent_write_cache_pages(struct extent_io_tree *tree,
2399 struct address_space *mapping, 2399 struct address_space *mapping,
2400 struct writeback_control *wbc, 2400 struct writeback_control *wbc,
2401 writepage_t writepage, void *data) 2401 writepage_t writepage, void *data,
2402 void (*flush_fn)(void *))
2402{ 2403{
2403 struct backing_dev_info *bdi = mapping->backing_dev_info; 2404 struct backing_dev_info *bdi = mapping->backing_dev_info;
2404 int ret = 0; 2405 int ret = 0;
@@ -2460,8 +2461,10 @@ retry:
2460 continue; 2461 continue;
2461 } 2462 }
2462 2463
2463 if (wbc->sync_mode != WB_SYNC_NONE) 2464 if (wbc->sync_mode != WB_SYNC_NONE) {
2465 flush_fn(data);
2464 wait_on_page_writeback(page); 2466 wait_on_page_writeback(page);
2467 }
2465 2468
2466 if (PageWriteback(page) || 2469 if (PageWriteback(page) ||
2467 !clear_page_dirty_for_io(page)) { 2470 !clear_page_dirty_for_io(page)) {
@@ -2498,6 +2501,15 @@ retry:
2498} 2501}
2499EXPORT_SYMBOL(extent_write_cache_pages); 2502EXPORT_SYMBOL(extent_write_cache_pages);
2500 2503
2504static noinline void flush_write_bio(void *data)
2505{
2506 struct extent_page_data *epd = data;
2507 if (epd->bio) {
2508 submit_one_bio(WRITE, epd->bio, 0, 0);
2509 epd->bio = NULL;
2510 }
2511}
2512
2501int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2513int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2502 get_extent_t *get_extent, 2514 get_extent_t *get_extent,
2503 struct writeback_control *wbc) 2515 struct writeback_control *wbc)
@@ -2523,7 +2535,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2523 ret = __extent_writepage(page, wbc, &epd); 2535 ret = __extent_writepage(page, wbc, &epd);
2524 2536
2525 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2537 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2526 __extent_writepage, &epd); 2538 __extent_writepage, &epd, flush_write_bio);
2527 if (epd.bio) { 2539 if (epd.bio) {
2528 submit_one_bio(WRITE, epd.bio, 0, 0); 2540 submit_one_bio(WRITE, epd.bio, 0, 0);
2529 } 2541 }
@@ -2592,7 +2604,8 @@ int extent_writepages(struct extent_io_tree *tree,
2592 }; 2604 };
2593 2605
2594 ret = extent_write_cache_pages(tree, mapping, wbc, 2606 ret = extent_write_cache_pages(tree, mapping, wbc,
2595 __extent_writepage, &epd); 2607 __extent_writepage, &epd,
2608 flush_write_bio);
2596 if (epd.bio) { 2609 if (epd.bio) {
2597 submit_one_bio(WRITE, epd.bio, 0, 0); 2610 submit_one_bio(WRITE, epd.bio, 0, 0);
2598 } 2611 }
@@ -3087,6 +3100,9 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3087 3100
3088 for (i = 0; i < num_pages; i++) { 3101 for (i = 0; i < num_pages; i++) {
3089 page = extent_buffer_page(eb, i); 3102 page = extent_buffer_page(eb, i);
3103 if (!set && !PageDirty(page))
3104 continue;
3105
3090 lock_page(page); 3106 lock_page(page);
3091 if (i == 0) 3107 if (i == 0)
3092 set_page_extent_head(page, eb->len); 3108 set_page_extent_head(page, eb->len);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6854bf41856a..806caacff86c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -883,13 +883,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
883 async_cow->work.ordered_free = async_cow_free; 883 async_cow->work.ordered_free = async_cow_free;
884 async_cow->work.flags = 0; 884 async_cow->work.flags = 0;
885 885
886 while(atomic_read(&root->fs_info->async_submit_draining) &&
887 atomic_read(&root->fs_info->async_delalloc_pages)) {
888 wait_event(root->fs_info->async_submit_wait,
889 (atomic_read(&root->fs_info->async_delalloc_pages)
890 == 0));
891 }
892
893 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> 886 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
894 PAGE_CACHE_SHIFT; 887 PAGE_CACHE_SHIFT;
895 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); 888 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);