aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-11-07 18:22:45 -0500
committer: Chris Mason <chris.mason@oracle.com> 2008-11-07 18:22:45 -0500
commit: 5f2cc086ccab27ac5252b3883ac004347860b4c7 (patch)
tree: 5d9d1a5ebce044fabf6491e454af60289895bba5 /fs/btrfs/transaction.c
parent: 42e70e7a2f9d96fd843723fa46d5121cb3e551d0 (diff)
Btrfs: Avoid unplug storms during commit
While doing a commit, btrfs makes sure all the metadata blocks were properly written to disk, calling wait_on_page_writeback for each page. This writeback happens after allowing another transaction to start, so it competes for the disk with other processes in the FS.

If the page writeback bit is still set, each wait_on_page_writeback might trigger an unplug, even though the page might be waiting for checksumming to finish or might be waiting for the async work queue to submit the bio.

This trades wait_on_page_writeback for waiting on the extent writeback bits. It won't trigger any unplugs and substantially improves performance in a number of workloads.

This also changes the async bio submission to avoid requeueing if there is only one device. The requeue just wastes CPU time because there are no other devices to service.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- fs/btrfs/transaction.c 30
1 files changed, 29 insertions, 1 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e72a013d24bf..202c1b6df4a4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -20,6 +20,7 @@
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/blkdev.h>
23#include "ctree.h" 24#include "ctree.h"
24#include "disk-io.h" 25#include "disk-io.h"
25#include "transaction.h" 26#include "transaction.h"
@@ -331,6 +332,7 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
331 int werr = 0; 332 int werr = 0;
332 struct page *page; 333 struct page *page;
333 struct inode *btree_inode = root->fs_info->btree_inode; 334 struct inode *btree_inode = root->fs_info->btree_inode;
335 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
334 u64 start = 0; 336 u64 start = 0;
335 u64 end; 337 u64 end;
336 unsigned long index; 338 unsigned long index;
@@ -371,6 +373,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
371 page_cache_release(page); 373 page_cache_release(page);
372 } 374 }
373 } 375 }
376 /*
377 * we unplug once and then use the wait_on_extent_bit for
378 * everything else
379 */
380 blk_run_address_space(btree_inode->i_mapping);
374 while(1) { 381 while(1) {
375 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 382 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
376 EXTENT_DIRTY); 383 EXTENT_DIRTY);
@@ -391,7 +398,28 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
391 if (err) 398 if (err)
392 werr = err; 399 werr = err;
393 } 400 }
394 wait_on_page_writeback(page); 401 if (PageWriteback(page)) {
402 /*
403 * we don't wait on the page writeback bit
404 * because that triggers a lot of unplugs.
405 * The extent bits are much nicer to
406 * the disks, but come with a slightly
407 * higher latency because we aren't forcing
408 * unplugs.
409 */
410 wait_on_extent_writeback(io_tree,
411 page_offset(page),
412 page_offset(page) +
413 PAGE_CACHE_SIZE - 1);
414 }
415 if (PageWriteback(page)) {
416 /*
417 * the state bits get cleared before the
418 * page bits, lets add some extra
419 * paranoia here
420 */
421 wait_on_page_writeback(page);
422 }
395 page_cache_release(page); 423 page_cache_release(page);
396 cond_resched(); 424 cond_resched();
397 } 425 }