path: root/fs/btrfs/disk-io.c
author	Chris Mason <chris.mason@oracle.com>	2008-11-06 22:03:00 -0500
committer	Chris Mason <chris.mason@oracle.com>	2008-11-06 22:03:00 -0500
commit	4a69a41009c4ac691f7d9c289f5f37fabeddce46 (patch)
tree	1dac90d2f8e4ad4114fb1f4c168925daf2769d28 /fs/btrfs/disk-io.c
parent	537fb0671549a9a6457ce42a25ab34b29d97a256 (diff)
Btrfs: Add ordered async work queues
Btrfs uses kernel threads to create async work queues for cpu intensive
operations such as checksumming and decompression. These work well, but
they make it difficult to keep IO order intact.

A single writepages call from pdflush or fsync will turn into a number of
bios, and each bio is checksummed in parallel. Once the checksum is
computed, the bio is sent down to the disk, and since we don't control the
order in which the parallel operations happen, they might go down to the
disk in almost any order. The code deals with this somewhat by having deep
work queues for a single kernel thread, making it very likely that a
single thread will process all the bios for a single inode.

This patch introduces an explicitly ordered work queue. As work structs
are placed into the queue they are put onto the tail of a list. They have
three callbacks:

->func (cpu intensive processing here)
->ordered_func (order sensitive processing here)
->ordered_free (free the work struct, all processing is done)

The func callback does the cpu intensive work, and when it completes the
work struct is marked as done. Every time a work struct completes, the
list is checked to see if the head is marked as done. If so, the
ordered_func callback is used to do the order sensitive processing and the
ordered_free callback is used to do any cleanup. Then we loop back and
check the head of the list again.

This patch also changes the checksumming code to use the ordered work
queues. On a 4 drive array, it increases streaming writes from 280MB/s to
350MB/s.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
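[Editor's note: to make the completion rule above concrete, here is a
minimal userspace sketch of the ordered drain loop. The helper names
(queue_work, run_ordered_completions, checksum_done) and the simplified
struct are illustrative stand-ins, not the btrfs_work/async-thread.c
implementation, which additionally locks the list and runs ->func on
pooled kernel threads.]

#include <stdio.h>
#include <stdlib.h>

struct work {
	struct work *next;
	int done;				/* set once ->func has finished */
	void (*ordered_func)(struct work *);	/* order sensitive step */
	void (*ordered_free)(struct work *);	/* final cleanup */
	int id;
};

static struct work *head;
static struct work *tail;

/* new work always goes on the tail, so queue order == submit order */
static void queue_work(struct work *w)
{
	if (tail)
		tail->next = w;
	else
		head = w;
	tail = w;
}

static void submit_in_order(struct work *w)
{
	printf("bio %d sent to disk\n", w->id);
}

static void free_work(struct work *w)
{
	free(w);
}

/*
 * Run after any work item finishes its cpu intensive stage.  Only the
 * head of the list may run its ordered callbacks, so the order
 * sensitive stage drains strictly in queue order no matter which
 * item finished first.
 */
static void run_ordered_completions(void)
{
	while (head && head->done) {
		struct work *w = head;

		w->ordered_func(w);
		head = w->next;
		if (!head)
			tail = NULL;
		w->ordered_free(w);
	}
}

/* stand-in for a worker thread finishing ->func (the checksum) */
static void checksum_done(struct work *w)
{
	printf("checksum for bio %d done\n", w->id);
	w->done = 1;
	run_ordered_completions();
}

int main(void)
{
	struct work *w[3];
	int i;

	for (i = 0; i < 3; i++) {
		w[i] = calloc(1, sizeof(*w[i]));
		w[i]->id = i + 1;
		w[i]->ordered_func = submit_in_order;
		w[i]->ordered_free = free_work;
		queue_work(w[i]);
	}

	/* checksums complete out of order ... */
	checksum_done(w[2]);
	checksum_done(w[0]);
	checksum_done(w[1]);
	/* ... but bios 1, 2, 3 still hit the "disk" in queue order */
	return 0;
}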
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--	fs/btrfs/disk-io.c	81
1 file changed, 57 insertions, 24 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 94b4e50f6b2c..e0a28f705a64 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -80,7 +80,8 @@ struct async_submit_bio {
 	struct inode *inode;
 	struct bio *bio;
 	struct list_head list;
-	extent_submit_bio_hook_t *submit_bio_hook;
+	extent_submit_bio_hook_t *submit_bio_start;
+	extent_submit_bio_hook_t *submit_bio_done;
 	int rw;
 	int mirror_num;
 	unsigned long bio_flags;
@@ -452,7 +453,18 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 		btrfs_async_submit_limit(info);
 }
 
-static void run_one_async_submit(struct btrfs_work *work)
+static void run_one_async_start(struct btrfs_work *work)
+{
+	struct btrfs_fs_info *fs_info;
+	struct async_submit_bio *async;
+
+	async = container_of(work, struct async_submit_bio, work);
+	fs_info = BTRFS_I(async->inode)->root->fs_info;
+	async->submit_bio_start(async->inode, async->rw, async->bio,
+				async->mirror_num, async->bio_flags);
+}
+
+static void run_one_async_done(struct btrfs_work *work)
 {
 	struct btrfs_fs_info *fs_info;
 	struct async_submit_bio *async;
@@ -470,15 +482,23 @@ static void run_one_async_submit(struct btrfs_work *work)
 	    waitqueue_active(&fs_info->async_submit_wait))
 		wake_up(&fs_info->async_submit_wait);
 
-	async->submit_bio_hook(async->inode, async->rw, async->bio,
+	async->submit_bio_done(async->inode, async->rw, async->bio,
 			       async->mirror_num, async->bio_flags);
+}
+
+static void run_one_async_free(struct btrfs_work *work)
+{
+	struct async_submit_bio *async;
+
+	async = container_of(work, struct async_submit_bio, work);
 	kfree(async);
 }
 
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			int rw, struct bio *bio, int mirror_num,
 			unsigned long bio_flags,
-			extent_submit_bio_hook_t *submit_bio_hook)
+			extent_submit_bio_hook_t *submit_bio_start,
+			extent_submit_bio_hook_t *submit_bio_done)
 {
 	struct async_submit_bio *async;
 	int limit = btrfs_async_submit_limit(fs_info);
@@ -491,8 +511,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->rw = rw;
 	async->bio = bio;
 	async->mirror_num = mirror_num;
-	async->submit_bio_hook = submit_bio_hook;
-	async->work.func = run_one_async_submit;
+	async->submit_bio_start = submit_bio_start;
+	async->submit_bio_done = submit_bio_done;
+
+	async->work.func = run_one_async_start;
+	async->work.ordered_func = run_one_async_done;
+	async->work.ordered_free = run_one_async_free;
+
 	async->work.flags = 0;
 	async->bio_flags = bio_flags;
 
@@ -533,29 +558,25 @@ static int btree_csum_one_bio(struct bio *bio)
 	return 0;
 }
 
-static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-				 int mirror_num, unsigned long bio_flags)
+static int __btree_submit_bio_start(struct inode *inode, int rw,
+				 struct bio *bio, int mirror_num,
+				 unsigned long bio_flags)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	int ret;
-
 	/*
 	 * when we're called for a write, we're already in the async
 	 * submission context.  Just jump into btrfs_map_bio
 	 */
-	if (rw & (1 << BIO_RW)) {
-		btree_csum_one_bio(bio);
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-				     mirror_num, 1);
-	}
+	btree_csum_one_bio(bio);
+	return 0;
+}
 
+static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
+				 int mirror_num, unsigned long bio_flags)
+{
 	/*
-	 * called for a read, do the setup so that checksum validation
-	 * can happen in the async kernel threads
+	 * when we're called for a write, we're already in the async
+	 * submission context.  Just jump into btrfs_map_bio
 	 */
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
-	BUG_ON(ret);
-
 	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
 }
 
@@ -567,11 +588,22 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	 * can happen in parallel across all CPUs
 	 */
 	if (!(rw & (1 << BIO_RW))) {
-		return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0);
+		int ret;
+		/*
+		 * called for a read, do the setup so that checksum validation
+		 * can happen in the async kernel threads
+		 */
+		ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
+					  bio, 1);
+		BUG_ON(ret);
+
+		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				     mirror_num, 1);
 	}
 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 				   inode, rw, bio, mirror_num, 0,
-				   __btree_submit_bio_hook);
+				   __btree_submit_bio_start,
+				   __btree_submit_bio_done);
 }
 
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
@@ -1534,7 +1566,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	 * were sent by the writeback daemons, improving overall locality
 	 * of the IO going down the pipe.
 	 */
-	fs_info->workers.idle_thresh = 128;
+	fs_info->workers.idle_thresh = 8;
+	fs_info->workers.ordered = 1;
 
 	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
 	btrfs_init_workers(&fs_info->endio_workers, "endio",