diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-31 15:42:53 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:05 -0400 |
commit | 61b4944018449003ac5f9757f4d125dce519cf51 (patch) | |
tree | 553855996c641a945344db870b6dfd0d2d02086e /fs/btrfs/inode.c | |
parent | 37d1aeee3990385e9bb436c50c2f7e120a668df6 (diff) |
Btrfs: Fix streaming read performance with checksumming on
Large streaming reads make for large bios, which means each entry on the
async work queue lists represents a large amount of data. IO
congestion throttling on the device was kicking in before the async
worker threads decided a single thread was busy and needed some help.
The end result was that a streaming read would result in a single CPU
running at 100% instead of balancing the work off to other CPUs.
This patch also changes the pre-IO checksum lookup done by reads to
work on a per-bio basis instead of a per-page basis. The per-page lookup resulted in many
extra btree lookups on large streaming reads. Doing the checksum lookup
right before bio submit allows us to reuse searches while processing
adjacent offsets.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 57 |
1 files changed, 4 insertions, 53 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c4afa9d78da9..31d52c51acc3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -374,6 +374,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
374 | BUG_ON(ret); | 374 | BUG_ON(ret); |
375 | 375 | ||
376 | if (!(rw & (1 << BIO_RW))) { | 376 | if (!(rw & (1 << BIO_RW))) { |
377 | if (!btrfs_test_opt(root, NODATASUM) && | ||
378 | !btrfs_test_flag(inode, NODATASUM)) { | ||
379 | btrfs_lookup_bio_sums(root, inode, bio); | ||
380 | } | ||
377 | goto mapit; | 381 | goto mapit; |
378 | } | 382 | } |
379 | 383 | ||
@@ -598,58 +602,6 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
598 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 602 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
599 | } | 603 | } |
600 | 604 | ||
601 | int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) | ||
602 | { | ||
603 | int ret = 0; | ||
604 | struct inode *inode = page->mapping->host; | ||
605 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
606 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
607 | struct btrfs_csum_item *item; | ||
608 | struct btrfs_path *path = NULL; | ||
609 | u32 csum; | ||
610 | |||
611 | if (btrfs_test_opt(root, NODATASUM) || | ||
612 | btrfs_test_flag(inode, NODATASUM)) | ||
613 | return 0; | ||
614 | |||
615 | /* | ||
616 | * It is possible there is an ordered extent that has | ||
617 | * not yet finished for this range in the file. If so, | ||
618 | * that extent will have a csum cached, and it will insert | ||
619 | * the sum after all the blocks in the extent are fully | ||
620 | * on disk. So, look for an ordered extent and use the | ||
621 | * sum if found. We have to do this before looking in the | ||
622 | * btree because csum items are pre-inserted based on | ||
623 | * the file size. btrfs_lookup_csum might find an item | ||
624 | * that still hasn't been fully filled. | ||
625 | */ | ||
626 | ret = btrfs_find_ordered_sum(inode, start, &csum); | ||
627 | if (ret == 0) | ||
628 | goto found; | ||
629 | |||
630 | ret = 0; | ||
631 | path = btrfs_alloc_path(); | ||
632 | item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); | ||
633 | if (IS_ERR(item)) { | ||
634 | ret = PTR_ERR(item); | ||
635 | /* a csum that isn't present is a preallocated region. */ | ||
636 | if (ret == -ENOENT || ret == -EFBIG) | ||
637 | ret = 0; | ||
638 | csum = 0; | ||
639 | printk("no csum found for inode %lu start %Lu\n", inode->i_ino, | ||
640 | start); | ||
641 | goto out; | ||
642 | } | ||
643 | read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, | ||
644 | BTRFS_CRC32_SIZE); | ||
645 | found: | ||
646 | set_state_private(io_tree, start, csum); | ||
647 | out: | ||
648 | if (path) | ||
649 | btrfs_free_path(path); | ||
650 | return ret; | ||
651 | } | ||
652 | |||
653 | struct io_failure_record { | 605 | struct io_failure_record { |
654 | struct page *page; | 606 | struct page *page; |
655 | u64 start; | 607 | u64 start; |
@@ -3613,7 +3565,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
3613 | .fill_delalloc = run_delalloc_range, | 3565 | .fill_delalloc = run_delalloc_range, |
3614 | .submit_bio_hook = btrfs_submit_bio_hook, | 3566 | .submit_bio_hook = btrfs_submit_bio_hook, |
3615 | .merge_bio_hook = btrfs_merge_bio_hook, | 3567 | .merge_bio_hook = btrfs_merge_bio_hook, |
3616 | .readpage_io_hook = btrfs_readpage_io_hook, | ||
3617 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 3568 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
3618 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, | 3569 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
3619 | .writepage_start_hook = btrfs_writepage_start_hook, | 3570 | .writepage_start_hook = btrfs_writepage_start_hook, |