diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2013-07-25 07:22:34 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2013-09-01 08:04:33 -0400 |
commit | facc8a2247340a9735fe8cc123c5da2102f5ef1b (patch) | |
tree | fc6a1ea604e0bd5c3d22da891669e0516d776916 /fs/btrfs/inode.c | |
parent | f2a09da9d0cba17ad4041e7e54f1ca840b12d0be (diff) |
Btrfs: don't cache the csum value into the extent state tree
Before this patch, we cached the csum value in the extent state tree when
reading data from the disk. This caching increased the lock contention on
the state tree.
Now we just store the csum value in the bio structure or another unshared
structure, which reduces the lock contention.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 85 |
1 files changed, 34 insertions, 51 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3280b2b7592..b47330c8e02c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2826,16 +2826,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2826 | * if there's a match, we allow the bio to finish. If not, the code in | 2826 | * if there's a match, we allow the bio to finish. If not, the code in |
2827 | * extent_io.c will try to find good copies for us. | 2827 | * extent_io.c will try to find good copies for us. |
2828 | */ | 2828 | */ |
2829 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 2829 | static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
2830 | struct extent_state *state, int mirror) | 2830 | u64 phy_offset, struct page *page, |
2831 | u64 start, u64 end, int mirror) | ||
2831 | { | 2832 | { |
2832 | size_t offset = start - page_offset(page); | 2833 | size_t offset = start - page_offset(page); |
2833 | struct inode *inode = page->mapping->host; | 2834 | struct inode *inode = page->mapping->host; |
2834 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2835 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2835 | char *kaddr; | 2836 | char *kaddr; |
2836 | u64 private = ~(u32)0; | ||
2837 | int ret; | ||
2838 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2837 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2838 | u32 csum_expected; | ||
2839 | u32 csum = ~(u32)0; | 2839 | u32 csum = ~(u32)0; |
2840 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 2840 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
2841 | DEFAULT_RATELIMIT_BURST); | 2841 | DEFAULT_RATELIMIT_BURST); |
@@ -2855,19 +2855,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2855 | return 0; | 2855 | return 0; |
2856 | } | 2856 | } |
2857 | 2857 | ||
2858 | if (state && state->start == start) { | 2858 | phy_offset >>= inode->i_sb->s_blocksize_bits; |
2859 | private = state->private; | 2859 | csum_expected = *(((u32 *)io_bio->csum) + phy_offset); |
2860 | ret = 0; | ||
2861 | } else { | ||
2862 | ret = get_state_private(io_tree, start, &private); | ||
2863 | } | ||
2864 | kaddr = kmap_atomic(page); | ||
2865 | if (ret) | ||
2866 | goto zeroit; | ||
2867 | 2860 | ||
2861 | kaddr = kmap_atomic(page); | ||
2868 | csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); | 2862 | csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); |
2869 | btrfs_csum_final(csum, (char *)&csum); | 2863 | btrfs_csum_final(csum, (char *)&csum); |
2870 | if (csum != private) | 2864 | if (csum != csum_expected) |
2871 | goto zeroit; | 2865 | goto zeroit; |
2872 | 2866 | ||
2873 | kunmap_atomic(kaddr); | 2867 | kunmap_atomic(kaddr); |
@@ -2876,14 +2870,13 @@ good: | |||
2876 | 2870 | ||
2877 | zeroit: | 2871 | zeroit: |
2878 | if (__ratelimit(&_rs)) | 2872 | if (__ratelimit(&_rs)) |
2879 | btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", | 2873 | btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", |
2880 | (unsigned long long)btrfs_ino(page->mapping->host), | 2874 | (unsigned long long)btrfs_ino(page->mapping->host), |
2881 | (unsigned long long)start, csum, | 2875 | (unsigned long long)start, csum, csum_expected); |
2882 | (unsigned long long)private); | ||
2883 | memset(kaddr + offset, 1, end - start + 1); | 2876 | memset(kaddr + offset, 1, end - start + 1); |
2884 | flush_dcache_page(page); | 2877 | flush_dcache_page(page); |
2885 | kunmap_atomic(kaddr); | 2878 | kunmap_atomic(kaddr); |
2886 | if (private == 0) | 2879 | if (csum_expected == 0) |
2887 | return 0; | 2880 | return 0; |
2888 | return -EIO; | 2881 | return -EIO; |
2889 | } | 2882 | } |
@@ -6812,26 +6805,6 @@ unlock_err: | |||
6812 | return ret; | 6805 | return ret; |
6813 | } | 6806 | } |
6814 | 6807 | ||
6815 | struct btrfs_dio_private { | ||
6816 | struct inode *inode; | ||
6817 | u64 logical_offset; | ||
6818 | u64 disk_bytenr; | ||
6819 | u64 bytes; | ||
6820 | void *private; | ||
6821 | |||
6822 | /* number of bios pending for this dio */ | ||
6823 | atomic_t pending_bios; | ||
6824 | |||
6825 | /* IO errors */ | ||
6826 | int errors; | ||
6827 | |||
6828 | /* orig_bio is our btrfs_io_bio */ | ||
6829 | struct bio *orig_bio; | ||
6830 | |||
6831 | /* dio_bio came from fs/direct-io.c */ | ||
6832 | struct bio *dio_bio; | ||
6833 | }; | ||
6834 | |||
6835 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 6808 | static void btrfs_endio_direct_read(struct bio *bio, int err) |
6836 | { | 6809 | { |
6837 | struct btrfs_dio_private *dip = bio->bi_private; | 6810 | struct btrfs_dio_private *dip = bio->bi_private; |
@@ -6840,6 +6813,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
6840 | struct inode *inode = dip->inode; | 6813 | struct inode *inode = dip->inode; |
6841 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6814 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6842 | struct bio *dio_bio; | 6815 | struct bio *dio_bio; |
6816 | u32 *csums = (u32 *)dip->csum; | ||
6817 | int index = 0; | ||
6843 | u64 start; | 6818 | u64 start; |
6844 | 6819 | ||
6845 | start = dip->logical_offset; | 6820 | start = dip->logical_offset; |
@@ -6848,12 +6823,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
6848 | struct page *page = bvec->bv_page; | 6823 | struct page *page = bvec->bv_page; |
6849 | char *kaddr; | 6824 | char *kaddr; |
6850 | u32 csum = ~(u32)0; | 6825 | u32 csum = ~(u32)0; |
6851 | u64 private = ~(u32)0; | ||
6852 | unsigned long flags; | 6826 | unsigned long flags; |
6853 | 6827 | ||
6854 | if (get_state_private(&BTRFS_I(inode)->io_tree, | ||
6855 | start, &private)) | ||
6856 | goto failed; | ||
6857 | local_irq_save(flags); | 6828 | local_irq_save(flags); |
6858 | kaddr = kmap_atomic(page); | 6829 | kaddr = kmap_atomic(page); |
6859 | csum = btrfs_csum_data(kaddr + bvec->bv_offset, | 6830 | csum = btrfs_csum_data(kaddr + bvec->bv_offset, |
@@ -6863,18 +6834,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
6863 | local_irq_restore(flags); | 6834 | local_irq_restore(flags); |
6864 | 6835 | ||
6865 | flush_dcache_page(bvec->bv_page); | 6836 | flush_dcache_page(bvec->bv_page); |
6866 | if (csum != private) { | 6837 | if (csum != csums[index]) { |
6867 | failed: | 6838 | btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", |
6868 | btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", | 6839 | (unsigned long long)btrfs_ino(inode), |
6869 | (unsigned long long)btrfs_ino(inode), | 6840 | (unsigned long long)start, |
6870 | (unsigned long long)start, | 6841 | csum, csums[index]); |
6871 | csum, (unsigned)private); | ||
6872 | err = -EIO; | 6842 | err = -EIO; |
6873 | } | 6843 | } |
6874 | } | 6844 | } |
6875 | 6845 | ||
6876 | start += bvec->bv_len; | 6846 | start += bvec->bv_len; |
6877 | bvec++; | 6847 | bvec++; |
6848 | index++; | ||
6878 | } while (bvec <= bvec_end); | 6849 | } while (bvec <= bvec_end); |
6879 | 6850 | ||
6880 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | 6851 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
@@ -6991,6 +6962,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
6991 | int rw, u64 file_offset, int skip_sum, | 6962 | int rw, u64 file_offset, int skip_sum, |
6992 | int async_submit) | 6963 | int async_submit) |
6993 | { | 6964 | { |
6965 | struct btrfs_dio_private *dip = bio->bi_private; | ||
6994 | int write = rw & REQ_WRITE; | 6966 | int write = rw & REQ_WRITE; |
6995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6967 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6996 | int ret; | 6968 | int ret; |
@@ -7025,7 +6997,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
7025 | if (ret) | 6997 | if (ret) |
7026 | goto err; | 6998 | goto err; |
7027 | } else if (!skip_sum) { | 6999 | } else if (!skip_sum) { |
7028 | ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); | 7000 | ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, |
7001 | file_offset); | ||
7029 | if (ret) | 7002 | if (ret) |
7030 | goto err; | 7003 | goto err; |
7031 | } | 7004 | } |
@@ -7060,6 +7033,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7060 | bio_put(orig_bio); | 7033 | bio_put(orig_bio); |
7061 | return -EIO; | 7034 | return -EIO; |
7062 | } | 7035 | } |
7036 | |||
7063 | if (map_length >= orig_bio->bi_size) { | 7037 | if (map_length >= orig_bio->bi_size) { |
7064 | bio = orig_bio; | 7038 | bio = orig_bio; |
7065 | goto submit; | 7039 | goto submit; |
@@ -7155,19 +7129,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7155 | struct btrfs_dio_private *dip; | 7129 | struct btrfs_dio_private *dip; |
7156 | struct bio *io_bio; | 7130 | struct bio *io_bio; |
7157 | int skip_sum; | 7131 | int skip_sum; |
7132 | int sum_len; | ||
7158 | int write = rw & REQ_WRITE; | 7133 | int write = rw & REQ_WRITE; |
7159 | int ret = 0; | 7134 | int ret = 0; |
7135 | u16 csum_size; | ||
7160 | 7136 | ||
7161 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 7137 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
7162 | 7138 | ||
7163 | io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); | 7139 | io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); |
7164 | |||
7165 | if (!io_bio) { | 7140 | if (!io_bio) { |
7166 | ret = -ENOMEM; | 7141 | ret = -ENOMEM; |
7167 | goto free_ordered; | 7142 | goto free_ordered; |
7168 | } | 7143 | } |
7169 | 7144 | ||
7170 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | 7145 | if (!skip_sum && !write) { |
7146 | csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | ||
7147 | sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; | ||
7148 | sum_len *= csum_size; | ||
7149 | } else { | ||
7150 | sum_len = 0; | ||
7151 | } | ||
7152 | |||
7153 | dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); | ||
7171 | if (!dip) { | 7154 | if (!dip) { |
7172 | ret = -ENOMEM; | 7155 | ret = -ENOMEM; |
7173 | goto free_io_bio; | 7156 | goto free_io_bio; |