aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Miao Xie <miaox@cn.fujitsu.com> 2013-07-25 07:22:34 -0400
committer: Chris Mason <chris.mason@fusionio.com> 2013-09-01 08:04:33 -0400
commit: facc8a2247340a9735fe8cc123c5da2102f5ef1b (patch)
tree: fc6a1ea604e0bd5c3d22da891669e0516d776916 /fs
parent: f2a09da9d0cba17ad4041e7e54f1ca840b12d0be (diff)
Btrfs: don't cache the csum value into the extent state tree
Before this patch, we cached the csum value in the extent state tree when reading data from the disk; this operation increased the lock contention on the state tree. Now we just store the csum value in the bio structure or another unshared structure, so we can reduce the lock contention.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/btrfs_inode.h 21
-rw-r--r-- fs/btrfs/ctree.h 4
-rw-r--r-- fs/btrfs/disk-io.c 5
-rw-r--r-- fs/btrfs/extent_io.c 125
-rw-r--r-- fs/btrfs/extent_io.h 10
-rw-r--r-- fs/btrfs/file-item.c 81
-rw-r--r-- fs/btrfs/inode.c 85
-rw-r--r-- fs/btrfs/volumes.h 7
8 files changed, 174 insertions, 164 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 08b286b2a2c5..d0ae226926ee 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
218 return 0; 218 return 0;
219} 219}
220 220
221struct btrfs_dio_private {
222 struct inode *inode;
223 u64 logical_offset;
224 u64 disk_bytenr;
225 u64 bytes;
226 void *private;
227
228 /* number of bios pending for this dio */
229 atomic_t pending_bios;
230
231 /* IO errors */
232 int errors;
233
234 /* orig_bio is our btrfs_io_bio */
235 struct bio *orig_bio;
236
237 /* dio_bio came from fs/direct-io.c */
238 struct bio *dio_bio;
239 u8 csum[0];
240};
241
221/* 242/*
222 * Disable DIO read nolock optimization, so new dio readers will be forced 243 * Disable DIO read nolock optimization, so new dio readers will be forced
223 * to grab i_mutex. It is used to avoid the endless truncate due to 244 * to grab i_mutex. It is used to avoid the endless truncate due to
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index dda60e9f6b89..cbb1263752f8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3556,12 +3556,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
3556 struct btrfs_inode_extref **extref_ret); 3556 struct btrfs_inode_extref **extref_ret);
3557 3557
3558/* file-item.c */ 3558/* file-item.c */
3559struct btrfs_dio_private;
3559int btrfs_del_csums(struct btrfs_trans_handle *trans, 3560int btrfs_del_csums(struct btrfs_trans_handle *trans,
3560 struct btrfs_root *root, u64 bytenr, u64 len); 3561 struct btrfs_root *root, u64 bytenr, u64 len);
3561int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 3562int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
3562 struct bio *bio, u32 *dst); 3563 struct bio *bio, u32 *dst);
3563int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 3564int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
3564 struct bio *bio, u64 logical_offset); 3565 struct btrfs_dio_private *dip, struct bio *bio,
3566 u64 logical_offset);
3565int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3567int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3566 struct btrfs_root *root, 3568 struct btrfs_root *root,
3567 u64 objectid, u64 pos, 3569 u64 objectid, u64 pos,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 56fcf8439f42..8e2eee67c3a8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -576,8 +576,9 @@ static noinline int check_leaf(struct btrfs_root *root,
576 return 0; 576 return 0;
577} 577}
578 578
579static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 579static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
580 struct extent_state *state, int mirror) 580 u64 phy_offset, struct page *page,
581 u64 start, u64 end, int mirror)
581{ 582{
582 struct extent_io_tree *tree; 583 struct extent_io_tree *tree;
583 u64 found_start; 584 u64 found_start;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0297f6f4d4c3..6fbacfabb660 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1837,64 +1837,6 @@ out:
1837 return ret; 1837 return ret;
1838} 1838}
1839 1839
1840void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1841 int count)
1842{
1843 struct rb_node *node;
1844 struct extent_state *state;
1845
1846 spin_lock(&tree->lock);
1847 /*
1848 * this search will find all the extents that end after
1849 * our range starts.
1850 */
1851 node = tree_search(tree, start);
1852 BUG_ON(!node);
1853
1854 state = rb_entry(node, struct extent_state, rb_node);
1855 BUG_ON(state->start != start);
1856
1857 while (count) {
1858 state->private = *csums++;
1859 count--;
1860 state = next_state(state);
1861 }
1862 spin_unlock(&tree->lock);
1863}
1864
1865static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1866{
1867 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1868
1869 return page_offset(bvec->bv_page) + bvec->bv_offset;
1870}
1871
1872void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1873 u32 csums[], int count)
1874{
1875 struct rb_node *node;
1876 struct extent_state *state = NULL;
1877 u64 start;
1878
1879 spin_lock(&tree->lock);
1880 do {
1881 start = __btrfs_get_bio_offset(bio, bio_index);
1882 if (state == NULL || state->start != start) {
1883 node = tree_search(tree, start);
1884 BUG_ON(!node);
1885
1886 state = rb_entry(node, struct extent_state, rb_node);
1887 BUG_ON(state->start != start);
1888 }
1889 state->private = *csums++;
1890 count--;
1891 bio_index++;
1892
1893 state = next_state(state);
1894 } while (count);
1895 spin_unlock(&tree->lock);
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1840int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{ 1841{
1900 struct rb_node *node; 1842 struct rb_node *node;
@@ -2201,8 +2143,9 @@ out:
2201 * needed 2143 * needed
2202 */ 2144 */
2203 2145
2204static int bio_readpage_error(struct bio *failed_bio, struct page *page, 2146static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2205 u64 start, u64 end, int failed_mirror) 2147 struct page *page, u64 start, u64 end,
2148 int failed_mirror)
2206{ 2149{
2207 struct io_failure_record *failrec = NULL; 2150 struct io_failure_record *failrec = NULL;
2208 u64 private; 2151 u64 private;
@@ -2211,8 +2154,9 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2211 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 2154 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2212 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2155 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2213 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2156 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2214 struct extent_state *state;
2215 struct bio *bio; 2157 struct bio *bio;
2158 struct btrfs_io_bio *btrfs_failed_bio;
2159 struct btrfs_io_bio *btrfs_bio;
2216 int num_copies; 2160 int num_copies;
2217 int ret; 2161 int ret;
2218 int read_mode; 2162 int read_mode;
@@ -2302,13 +2246,6 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2302 return -EIO; 2246 return -EIO;
2303 } 2247 }
2304 2248
2305 spin_lock(&tree->lock);
2306 state = find_first_extent_bit_state(tree, failrec->start,
2307 EXTENT_LOCKED);
2308 if (state && state->start != failrec->start)
2309 state = NULL;
2310 spin_unlock(&tree->lock);
2311
2312 /* 2249 /*
2313 * there are two premises: 2250 * there are two premises:
2314 * a) deliver good data to the caller 2251 * a) deliver good data to the caller
@@ -2345,9 +2282,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2345 read_mode = READ_SYNC; 2282 read_mode = READ_SYNC;
2346 } 2283 }
2347 2284
2348 if (!state || failrec->this_mirror > num_copies) { 2285 if (failrec->this_mirror > num_copies) {
2349 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " 2286 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2350 "next_mirror %d, failed_mirror %d\n", state,
2351 num_copies, failrec->this_mirror, failed_mirror); 2287 num_copies, failrec->this_mirror, failed_mirror);
2352 free_io_failure(inode, failrec, 0); 2288 free_io_failure(inode, failrec, 0);
2353 return -EIO; 2289 return -EIO;
@@ -2358,12 +2294,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2358 free_io_failure(inode, failrec, 0); 2294 free_io_failure(inode, failrec, 0);
2359 return -EIO; 2295 return -EIO;
2360 } 2296 }
2361 bio->bi_private = state;
2362 bio->bi_end_io = failed_bio->bi_end_io; 2297 bio->bi_end_io = failed_bio->bi_end_io;
2363 bio->bi_sector = failrec->logical >> 9; 2298 bio->bi_sector = failrec->logical >> 9;
2364 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2299 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2365 bio->bi_size = 0; 2300 bio->bi_size = 0;
2366 2301
2302 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2303 if (btrfs_failed_bio->csum) {
2304 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2305 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2306
2307 btrfs_bio = btrfs_io_bio(bio);
2308 btrfs_bio->csum = btrfs_bio->csum_inline;
2309 phy_offset >>= inode->i_sb->s_blocksize_bits;
2310 phy_offset *= csum_size;
2311 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2312 csum_size);
2313 }
2314
2367 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2315 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2368 2316
2369 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2317 pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2462,9 +2410,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2462 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2410 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2463 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 2411 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2464 struct bio_vec *bvec = bio->bi_io_vec; 2412 struct bio_vec *bvec = bio->bi_io_vec;
2413 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2465 struct extent_io_tree *tree; 2414 struct extent_io_tree *tree;
2415 u64 offset = 0;
2466 u64 start; 2416 u64 start;
2467 u64 end; 2417 u64 end;
2418 u64 len;
2468 int mirror; 2419 int mirror;
2469 int ret; 2420 int ret;
2470 2421
@@ -2475,7 +2426,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2475 struct page *page = bvec->bv_page; 2426 struct page *page = bvec->bv_page;
2476 struct extent_state *cached = NULL; 2427 struct extent_state *cached = NULL;
2477 struct extent_state *state; 2428 struct extent_state *state;
2478 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2479 struct inode *inode = page->mapping->host; 2429 struct inode *inode = page->mapping->host;
2480 2430
2481 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2431 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2496,6 +2446,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2496 2446
2497 start = page_offset(page); 2447 start = page_offset(page);
2498 end = start + bvec->bv_offset + bvec->bv_len - 1; 2448 end = start + bvec->bv_offset + bvec->bv_len - 1;
2449 len = bvec->bv_len;
2499 2450
2500 if (++bvec <= bvec_end) 2451 if (++bvec <= bvec_end)
2501 prefetchw(&bvec->bv_page->flags); 2452 prefetchw(&bvec->bv_page->flags);
@@ -2514,8 +2465,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2514 mirror = io_bio->mirror_num; 2465 mirror = io_bio->mirror_num;
2515 if (likely(uptodate && tree->ops && 2466 if (likely(uptodate && tree->ops &&
2516 tree->ops->readpage_end_io_hook)) { 2467 tree->ops->readpage_end_io_hook)) {
2517 ret = tree->ops->readpage_end_io_hook(page, start, end, 2468 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2518 state, mirror); 2469 page, start, end,
2470 mirror);
2519 if (ret) 2471 if (ret)
2520 uptodate = 0; 2472 uptodate = 0;
2521 else 2473 else
@@ -2541,7 +2493,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2541 * can't handle the error it will return -EIO and we 2493 * can't handle the error it will return -EIO and we
2542 * remain responsible for that page. 2494 * remain responsible for that page.
2543 */ 2495 */
2544 ret = bio_readpage_error(bio, page, start, end, mirror); 2496 ret = bio_readpage_error(bio, offset, page, start, end,
2497 mirror);
2545 if (ret == 0) { 2498 if (ret == 0) {
2546 uptodate = 2499 uptodate =
2547 test_bit(BIO_UPTODATE, &bio->bi_flags); 2500 test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -2573,8 +2526,11 @@ readpage_ok:
2573 SetPageError(page); 2526 SetPageError(page);
2574 } 2527 }
2575 unlock_page(page); 2528 unlock_page(page);
2529 offset += len;
2576 } while (bvec <= bvec_end); 2530 } while (bvec <= bvec_end);
2577 2531
2532 if (io_bio->end_io)
2533 io_bio->end_io(io_bio, err);
2578 bio_put(bio); 2534 bio_put(bio);
2579} 2535}
2580 2536
@@ -2586,6 +2542,7 @@ struct bio *
2586btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2542btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2587 gfp_t gfp_flags) 2543 gfp_t gfp_flags)
2588{ 2544{
2545 struct btrfs_io_bio *btrfs_bio;
2589 struct bio *bio; 2546 struct bio *bio;
2590 2547
2591 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2548 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2558,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2601 bio->bi_size = 0; 2558 bio->bi_size = 0;
2602 bio->bi_bdev = bdev; 2559 bio->bi_bdev = bdev;
2603 bio->bi_sector = first_sector; 2560 bio->bi_sector = first_sector;
2561 btrfs_bio = btrfs_io_bio(bio);
2562 btrfs_bio->csum = NULL;
2563 btrfs_bio->csum_allocated = NULL;
2564 btrfs_bio->end_io = NULL;
2604 } 2565 }
2605 return bio; 2566 return bio;
2606} 2567}
@@ -2614,7 +2575,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2614/* this also allocates from the btrfs_bioset */ 2575/* this also allocates from the btrfs_bioset */
2615struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2576struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2616{ 2577{
2617 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2578 struct btrfs_io_bio *btrfs_bio;
2579 struct bio *bio;
2580
2581 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2582 if (bio) {
2583 btrfs_bio = btrfs_io_bio(bio);
2584 btrfs_bio->csum = NULL;
2585 btrfs_bio->csum_allocated = NULL;
2586 btrfs_bio->end_io = NULL;
2587 }
2588 return bio;
2618} 2589}
2619 2590
2620 2591
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3b8c4e26e1da..f7544afefdb9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -62,6 +62,7 @@
62 62
63struct extent_state; 63struct extent_state;
64struct btrfs_root; 64struct btrfs_root;
65struct btrfs_io_bio;
65 66
66typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 67typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
67 struct bio *bio, int mirror_num, 68 struct bio *bio, int mirror_num,
@@ -77,8 +78,9 @@ struct extent_io_ops {
77 size_t size, struct bio *bio, 78 size_t size, struct bio *bio,
78 unsigned long bio_flags); 79 unsigned long bio_flags);
79 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 80 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
80 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 81 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
81 struct extent_state *state, int mirror); 82 struct page *page, u64 start, u64 end,
83 int mirror);
82 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 84 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
83 struct extent_state *state, int uptodate); 85 struct extent_state *state, int uptodate);
84 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 86 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -262,10 +264,6 @@ int extent_readpages(struct extent_io_tree *tree,
262int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 264int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
263 __u64 start, __u64 len, get_extent_t *get_extent); 265 __u64 start, __u64 len, get_extent_t *get_extent);
264int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 266int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
265void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
266 int count);
267void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
268 int bvec_index, u32 csums[], int count);
269int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 267int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
270void set_page_extent_mapped(struct page *page); 268void set_page_extent_mapped(struct page *page);
271 269
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a7bfc9541803..f87d09a0dfaa 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -23,6 +23,7 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "disk-io.h" 24#include "disk-io.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "volumes.h"
26#include "print-tree.h" 27#include "print-tree.h"
27 28
28#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 29#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
@@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
152 return ret; 153 return ret;
153} 154}
154 155
156static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
157{
158 kfree(bio->csum_allocated);
159}
160
155static int __btrfs_lookup_bio_sums(struct btrfs_root *root, 161static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
156 struct inode *inode, struct bio *bio, 162 struct inode *inode, struct bio *bio,
157 u64 logical_offset, u32 *dst, int dio) 163 u64 logical_offset, u32 *dst, int dio)
158{ 164{
159 u32 sum[16];
160 int len;
161 struct bio_vec *bvec = bio->bi_io_vec; 165 struct bio_vec *bvec = bio->bi_io_vec;
162 int bio_index = 0; 166 struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
167 struct btrfs_csum_item *item = NULL;
168 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
169 struct btrfs_path *path;
170 u8 *csum;
163 u64 offset = 0; 171 u64 offset = 0;
164 u64 item_start_offset = 0; 172 u64 item_start_offset = 0;
165 u64 item_last_offset = 0; 173 u64 item_last_offset = 0;
166 u64 disk_bytenr; 174 u64 disk_bytenr;
167 u32 diff; 175 u32 diff;
168 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 176 int nblocks;
177 int bio_index = 0;
169 int count; 178 int count;
170 struct btrfs_path *path; 179 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
171 struct btrfs_csum_item *item = NULL;
172 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
173 180
174 path = btrfs_alloc_path(); 181 path = btrfs_alloc_path();
175 if (!path) 182 if (!path)
176 return -ENOMEM; 183 return -ENOMEM;
184
185 nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
189 GFP_NOFS);
190 if (!btrfs_bio->csum_allocated) {
191 btrfs_free_path(path);
192 return -ENOMEM;
193 }
194 btrfs_bio->csum = btrfs_bio->csum_allocated;
195 btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
196 } else {
197 btrfs_bio->csum = btrfs_bio->csum_inline;
198 }
199 csum = btrfs_bio->csum;
200 } else {
201 csum = (u8 *)dst;
202 }
203
177 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 204 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
178 path->reada = 2; 205 path->reada = 2;
179 206
@@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
194 if (dio) 221 if (dio)
195 offset = logical_offset; 222 offset = logical_offset;
196 while (bio_index < bio->bi_vcnt) { 223 while (bio_index < bio->bi_vcnt) {
197 len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
198 if (!dio) 224 if (!dio)
199 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 225 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
200 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, 226 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
201 len); 227 (u32 *)csum, nblocks);
202 if (count) 228 if (count)
203 goto found; 229 goto found;
204 230
@@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
213 path, disk_bytenr, 0); 239 path, disk_bytenr, 0);
214 if (IS_ERR(item)) { 240 if (IS_ERR(item)) {
215 count = 1; 241 count = 1;
216 sum[0] = 0; 242 memset(csum, 0, csum_size);
217 if (BTRFS_I(inode)->root->root_key.objectid == 243 if (BTRFS_I(inode)->root->root_key.objectid ==
218 BTRFS_DATA_RELOC_TREE_OBJECTID) { 244 BTRFS_DATA_RELOC_TREE_OBJECTID) {
219 set_extent_bits(io_tree, offset, 245 set_extent_bits(io_tree, offset,
@@ -249,23 +275,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
249 diff = disk_bytenr - item_start_offset; 275 diff = disk_bytenr - item_start_offset;
250 diff = diff / root->sectorsize; 276 diff = diff / root->sectorsize;
251 diff = diff * csum_size; 277 diff = diff * csum_size;
252 count = min_t(int, len, (item_last_offset - disk_bytenr) >> 278 count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
253 inode->i_sb->s_blocksize_bits); 279 inode->i_sb->s_blocksize_bits);
254 read_extent_buffer(path->nodes[0], sum, 280 read_extent_buffer(path->nodes[0], csum,
255 ((unsigned long)item) + diff, 281 ((unsigned long)item) + diff,
256 csum_size * count); 282 csum_size * count);
257found: 283found:
258 if (dst) { 284 csum += count * csum_size;
259 memcpy(dst, sum, count * csum_size); 285 nblocks -= count;
260 dst += count;
261 } else {
262 if (dio)
263 extent_cache_csums_dio(io_tree, offset, sum,
264 count);
265 else
266 extent_cache_csums(io_tree, bio, bio_index, sum,
267 count);
268 }
269 while (count--) { 286 while (count--) {
270 disk_bytenr += bvec->bv_len; 287 disk_bytenr += bvec->bv_len;
271 offset += bvec->bv_len; 288 offset += bvec->bv_len;
@@ -284,9 +301,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
284} 301}
285 302
286int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 303int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
287 struct bio *bio, u64 offset) 304 struct btrfs_dio_private *dip, struct bio *bio,
305 u64 offset)
288{ 306{
289 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); 307 int len = (bio->bi_sector << 9) - dip->disk_bytenr;
308 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
309 int ret;
310
311 len >>= inode->i_sb->s_blocksize_bits;
312 len *= csum_size;
313
314 ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
315 (u32 *)(dip->csum + len), 1);
316 return ret;
290} 317}
291 318
292int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 319int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d3280b2b7592..b47330c8e02c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2826,16 +2826,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2826 * if there's a match, we allow the bio to finish. If not, the code in 2826 * if there's a match, we allow the bio to finish. If not, the code in
2827 * extent_io.c will try to find good copies for us. 2827 * extent_io.c will try to find good copies for us.
2828 */ 2828 */
2829static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2829static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2830 struct extent_state *state, int mirror) 2830 u64 phy_offset, struct page *page,
2831 u64 start, u64 end, int mirror)
2831{ 2832{
2832 size_t offset = start - page_offset(page); 2833 size_t offset = start - page_offset(page);
2833 struct inode *inode = page->mapping->host; 2834 struct inode *inode = page->mapping->host;
2834 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2835 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2835 char *kaddr; 2836 char *kaddr;
2836 u64 private = ~(u32)0;
2837 int ret;
2838 struct btrfs_root *root = BTRFS_I(inode)->root; 2837 struct btrfs_root *root = BTRFS_I(inode)->root;
2838 u32 csum_expected;
2839 u32 csum = ~(u32)0; 2839 u32 csum = ~(u32)0;
2840 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 2840 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2841 DEFAULT_RATELIMIT_BURST); 2841 DEFAULT_RATELIMIT_BURST);
@@ -2855,19 +2855,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2855 return 0; 2855 return 0;
2856 } 2856 }
2857 2857
2858 if (state && state->start == start) { 2858 phy_offset >>= inode->i_sb->s_blocksize_bits;
2859 private = state->private; 2859 csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
2860 ret = 0;
2861 } else {
2862 ret = get_state_private(io_tree, start, &private);
2863 }
2864 kaddr = kmap_atomic(page);
2865 if (ret)
2866 goto zeroit;
2867 2860
2861 kaddr = kmap_atomic(page);
2868 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); 2862 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2869 btrfs_csum_final(csum, (char *)&csum); 2863 btrfs_csum_final(csum, (char *)&csum);
2870 if (csum != private) 2864 if (csum != csum_expected)
2871 goto zeroit; 2865 goto zeroit;
2872 2866
2873 kunmap_atomic(kaddr); 2867 kunmap_atomic(kaddr);
@@ -2876,14 +2870,13 @@ good:
2876 2870
2877zeroit: 2871zeroit:
2878 if (__ratelimit(&_rs)) 2872 if (__ratelimit(&_rs))
2879 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", 2873 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
2880 (unsigned long long)btrfs_ino(page->mapping->host), 2874 (unsigned long long)btrfs_ino(page->mapping->host),
2881 (unsigned long long)start, csum, 2875 (unsigned long long)start, csum, csum_expected);
2882 (unsigned long long)private);
2883 memset(kaddr + offset, 1, end - start + 1); 2876 memset(kaddr + offset, 1, end - start + 1);
2884 flush_dcache_page(page); 2877 flush_dcache_page(page);
2885 kunmap_atomic(kaddr); 2878 kunmap_atomic(kaddr);
2886 if (private == 0) 2879 if (csum_expected == 0)
2887 return 0; 2880 return 0;
2888 return -EIO; 2881 return -EIO;
2889} 2882}
@@ -6812,26 +6805,6 @@ unlock_err:
6812 return ret; 6805 return ret;
6813} 6806}
6814 6807
6815struct btrfs_dio_private {
6816 struct inode *inode;
6817 u64 logical_offset;
6818 u64 disk_bytenr;
6819 u64 bytes;
6820 void *private;
6821
6822 /* number of bios pending for this dio */
6823 atomic_t pending_bios;
6824
6825 /* IO errors */
6826 int errors;
6827
6828 /* orig_bio is our btrfs_io_bio */
6829 struct bio *orig_bio;
6830
6831 /* dio_bio came from fs/direct-io.c */
6832 struct bio *dio_bio;
6833};
6834
6835static void btrfs_endio_direct_read(struct bio *bio, int err) 6808static void btrfs_endio_direct_read(struct bio *bio, int err)
6836{ 6809{
6837 struct btrfs_dio_private *dip = bio->bi_private; 6810 struct btrfs_dio_private *dip = bio->bi_private;
@@ -6840,6 +6813,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6840 struct inode *inode = dip->inode; 6813 struct inode *inode = dip->inode;
6841 struct btrfs_root *root = BTRFS_I(inode)->root; 6814 struct btrfs_root *root = BTRFS_I(inode)->root;
6842 struct bio *dio_bio; 6815 struct bio *dio_bio;
6816 u32 *csums = (u32 *)dip->csum;
6817 int index = 0;
6843 u64 start; 6818 u64 start;
6844 6819
6845 start = dip->logical_offset; 6820 start = dip->logical_offset;
@@ -6848,12 +6823,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6848 struct page *page = bvec->bv_page; 6823 struct page *page = bvec->bv_page;
6849 char *kaddr; 6824 char *kaddr;
6850 u32 csum = ~(u32)0; 6825 u32 csum = ~(u32)0;
6851 u64 private = ~(u32)0;
6852 unsigned long flags; 6826 unsigned long flags;
6853 6827
6854 if (get_state_private(&BTRFS_I(inode)->io_tree,
6855 start, &private))
6856 goto failed;
6857 local_irq_save(flags); 6828 local_irq_save(flags);
6858 kaddr = kmap_atomic(page); 6829 kaddr = kmap_atomic(page);
6859 csum = btrfs_csum_data(kaddr + bvec->bv_offset, 6830 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6863,18 +6834,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6863 local_irq_restore(flags); 6834 local_irq_restore(flags);
6864 6835
6865 flush_dcache_page(bvec->bv_page); 6836 flush_dcache_page(bvec->bv_page);
6866 if (csum != private) { 6837 if (csum != csums[index]) {
6867failed: 6838 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6868 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", 6839 (unsigned long long)btrfs_ino(inode),
6869 (unsigned long long)btrfs_ino(inode), 6840 (unsigned long long)start,
6870 (unsigned long long)start, 6841 csum, csums[index]);
6871 csum, (unsigned)private);
6872 err = -EIO; 6842 err = -EIO;
6873 } 6843 }
6874 } 6844 }
6875 6845
6876 start += bvec->bv_len; 6846 start += bvec->bv_len;
6877 bvec++; 6847 bvec++;
6848 index++;
6878 } while (bvec <= bvec_end); 6849 } while (bvec <= bvec_end);
6879 6850
6880 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6851 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6991,6 +6962,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6991 int rw, u64 file_offset, int skip_sum, 6962 int rw, u64 file_offset, int skip_sum,
6992 int async_submit) 6963 int async_submit)
6993{ 6964{
6965 struct btrfs_dio_private *dip = bio->bi_private;
6994 int write = rw & REQ_WRITE; 6966 int write = rw & REQ_WRITE;
6995 struct btrfs_root *root = BTRFS_I(inode)->root; 6967 struct btrfs_root *root = BTRFS_I(inode)->root;
6996 int ret; 6968 int ret;
@@ -7025,7 +6997,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7025 if (ret) 6997 if (ret)
7026 goto err; 6998 goto err;
7027 } else if (!skip_sum) { 6999 } else if (!skip_sum) {
7028 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); 7000 ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
7001 file_offset);
7029 if (ret) 7002 if (ret)
7030 goto err; 7003 goto err;
7031 } 7004 }
@@ -7060,6 +7033,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7060 bio_put(orig_bio); 7033 bio_put(orig_bio);
7061 return -EIO; 7034 return -EIO;
7062 } 7035 }
7036
7063 if (map_length >= orig_bio->bi_size) { 7037 if (map_length >= orig_bio->bi_size) {
7064 bio = orig_bio; 7038 bio = orig_bio;
7065 goto submit; 7039 goto submit;
@@ -7155,19 +7129,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7155 struct btrfs_dio_private *dip; 7129 struct btrfs_dio_private *dip;
7156 struct bio *io_bio; 7130 struct bio *io_bio;
7157 int skip_sum; 7131 int skip_sum;
7132 int sum_len;
7158 int write = rw & REQ_WRITE; 7133 int write = rw & REQ_WRITE;
7159 int ret = 0; 7134 int ret = 0;
7135 u16 csum_size;
7160 7136
7161 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7137 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7162 7138
7163 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); 7139 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7164
7165 if (!io_bio) { 7140 if (!io_bio) {
7166 ret = -ENOMEM; 7141 ret = -ENOMEM;
7167 goto free_ordered; 7142 goto free_ordered;
7168 } 7143 }
7169 7144
7170 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7145 if (!skip_sum && !write) {
7146 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7147 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
7148 sum_len *= csum_size;
7149 } else {
7150 sum_len = 0;
7151 }
7152
7153 dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
7171 if (!dip) { 7154 if (!dip) {
7172 ret = -ENOMEM; 7155 ret = -ENOMEM;
7173 goto free_io_bio; 7156 goto free_io_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 86705583480d..08c44d9059b1 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,8 @@ struct btrfs_fs_devices {
152 int rotating; 152 int rotating;
153}; 153};
154 154
155#define BTRFS_BIO_INLINE_CSUM_SIZE 64
156
155/* 157/*
156 * we need the mirror number and stripe index to be passed around 158 * we need the mirror number and stripe index to be passed around
157 * the call chain while we are processing end_io (especially errors). 159 * the call chain while we are processing end_io (especially errors).
@@ -161,9 +163,14 @@ struct btrfs_fs_devices {
161 * we allocate are actually btrfs_io_bios. We'll cram as much of 163 * we allocate are actually btrfs_io_bios. We'll cram as much of
162 * struct btrfs_bio as we can into this over time. 164 * struct btrfs_bio as we can into this over time.
163 */ 165 */
166typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
164struct btrfs_io_bio { 167struct btrfs_io_bio {
165 unsigned long mirror_num; 168 unsigned long mirror_num;
166 unsigned long stripe_index; 169 unsigned long stripe_index;
170 u8 *csum;
171 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
172 u8 *csum_allocated;
173 btrfs_io_bio_end_io_t *end_io;
167 struct bio bio; 174 struct bio bio;
168}; 175};
169 176