path: root/fs/btrfs/extent_io.c
author    Miao Xie <miaox@cn.fujitsu.com>    2013-07-25 07:22:34 -0400
committer Chris Mason <chris.mason@fusionio.com>    2013-09-01 08:04:33 -0400
commit    facc8a2247340a9735fe8cc123c5da2102f5ef1b (patch)
tree      fc6a1ea604e0bd5c3d22da891669e0516d776916 /fs/btrfs/extent_io.c
parent    f2a09da9d0cba17ad4041e7e54f1ca840b12d0be (diff)
Btrfs: don't cache the csum value into the extent state tree
Before applying this patch, we cached the csum value in the extent state tree when reading data from the disk, and that caching increased the lock contention on the state tree. Now we store the csum value in the bio structure or another unshared structure instead, which reduces the lock contention.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
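For orientation before the diff: the unshared, per-I/O home for the csums is the btrfs_io_bio wrapper that embeds the bio itself. Below is a minimal sketch of its shape, inferred from the fields the diff touches; the real definition lives in fs/btrfs/extent_io.h at this kernel version, and the inline-buffer size shown here is an assumption:

	struct btrfs_io_bio {
		unsigned int mirror_num;
		u8 *csum;                       /* points at csum_inline or csum_allocated */
		u8 csum_inline[64];             /* small reads: csums stored inline, no allocation (size is an assumption) */
		u8 *csum_allocated;             /* larger reads: separately allocated csum buffer */
		btrfs_io_bio_end_io_t *end_io;  /* completion hook that can free csum_allocated */
		struct bio bio;                 /* must stay last: btrfs_io_bio() uses container_of() */
	};

	static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
	{
		return container_of(bio, struct btrfs_io_bio, bio);
	}

Since each reader owns its bio, the csums travel with the I/O itself, and checksum verification no longer needs to take the extent state tree's spinlock.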
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--    fs/btrfs/extent_io.c    125
1 file changed, 48 insertions(+), 77 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0297f6f4d4c3..6fbacfabb660 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1837,64 +1837,6 @@ out:
 	return ret;
 }
 
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
-			    int count)
-{
-	struct rb_node *node;
-	struct extent_state *state;
-
-	spin_lock(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	node = tree_search(tree, start);
-	BUG_ON(!node);
-
-	state = rb_entry(node, struct extent_state, rb_node);
-	BUG_ON(state->start != start);
-
-	while (count) {
-		state->private = *csums++;
-		count--;
-		state = next_state(state);
-	}
-	spin_unlock(&tree->lock);
-}
-
-static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
-{
-	struct bio_vec *bvec = bio->bi_io_vec + bio_index;
-
-	return page_offset(bvec->bv_page) + bvec->bv_offset;
-}
-
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
-			u32 csums[], int count)
-{
-	struct rb_node *node;
-	struct extent_state *state = NULL;
-	u64 start;
-
-	spin_lock(&tree->lock);
-	do {
-		start = __btrfs_get_bio_offset(bio, bio_index);
-		if (state == NULL || state->start != start) {
-			node = tree_search(tree, start);
-			BUG_ON(!node);
-
-			state = rb_entry(node, struct extent_state, rb_node);
-			BUG_ON(state->start != start);
-		}
-		state->private = *csums++;
-		count--;
-		bio_index++;
-
-		state = next_state(state);
-	} while (count);
-	spin_unlock(&tree->lock);
-}
-
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 {
 	struct rb_node *node;
@@ -2201,8 +2143,9 @@ out:
  * needed
  */
 
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
-			      u64 start, u64 end, int failed_mirror)
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+			      struct page *page, u64 start, u64 end,
+			      int failed_mirror)
 {
 	struct io_failure_record *failrec = NULL;
 	u64 private;
@@ -2211,8 +2154,9 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-	struct extent_state *state;
 	struct bio *bio;
+	struct btrfs_io_bio *btrfs_failed_bio;
+	struct btrfs_io_bio *btrfs_bio;
 	int num_copies;
 	int ret;
 	int read_mode;
@@ -2302,13 +2246,6 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		return -EIO;
 	}
 
-	spin_lock(&tree->lock);
-	state = find_first_extent_bit_state(tree, failrec->start,
-					    EXTENT_LOCKED);
-	if (state && state->start != failrec->start)
-		state = NULL;
-	spin_unlock(&tree->lock);
-
 	/*
 	 * there are two premises:
 	 * a) deliver good data to the caller
@@ -2345,9 +2282,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		read_mode = READ_SYNC;
 	}
 
-	if (!state || failrec->this_mirror > num_copies) {
-		pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
-			 "next_mirror %d, failed_mirror %d\n", state,
-			 num_copies, failrec->this_mirror, failed_mirror);
+	if (failrec->this_mirror > num_copies) {
+		pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
+			 num_copies, failrec->this_mirror, failed_mirror);
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
@@ -2358,12 +2294,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
 	}
-	bio->bi_private = state;
 	bio->bi_end_io = failed_bio->bi_end_io;
 	bio->bi_sector = failrec->logical >> 9;
 	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 	bio->bi_size = 0;
 
+	btrfs_failed_bio = btrfs_io_bio(failed_bio);
+	if (btrfs_failed_bio->csum) {
+		struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = btrfs_bio->csum_inline;
+		phy_offset >>= inode->i_sb->s_blocksize_bits;
+		phy_offset *= csum_size;
+		memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+		       csum_size);
+	}
+
 	bio_add_page(bio, page, failrec->len, start - page_offset(page));
 
 	pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2462,9 +2410,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct bio_vec *bvec = bio->bi_io_vec;
+	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
+	u64 offset = 0;
 	u64 start;
 	u64 end;
+	u64 len;
 	int mirror;
 	int ret;
 
@@ -2475,7 +2426,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct page *page = bvec->bv_page;
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
-		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 		struct inode *inode = page->mapping->host;
 
 		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2496,6 +2446,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
+		len = bvec->bv_len;
 
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
@@ -2514,8 +2465,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		mirror = io_bio->mirror_num;
 		if (likely(uptodate && tree->ops &&
 			   tree->ops->readpage_end_io_hook)) {
-			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      state, mirror);
+			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
+							      page, start, end,
+							      mirror);
 			if (ret)
 				uptodate = 0;
 			else
@@ -2541,7 +2493,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 			 * can't handle the error it will return -EIO and we
 			 * remain responsible for that page.
 			 */
-			ret = bio_readpage_error(bio, page, start, end, mirror);
+			ret = bio_readpage_error(bio, offset, page, start, end,
+						 mirror);
 			if (ret == 0) {
 				uptodate =
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -2573,8 +2526,11 @@ readpage_ok:
 			SetPageError(page);
 		}
 		unlock_page(page);
+		offset += len;
 	} while (bvec <= bvec_end);
 
+	if (io_bio->end_io)
+		io_bio->end_io(io_bio, err);
 	bio_put(bio);
 }
 
@@ -2586,6 +2542,7 @@ struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags)
 {
+	struct btrfs_io_bio *btrfs_bio;
 	struct bio *bio;
 
 	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2558,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		bio->bi_size = 0;
 		bio->bi_bdev = bdev;
 		bio->bi_sector = first_sector;
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = NULL;
+		btrfs_bio->csum_allocated = NULL;
+		btrfs_bio->end_io = NULL;
 	}
 	return bio;
 }
@@ -2614,7 +2575,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
 /* this also allocates from the btrfs_bioset */
 struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+	struct btrfs_io_bio *btrfs_bio;
+	struct bio *bio;
+
+	bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+	if (bio) {
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = NULL;
+		btrfs_bio->csum_allocated = NULL;
+		btrfs_bio->end_io = NULL;
+	}
+	return bio;
 }
 
 
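A closing note on the csum indexing in bio_readpage_error above: phy_offset is the byte offset of the failed block within the original bio, so the shift by s_blocksize_bits converts it to a block index, and the multiply by csum_size converts that index to a byte offset into the saved csum array. A hypothetical helper (not part of the patch) expressing the same arithmetic:

	/* Hypothetical helper, for illustration only: return a pointer to the
	 * checksum for the block at byte offset 'offset' within a bio whose
	 * csums were saved in io_bio->csum. */
	static u8 *csum_for_offset(struct btrfs_io_bio *io_bio, u64 offset,
				   unsigned char blocksize_bits, u16 csum_size)
	{
		/* e.g. offset = 12288 with 4K blocks (blocksize_bits = 12) gives
		 * block index 3; with 4-byte crc32c csums that is byte 12. */
		return io_bio->csum + ((offset >> blocksize_bits) * csum_size);
	}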