author     Miao Xie <miaox@cn.fujitsu.com>        2013-07-25 07:22:34 -0400
committer  Chris Mason <chris.mason@fusionio.com> 2013-09-01 08:04:33 -0400
commit     facc8a2247340a9735fe8cc123c5da2102f5ef1b (patch)
tree       fc6a1ea604e0bd5c3d22da891669e0516d776916 /fs/btrfs/extent_io.c
parent     f2a09da9d0cba17ad4041e7e54f1ca840b12d0be (diff)
Btrfs: don't cache the csum value into the extent state tree
Before this patch, we cached the csum value in the extent state tree
when reading data from disk; this increased lock contention on the
state tree.

Now we store the csum value in the bio structure or another unshared
structure instead, so we can avoid that lock contention.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
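The "unshared structure" here is the btrfs_io_bio wrapper that this
series threads through the read path: the struct bio handed to the
block layer is embedded in a per-bio container that also carries the
checksums. A minimal sketch of the idea follows; the authoritative
definition lives in the btrfs headers of this series, and the
inline-array size below is illustrative, not copied from them.

/*
 * Sketch only.  Because the wrapper embeds the struct bio, the csums
 * travel with the I/O and need no locking, unlike the shared extent
 * state tree that the removed extent_cache_csums*() helpers had to
 * lock on every read.
 */
struct btrfs_io_bio {
	unsigned int mirror_num;	/* which copy was read */
	u8 *csum;			/* csum_inline or csum_allocated */
	u8 csum_inline[64];		/* small reads stay inline (size illustrative) */
	u8 *csum_allocated;		/* kmalloc'd when csums don't fit inline */
	void (*end_io)(struct btrfs_io_bio *io_bio, int err);
	struct bio bio;			/* must be last */
};

/* recover the wrapper from the bio the block layer hands back */
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
{
	return container_of(bio, struct btrfs_io_bio, bio);
}

btrfs_bioset is created with enough front pad for such a wrapper, which
is why btrfs_bio_alloc() and btrfs_io_bio_alloc() in the diff below can
unconditionally reset csum, csum_allocated and end_io on every bio they
hand out.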
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c | 125
1 file changed, 48 insertions(+), 77 deletions(-)
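The diff also threads a new phy_offset argument into
bio_readpage_error(): the byte offset of the failing block within the
original bio, which the repair path turns into an index into the saved
csum array (see the hunk at -2358,12). A worked sketch of that
arithmetic, assuming 4 KiB blocks and 4-byte crc32c checksums:

static u64 csum_offset_for(u64 phy_offset, unsigned int blocksize_bits,
			   u16 csum_size)
{
	phy_offset >>= blocksize_bits;	/* block index within the bio */
	phy_offset *= csum_size;	/* byte offset into the csum array */
	return phy_offset;
}

/* e.g. csum_offset_for(8192, 12, 4) == 8: the failing block is the
 * third 4 KiB block of the bio, so its crc32c sits 8 bytes into ->csum. */

Because the csums are saved in bio order rather than keyed by logical
extent, no tree lookup or lock is needed to find the right checksum for
the retry.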
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0297f6f4d4c3..6fbacfabb660 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1837,64 +1837,6 @@ out:
 	return ret;
 }
 
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
-			    int count)
-{
-	struct rb_node *node;
-	struct extent_state *state;
-
-	spin_lock(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	node = tree_search(tree, start);
-	BUG_ON(!node);
-
-	state = rb_entry(node, struct extent_state, rb_node);
-	BUG_ON(state->start != start);
-
-	while (count) {
-		state->private = *csums++;
-		count--;
-		state = next_state(state);
-	}
-	spin_unlock(&tree->lock);
-}
-
-static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
-{
-	struct bio_vec *bvec = bio->bi_io_vec + bio_index;
-
-	return page_offset(bvec->bv_page) + bvec->bv_offset;
-}
-
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
-			u32 csums[], int count)
-{
-	struct rb_node *node;
-	struct extent_state *state = NULL;
-	u64 start;
-
-	spin_lock(&tree->lock);
-	do {
-		start = __btrfs_get_bio_offset(bio, bio_index);
-		if (state == NULL || state->start != start) {
-			node = tree_search(tree, start);
-			BUG_ON(!node);
-
-			state = rb_entry(node, struct extent_state, rb_node);
-			BUG_ON(state->start != start);
-		}
-		state->private = *csums++;
-		count--;
-		bio_index++;
-
-		state = next_state(state);
-	} while (count);
-	spin_unlock(&tree->lock);
-}
-
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 {
 	struct rb_node *node;
@@ -2201,8 +2143,9 @@ out:
  * needed
  */
 
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
-				u64 start, u64 end, int failed_mirror)
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+			      struct page *page, u64 start, u64 end,
+			      int failed_mirror)
 {
 	struct io_failure_record *failrec = NULL;
 	u64 private;
@@ -2211,8 +2154,9 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-	struct extent_state *state;
 	struct bio *bio;
+	struct btrfs_io_bio *btrfs_failed_bio;
+	struct btrfs_io_bio *btrfs_bio;
 	int num_copies;
 	int ret;
 	int read_mode;
@@ -2302,13 +2246,6 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		return -EIO;
 	}
 
-	spin_lock(&tree->lock);
-	state = find_first_extent_bit_state(tree, failrec->start,
-					    EXTENT_LOCKED);
-	if (state && state->start != failrec->start)
-		state = NULL;
-	spin_unlock(&tree->lock);
-
 	/*
 	 * there are two premises:
 	 * a) deliver good data to the caller
@@ -2345,9 +2282,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		read_mode = READ_SYNC;
 	}
 
-	if (!state || failrec->this_mirror > num_copies) {
-		pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
-			 "next_mirror %d, failed_mirror %d\n", state,
-			 num_copies, failrec->this_mirror, failed_mirror);
+	if (failrec->this_mirror > num_copies) {
+		pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
+			 num_copies, failrec->this_mirror, failed_mirror);
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
@@ -2358,12 +2294,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
 	}
-	bio->bi_private = state;
 	bio->bi_end_io = failed_bio->bi_end_io;
 	bio->bi_sector = failrec->logical >> 9;
 	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 	bio->bi_size = 0;
 
+	btrfs_failed_bio = btrfs_io_bio(failed_bio);
+	if (btrfs_failed_bio->csum) {
+		struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = btrfs_bio->csum_inline;
+		phy_offset >>= inode->i_sb->s_blocksize_bits;
+		phy_offset *= csum_size;
+		memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+		       csum_size);
+	}
+
 	bio_add_page(bio, page, failrec->len, start - page_offset(page));
 
 	pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2462,9 +2410,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct bio_vec *bvec = bio->bi_io_vec;
+	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
+	u64 offset = 0;
 	u64 start;
 	u64 end;
+	u64 len;
 	int mirror;
 	int ret;
 
@@ -2475,7 +2426,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct page *page = bvec->bv_page;
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
-		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 		struct inode *inode = page->mapping->host;
 
 		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2496,6 +2446,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
+		len = bvec->bv_len;
 
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
@@ -2514,8 +2465,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		mirror = io_bio->mirror_num;
 		if (likely(uptodate && tree->ops &&
 			   tree->ops->readpage_end_io_hook)) {
-			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      state, mirror);
+			ret = tree->ops->readpage_end_io_hook(io_bio, offset,
+							      page, start, end,
+							      mirror);
 			if (ret)
 				uptodate = 0;
 			else
@@ -2541,7 +2493,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 			 * can't handle the error it will return -EIO and we
 			 * remain responsible for that page.
 			 */
-			ret = bio_readpage_error(bio, page, start, end, mirror);
+			ret = bio_readpage_error(bio, offset, page, start, end,
+						 mirror);
 			if (ret == 0) {
 				uptodate =
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -2573,8 +2526,11 @@ readpage_ok:
 			SetPageError(page);
 		}
 		unlock_page(page);
+		offset += len;
 	} while (bvec <= bvec_end);
 
+	if (io_bio->end_io)
+		io_bio->end_io(io_bio, err);
 	bio_put(bio);
 }
 
@@ -2586,6 +2542,7 @@ struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags)
 {
+	struct btrfs_io_bio *btrfs_bio;
 	struct bio *bio;
 
 	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2558,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		bio->bi_size = 0;
 		bio->bi_bdev = bdev;
 		bio->bi_sector = first_sector;
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = NULL;
+		btrfs_bio->csum_allocated = NULL;
+		btrfs_bio->end_io = NULL;
 	}
 	return bio;
 }
@@ -2614,7 +2575,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
 /* this also allocates from the btrfs_bioset */
 struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+	struct btrfs_io_bio *btrfs_bio;
+	struct bio *bio;
+
+	bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+	if (bio) {
+		btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = NULL;
+		btrfs_bio->csum_allocated = NULL;
+		btrfs_bio->end_io = NULL;
+	}
+	return bio;
 }
 
 
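The diff does not show who sets io_bio->csum or who frees
csum_allocated; the new end_io hook invoked at the bottom of
end_bio_extent_readpage() is the cleanup point. A hypothetical consumer,
reusing the btrfs_io_bio sketch above (names invented for illustration;
the real csum-lookup helpers live outside extent_io.c and are not part
of this diff):

/* Hypothetical consumer sketch: whoever fills ->csum before submitting
 * the read allocates when the inline slot is too small, and registers
 * end_io so the completion path frees the buffer. */
static void example_csum_end_io(struct btrfs_io_bio *io_bio, int err)
{
	kfree(io_bio->csum_allocated);	/* kfree(NULL) is a no-op */
}

static int example_attach_csums(struct btrfs_io_bio *io_bio, size_t csum_bytes)
{
	if (csum_bytes <= sizeof(io_bio->csum_inline)) {
		io_bio->csum = io_bio->csum_inline;
		return 0;
	}
	io_bio->csum_allocated = kmalloc(csum_bytes, GFP_NOFS);
	if (!io_bio->csum_allocated)
		return -ENOMEM;
	io_bio->csum = io_bio->csum_allocated;
	io_bio->end_io = example_csum_end_io;
	return 0;
}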