author     Linus Torvalds <torvalds@linux-foundation.org>  2013-05-18 14:35:28 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-05-18 14:35:28 -0400
commit     130901ba33c4a040e12cf7cce240c1056cc096ee (patch)
tree       6471f689c88dad8bc1eabf1e404ffda60629abe7 /fs/btrfs/extent_io.c
parent     e51066824af49c2b1e9e686ee9660f58641c7f36 (diff)
parent     c5cb6a0573bef87e098ee3cd946115ebe60a910e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
"Miao Xie has been very busy, fixing races and enospc problems and many
other small but important pieces.

Alexandre Oliva discovered some problems with how our error handling
was interacting with the block layer and for now has disabled our
partial handling of sub-page writes. The real sub-page work is in a
series of patches from IBM that we still need to integrate and test.
The code Alexandre has turned off was really incomplete.

Josef has more error handling fixes and an important fix for the new
skinny extent format.

This also has my fix for the tracepoint crash from late in 3.9. It's
the first stage in a larger cleanup to get rid of btrfs_bio and make
a proper bioset for all the items we need to tack into the bio. For
now the bioset only holds our mirror_num and stripe_index, but for the
next merge window I'll shuffle more in."
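The "bioset" the message refers to is the container_of pattern: private btrfs state is laid out in memory directly in front of each struct bio, so it survives the trip through the block layer without abusing bio fields. A minimal sketch of the layout and accessor (the real definitions from this series live in the btrfs headers, e.g. fs/btrfs/volumes.h, and may carry more fields):

#include <linux/bio.h>

/* per-bio private state, placed in memory directly in front of the bio */
struct btrfs_io_bio {
	unsigned long mirror_num;
	unsigned long stripe_index;
	struct bio bio;			/* must stay the last member */
};

/* recover the container from a bio allocated out of btrfs_bioset */
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
{
	return container_of(bio, struct btrfs_io_bio, bio);
}

Because bio is the last member, offsetof(struct btrfs_io_bio, bio) is exactly the amount of front padding the pool has to reserve; the extent_io_init() hunk below passes that value to bioset_create().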
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
Btrfs: use a btrfs bioset instead of abusing bio internals
Btrfs: make sure roots are assigned before freeing their nodes
Btrfs: explicitly use global_block_rsv for quota_tree
btrfs: do away with non-whole_page extent I/O
Btrfs: don't invoke btrfs_invalidate_inodes() in the spin lock context
Btrfs: remove BUG_ON() in btrfs_read_fs_tree_no_radix()
Btrfs: pause the space balance when remounting to R/O
Btrfs: fix unprotected root node of the subvolume's inode rb-tree
Btrfs: fix accessing a freed tree root
Btrfs: return errno if possible when we fail to allocate memory
Btrfs: update the global reserve if it is empty
Btrfs: don't steal the reserved space from the global reserve if their space type is different
Btrfs: optimize the error handle of use_block_rsv()
Btrfs: don't use global block reservation for inode cache truncation
Btrfs: don't abort the current transaction if there is no enough space for inode cache
Correct allowed raid levels on balance.
Btrfs: fix possible memory leak in replace_path()
Btrfs: fix possible memory leak in the find_parent_nodes()
Btrfs: don't allow device replace on RAID5/RAID6
Btrfs: handle running extent ops with skinny metadata
...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  138
1 file changed, 73 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e93..e7e7afb4a872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
 
 #ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
 		goto free_state_cache;
+
+	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+				     offsetof(struct btrfs_io_bio, bio));
+	if (!btrfs_bioset)
+		goto free_buffer_cache;
 	return 0;
 
+free_buffer_cache:
+	kmem_cache_destroy(extent_buffer_cache);
+	extent_buffer_cache = NULL;
+
 free_state_cache:
 	kmem_cache_destroy(extent_state_cache);
+	extent_state_cache = NULL;
 	return -ENOMEM;
 }
 
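The second argument to bioset_create() above is the front pad: in this era the signature was bioset_create(pool_size, front_pad), and the pool then hands out bios preceded by that many private bytes. A stand-alone sketch of the same pattern, using a hypothetical my_io_bio container:

#include <linux/bio.h>

struct my_io_bio {
	unsigned long cookie;		/* private per-bio state */
	struct bio bio;			/* embedded bio, last member */
};

static struct bio_set *my_bioset;

static int __init my_init(void)
{
	/* reserve room for 'cookie' in front of every bio from this pool */
	my_bioset = bioset_create(BIO_POOL_SIZE,
				  offsetof(struct my_io_bio, bio));
	if (!my_bioset)
		return -ENOMEM;
	return 0;
}

static void __exit my_exit(void)
{
	if (my_bioset)
		bioset_free(my_bioset);
}

Allocation then goes through bio_alloc_bioset(gfp, nr_vecs, my_bioset), mirroring the btrfs_bio_alloc() change further down in this diff.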
@@ -145,6 +156,8 @@ void extent_io_exit(void)
 		kmem_cache_destroy(extent_state_cache);
 	if (extent_buffer_cache)
 		kmem_cache_destroy(extent_buffer_cache);
+	if (btrfs_bioset)
+		bioset_free(btrfs_bioset);
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
 }
 
 /*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
-	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
-	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
-		unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
-				 struct page *page)
-{
-	end_page_writeback(page);
-}
-
-/*
  * When IO fails, either with EIO or csum verification fails, we
  * try other mirrors that might have a good copy of the data. This
  * io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
 		return 0;
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio)
 		return -EIO;
 	bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		return -EIO;
 	}
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio) {
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
-	int whole_page;
 
 	do {
 		struct page *page = bvec->bv_page;
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-		start = page_offset(page) + bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
+		/* We always issue full-page reads, but if some block
+		 * in a page fails to read, blk_update_request() will
+		 * advance bv_offset and adjust bv_len to compensate.
+		 * Print a warning for nonzero offsets, and an error
+		 * if they don't add up to a full page.  */
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+			printk("%s page write in btrfs with offset %u and length %u\n",
+			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
+			       bvec->bv_offset, bvec->bv_len);
 
-		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-			whole_page = 1;
-		else
-			whole_page = 0;
+		start = page_offset(page);
+		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 		if (end_extent_writepage(page, err, start, end))
 			continue;
 
-		if (whole_page)
-			end_page_writeback(page);
-		else
-			check_page_writeback(tree, page);
+		end_page_writeback(page);
 	} while (bvec >= bio->bi_io_vec);
 
 	bio_put(bio);
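The new range arithmetic keeps start at the page boundary and folds any bvec advance into end, so end always lands on the last byte of the page. A quick worked check with 4096-byte pages (hypothetical offsets):

/* untouched bvec: bv_offset == 0, bv_len == PAGE_CACHE_SIZE */
u64 start = page_offset(page);		/* say 8192 */
u64 end   = start + 0 + 4096 - 1;	/* 12287, last byte of the page */

/* bvec partially advanced by blk_update_request():
 * bv_offset == 1024, bv_len == 3072; the sum is still a full page,
 * so only the KERN_INFO message fires and end is unchanged */
end = start + 1024 + 3072 - 1;		/* still 12287 */

Only when bv_offset + bv_len stops summing to PAGE_CACHE_SIZE does the KERN_ERR "partial" branch report a genuinely short transfer.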
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
-	int whole_page;
 	int mirror;
 	int ret;
 
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct page *page = bvec->bv_page;
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
+		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 
 		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-			 "mirror=%ld\n", (u64)bio->bi_sector, err,
-			 (long int)bio->bi_bdev);
+			 "mirror=%lu\n", (u64)bio->bi_sector, err,
+			 io_bio->mirror_num);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-		start = page_offset(page) + bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
+		/* We always issue full-page reads, but if some block
+		 * in a page fails to read, blk_update_request() will
+		 * advance bv_offset and adjust bv_len to compensate.
+		 * Print a warning for nonzero offsets, and an error
+		 * if they don't add up to a full page.  */
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+			printk("%s page read in btrfs with offset %u and length %u\n",
+			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
+			       bvec->bv_offset, bvec->bv_len);
 
-		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-			whole_page = 1;
-		else
-			whole_page = 0;
+		start = page_offset(page);
+		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 		spin_unlock(&tree->lock);
 
-		mirror = (int)(unsigned long)bio->bi_bdev;
+		mirror = io_bio->mirror_num;
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
 							      state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
-		if (whole_page) {
-			if (uptodate) {
-				SetPageUptodate(page);
-			} else {
-				ClearPageUptodate(page);
-				SetPageError(page);
-			}
-			unlock_page(page);
+		if (uptodate) {
+			SetPageUptodate(page);
 		} else {
-			if (uptodate) {
-				check_page_uptodate(tree, page);
-			} else {
-				ClearPageUptodate(page);
-				SetPageError(page);
-			}
-			check_page_locked(tree, page);
+			ClearPageUptodate(page);
+			SetPageError(page);
 		}
+		unlock_page(page);
 	} while (bvec <= bvec_end);
 
 	bio_put(bio);
 }
 
+/*
+ * this allocates from the btrfs_bioset.  We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
-	bio = bio_alloc(gfp_flags, nr_vecs);
+	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
 
 	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
+		while (!bio && (nr_vecs /= 2)) {
+			bio = bio_alloc_bioset(gfp_flags,
+					       nr_vecs, btrfs_bioset);
+		}
 	}
 
 	if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 	return bio;
 }
 
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+	return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+	return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
 static int __must_check submit_one_bio(int rw, struct bio *bio,
 				       int mirror_num, unsigned long bio_flags)
 {
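With these helpers in place, a submitter can allocate from the shared pool and the completion side can read the private fields back, instead of smuggling the mirror number through bio->bi_bdev as the old code did. A usage sketch (my_end_io and my_submit are hypothetical; btrfs_io_bio() is the container accessor shown near the top of this page):

static void my_end_io(struct bio *bio, int err)
{
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);

	/* the private fields traveled with the bio through the block layer */
	pr_debug("bio done, mirror %lu\n", io_bio->mirror_num);
	bio_put(bio);
}

static int my_submit(int rw)
{
	struct bio *bio = btrfs_io_bio_alloc(GFP_NOFS, 1);

	if (!bio)
		return -EIO;
	bio->bi_end_io = my_end_io;
	/* fill in bi_bdev, bi_sector and pages, then submit_bio(rw, bio) */
	return 0;
}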
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		last_for_get_extent = isize;
 	}
 
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
 			 &cached_state);
 
 	em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
 	free_extent_map(em);
 out:
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
 			     &cached_state, GFP_NOFS);
 	return ret;
 }
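Both fiemap changes fix the same inclusive-range off-by-one: the extent-io lock and unlock calls take the address of the last byte in the range, not one past it. For a len-byte range starting at start, the last byte is start + len - 1. Worked through with start = 0 and len = 4096, the correct locked range is [0, 4095]; the old code passed start + len and so locked [0, 4096], also taking the first byte of the following page's extent state.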