author    Linus Torvalds <torvalds@linux-foundation.org>  2013-05-18 14:35:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-05-18 14:35:28 -0400
commit    130901ba33c4a040e12cf7cce240c1056cc096ee
tree      6471f689c88dad8bc1eabf1e404ffda60629abe7  /fs/btrfs/extent_io.c
parent    e51066824af49c2b1e9e686ee9660f58641c7f36
parent    c5cb6a0573bef87e098ee3cd946115ebe60a910e
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
 "Miao Xie has been very busy, fixing races and enospc problems and
  many other small but important pieces.

  Alexandre Oliva discovered some problems with how our error handling
  was interacting with the block layer and for now has disabled our
  partial handling of sub-page writes.  The real sub-page work is in a
  series of patches from IBM that we still need to integrate and test.
  The code Alexandre has turned off was really incomplete.

  Josef has more error handling fixes and an important fix for the new
  skinny extent format.

  This also has my fix for the tracepoint crash from late in 3.9.  It's
  the first stage in a larger cleanup to get rid of btrfs_bio and make
  a proper bioset for all the items we need to tack into the bio.  For
  now the bioset only holds our mirror_num and stripe_index, but for
  the next merge window I'll shuffle more in."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
  Btrfs: use a btrfs bioset instead of abusing bio internals
  Btrfs: make sure roots are assigned before freeing their nodes
  Btrfs: explicitly use global_block_rsv for quota_tree
  btrfs: do away with non-whole_page extent I/O
  Btrfs: don't invoke btrfs_invalidate_inodes() in the spin lock context
  Btrfs: remove BUG_ON() in btrfs_read_fs_tree_no_radix()
  Btrfs: pause the space balance when remounting to R/O
  Btrfs: fix unprotected root node of the subvolume's inode rb-tree
  Btrfs: fix accessing a freed tree root
  Btrfs: return errno if possible when we fail to allocate memory
  Btrfs: update the global reserve if it is empty
  Btrfs: don't steal the reserved space from the global reserve if their space type is different
  Btrfs: optimize the error handle of use_block_rsv()
  Btrfs: don't use global block reservation for inode cache truncation
  Btrfs: don't abort the current transaction if there is no enough space for inode cache
  Correct allowed raid levels on balance.
  Btrfs: fix possible memory leak in replace_path()
  Btrfs: fix possible memory leak in the find_parent_nodes()
  Btrfs: don't allow device replace on RAID5/RAID6
  Btrfs: handle running extent ops with skinny metadata
  ...
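The bioset pattern the message refers to embeds a struct bio at the tail of
a private container and asks the bioset for that much front padding;
container_of() then recovers the container in the completion path.  A
minimal sketch of the pattern, assuming the btrfs_io_bio layout holds only
the mirror_num and stripe_index named above (the accessor name follows the
calls in the diff below):

#include <linux/bio.h>

/* Sketch only: the embedded bio must be the LAST member, because the
 * bioset's front padding is allocated directly in front of the bio. */
struct btrfs_io_bio {
	unsigned long mirror_num;
	unsigned long stripe_index;
	struct bio bio;
};

/* Recover the container from the bio the block layer hands back. */
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
{
	return container_of(bio, struct btrfs_io_bio, bio);
}

The front_pad argument to bioset_create() is then exactly
offsetof(struct btrfs_io_bio, bio), which is what the first hunk below
passes.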
Diffstat (limited to 'fs/btrfs/extent_io.c')
 -rw-r--r--  fs/btrfs/extent_io.c | 138
 1 file changed, 73 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 32d67a822e93..e7e7afb4a872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
 
 #ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@ int __init extent_io_init(void)
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
 		goto free_state_cache;
+
+	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+				     offsetof(struct btrfs_io_bio, bio));
+	if (!btrfs_bioset)
+		goto free_buffer_cache;
 	return 0;
 
+free_buffer_cache:
+	kmem_cache_destroy(extent_buffer_cache);
+	extent_buffer_cache = NULL;
+
 free_state_cache:
 	kmem_cache_destroy(extent_state_cache);
+	extent_state_cache = NULL;
 	return -ENOMEM;
 }
 
@@ -145,6 +156,8 @@ void extent_io_exit(void)
 		kmem_cache_destroy(extent_state_cache);
 	if (extent_buffer_cache)
 		kmem_cache_destroy(extent_buffer_cache);
+	if (btrfs_bioset)
+		bioset_free(btrfs_bioset);
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -1948,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
 }
 
 /*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
-	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
-	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
-		unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
-				 struct page *page)
-{
-	end_page_writeback(page);
-}
-
-/*
  * When IO fails, either with EIO or csum verification fails, we
  * try other mirrors that might have a good copy of the data.  This
  * io_failure_record is used to record state as we go through all the
@@ -2046,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 	if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
 		return 0;
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio)
 		return -EIO;
 	bio->bi_private = &compl;
@@ -2336,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 		return -EIO;
 	}
 
-	bio = bio_alloc(GFP_NOFS, 1);
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio) {
 		free_io_failure(inode, failrec, 0);
 		return -EIO;
@@ -2398,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
-	int whole_page;
 
 	do {
 		struct page *page = bvec->bv_page;
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-		start = page_offset(page) + bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
+		/* We always issue full-page reads, but if some block
+		 * in a page fails to read, blk_update_request() will
+		 * advance bv_offset and adjust bv_len to compensate.
+		 * Print a warning for nonzero offsets, and an error
+		 * if they don't add up to a full page.  */
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+			printk("%s page write in btrfs with offset %u and length %u\n",
+			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
+			       bvec->bv_offset, bvec->bv_len);
 
-		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-			whole_page = 1;
-		else
-			whole_page = 0;
+		start = page_offset(page);
+		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
@@ -2418,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
 		if (end_extent_writepage(page, err, start, end))
 			continue;
 
-		if (whole_page)
-			end_page_writeback(page);
-		else
-			check_page_writeback(tree, page);
+		end_page_writeback(page);
 	} while (bvec >= bio->bi_io_vec);
 
 	bio_put(bio);
@@ -2446,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
-	int whole_page;
 	int mirror;
 	int ret;
 
@@ -2457,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct page *page = bvec->bv_page;
 		struct extent_state *cached = NULL;
 		struct extent_state *state;
+		struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 
 		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-			 "mirror=%ld\n", (u64)bio->bi_sector, err,
-			 (long int)bio->bi_bdev);
+			 "mirror=%lu\n", (u64)bio->bi_sector, err,
+			 io_bio->mirror_num);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-		start = page_offset(page) + bvec->bv_offset;
-		end = start + bvec->bv_len - 1;
+		/* We always issue full-page reads, but if some block
+		 * in a page fails to read, blk_update_request() will
+		 * advance bv_offset and adjust bv_len to compensate.
+		 * Print a warning for nonzero offsets, and an error
+		 * if they don't add up to a full page.  */
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+			printk("%s page read in btrfs with offset %u and length %u\n",
+			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
+			       bvec->bv_offset, bvec->bv_len);
 
-		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-			whole_page = 1;
-		else
-			whole_page = 0;
+		start = page_offset(page);
+		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
 		if (++bvec <= bvec_end)
 			prefetchw(&bvec->bv_page->flags);
@@ -2485,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 		spin_unlock(&tree->lock);
 
-		mirror = (int)(unsigned long)bio->bi_bdev;
+		mirror = io_bio->mirror_num;
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
 							      state, mirror);
@@ -2528,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 		unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
-		if (whole_page) {
-			if (uptodate) {
-				SetPageUptodate(page);
-			} else {
-				ClearPageUptodate(page);
-				SetPageError(page);
-			}
-			unlock_page(page);
+		if (uptodate) {
+			SetPageUptodate(page);
 		} else {
-			if (uptodate) {
-				check_page_uptodate(tree, page);
-			} else {
-				ClearPageUptodate(page);
-				SetPageError(page);
-			}
-			check_page_locked(tree, page);
+			ClearPageUptodate(page);
+			SetPageError(page);
 		}
+		unlock_page(page);
 	} while (bvec <= bvec_end);
 
 	bio_put(bio);
 }
 
+/*
+ * this allocates from the btrfs_bioset.  We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
-	bio = bio_alloc(gfp_flags, nr_vecs);
+	bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
 
 	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
+		while (!bio && (nr_vecs /= 2)) {
+			bio = bio_alloc_bioset(gfp_flags,
+					       nr_vecs, btrfs_bioset);
+		}
 	}
 
 	if (bio) {
@@ -2571,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 	return bio;
 }
 
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+	return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+	return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
 static int __must_check submit_one_bio(int rw, struct bio *bio,
 				       int mirror_num, unsigned long bio_flags)
 {
@@ -3988,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		last_for_get_extent = isize;
 	}
 
-	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
 			 &cached_state);
 
 	em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4075,7 +4083,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
 	free_extent_map(em);
 out:
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
 			     &cached_state, GFP_NOFS);
 	return ret;
 }
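
The last two hunks fix an off-by-one: extent_io range locks are inclusive
at both ends, so a len-byte range starting at start ends at
start + len - 1, and locking start + len strays one byte into the next
range.  A small standalone illustration of the convention (userspace
sketch, names hypothetical):

#include <assert.h>
#include <stdint.h>

/* Extent-style ranges are inclusive: a len-byte range covers
 * [start, start + len - 1], not [start, start + len). */
static uint64_t range_last(uint64_t start, uint64_t len)
{
	return start + len - 1;
}

int main(void)
{
	/* A 4096-byte range at offset 0 covers bytes 0..4095;
	 * byte 4096 already belongs to the next range. */
	assert(range_last(0, 4096) == 4095);
	assert(range_last(4096, 4096) == 8191);
	return 0;
}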