author	Chris Mason <chris.mason@oracle.com>	2008-04-17 11:29:12 -0400
committer	Chris Mason <chris.mason@oracle.com>	2008-09-25 11:04:01 -0400
commit	3b951516ed703af0f6d82053937655ad69b60864 (patch)
tree	616e648b8683e6bb2996c7ce6bafb8f592244698
parent	7b859fe7cd383fa5e7e379884a748680d0af7bcc (diff)
Btrfs: Use the extent map cache to find the logical disk block during data retries
The data read retry code needs to find the logical disk block before it
can resubmit new bios.  But, finding this block isn't allowed to take
the fs_mutex because that will deadlock with a number of different
callers.

This changes the retry code to use the extent map cache instead, but
that requires the extent map cache to have the extent we're looking
for.  This is a problem because btrfs_drop_extent_cache just drops the
entire extent instead of the little tiny part it is invalidating.

The bulk of the code in this patch changes btrfs_drop_extent_cache to
invalidate only a portion of the extent cache, and changes
btrfs_get_extent to deal with the results.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
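To make the splitting rule concrete before reading the diff, here is a
minimal standalone sketch of the idea, using a simplified stand-in type
rather than the kernel's struct extent_map (the names cached_extent and
drop_range are ours, for illustration only).  Dropping [start, end] from
a cached extent keeps up to two surviving pieces; the tail piece's disk
block shifts by the same delta as its file offset:

	#include <stdint.h>

	typedef uint64_t u64;

	/* simplified stand-in for struct extent_map (illustration only) */
	struct cached_extent {
		u64 start;		/* file offset */
		u64 len;		/* length in bytes */
		u64 block_start;	/* logical disk block */
	};

	/*
	 * Carve [start, end] out of *em, filling *left and/or *right with
	 * the surviving pieces.  Returns a bitmask: 1 = left valid,
	 * 2 = right valid.
	 */
	static int drop_range(const struct cached_extent *em, u64 start,
			      u64 end, struct cached_extent *left,
			      struct cached_extent *right)
	{
		int valid = 0;

		if (em->start < start) {
			/* the piece in front of the dropped range survives */
			left->start = em->start;
			left->len = start - em->start;
			left->block_start = em->block_start;
			valid |= 1;
		}
		if (em->start + em->len > end + 1) {
			/* the tail survives, with block_start shifted by
			 * the same amount as the file offset */
			u64 diff = end + 1 - em->start;

			right->start = end + 1;
			right->len = em->start + em->len - (end + 1);
			right->block_start = em->block_start + diff;
			valid |= 2;
		}
		return valid;
	}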
 fs/btrfs/extent-tree.c |   1
 fs/btrfs/extent_io.c   |  12
 fs/btrfs/file.c        |  47
 fs/btrfs/inode.c       | 120
 fs/btrfs/volumes.c     |   3
 5 files changed, 168 insertions(+), 15 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 76fd5d7146e1..593011e5d455 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1747,6 +1747,7 @@ again:
 			       search_start, search_end, hint_byte, ins,
 			       trans->alloc_exclude_start,
 			       trans->alloc_exclude_nr, data);
+
 	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
 		num_bytes = num_bytes >> 1;
 		num_bytes = max(num_bytes, min_alloc_size);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 88322684be6a..21597bea21fd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1025,7 +1025,8 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
 search_again:
 	node = tree_search(tree, cur_start);
 	if (!node) {
-		*end = (u64)-1;
+		if (!found)
+			*end = (u64)-1;
 		goto out;
 	}
 
@@ -1540,6 +1541,8 @@ static int end_bio_extent_readpage(struct bio *bio,
 						 start, end, state);
 			if (ret == 0) {
 				state = NULL;
+				uptodate =
+					test_bit(BIO_UPTODATE, &bio->bi_flags);
 				continue;
 			}
 		}
@@ -1555,10 +1558,11 @@ static int end_bio_extent_readpage(struct bio *bio,
 			    !(state->state & EXTENT_LOCKED))
 				state = NULL;
 		}
-		if (!state && uptodate) {
+		if (!state) {
 			spin_unlock_irqrestore(&tree->lock, flags);
-			set_extent_uptodate(tree, start, end,
-					    GFP_ATOMIC);
+			if (uptodate)
+				set_extent_uptodate(tree, start, end,
+						    GFP_ATOMIC);
 			unlock_extent(tree, start, end, GFP_ATOMIC);
 			goto next_io;
 		}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9fbda6552069..3f5525f0834c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -356,12 +356,23 @@ out_unlock:
 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
 {
 	struct extent_map *em;
+	struct extent_map *split = NULL;
+	struct extent_map *split2 = NULL;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	u64 len = end - start + 1;
+	int ret;
+	int testend = 1;
 
-	if (end == (u64)-1)
+	if (end == (u64)-1) {
 		len = (u64)-1;
+		testend = 0;
+	}
 	while(1) {
+		if (!split)
+			split = alloc_extent_map(GFP_NOFS);
+		if (!split2)
+			split2 = alloc_extent_map(GFP_NOFS);
+
 		spin_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, start, len);
 		if (!em) {
@@ -369,6 +380,36 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
 			break;
 		}
 		remove_extent_mapping(em_tree, em);
+
+		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+		    em->start < start) {
+			split->start = em->start;
+			split->len = start - em->start;
+			split->block_start = em->block_start;
+			split->bdev = em->bdev;
+			split->flags = em->flags;
+			ret = add_extent_mapping(em_tree, split);
+			BUG_ON(ret);
+			free_extent_map(split);
+			split = split2;
+			split2 = NULL;
+		}
+		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+		    testend && em->start + em->len > start + len) {
+			u64 diff = start + len - em->start;
+
+			split->start = start + len;
+			split->len = em->start + em->len - (start + len);
+			split->bdev = em->bdev;
+			split->flags = em->flags;
+
+			split->block_start = em->block_start + diff;
+
+			ret = add_extent_mapping(em_tree, split);
+			BUG_ON(ret);
+			free_extent_map(split);
+			split = NULL;
+		}
 		spin_unlock(&em_tree->lock);
 
 		/* once for us */
@@ -376,6 +417,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
 		/* once for the tree*/
 		free_extent_map(em);
 	}
+	if (split)
+		free_extent_map(split);
+	if (split2)
+		free_extent_map(split2);
 	return 0;
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5632ea760077..40f8da884090 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 	if (alloc_hint == EXTENT_MAP_INLINE)
 		goto out;
 
+	BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
+
 	while(num_bytes > 0) {
 		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
 		ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
@@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 					       ins.offset);
 		inode->i_blocks += ins.offset >> 9;
 		btrfs_check_file(root, inode);
+		if (num_bytes < cur_alloc_size) {
+			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
+			       cur_alloc_size);
+			break;
+		}
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
@@ -427,6 +434,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 	struct extent_map *em;
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct bio *bio;
 	int num_copies;
 	int ret;
@@ -434,7 +442,6 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 
 	ret = get_state_private(failure_tree, start, &private);
 	if (ret) {
-		size_t pg_offset = start - page_offset(page);
 		failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
 		if (!failrec)
 			return -ENOMEM;
@@ -442,8 +449,13 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 		failrec->len = end - start + 1;
 		failrec->last_mirror = 0;
 
-		em = btrfs_get_extent(inode, NULL, pg_offset, start,
-				      failrec->len, 0);
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, failrec->len);
+		if (em->start > start || em->start + em->len < start) {
+			free_extent_map(em);
+			em = NULL;
+		}
+		spin_unlock(&em_tree->lock);
 
 		if (!em || IS_ERR(em)) {
 			kfree(failrec);
@@ -559,6 +571,8 @@ zeroit:
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_IRQ0);
 	local_irq_restore(flags);
+	if (private == 0)
+		return 0;
 	return -EIO;
 }
 
@@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 	int pending_del_nr = 0;
 	int pending_del_slot = 0;
 	int extent_type = -1;
+	u64 mask = root->sectorsize - 1;
 
-	btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
+	btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
 	path = btrfs_alloc_path();
 	path->reada = -1;
 	BUG_ON(!path);
@@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 				hole_start, 0, 0,
 				hole_size);
 		btrfs_drop_extent_cache(inode, hole_start,
-					hole_size - 1);
+					(u64)-1);
 		btrfs_check_file(root, inode);
 	}
 	btrfs_end_transaction(trans, root);
@@ -2083,6 +2098,68 @@ out_unlock:
 	return err;
 }
 
+static int merge_extent_mapping(struct extent_map_tree *em_tree,
+				struct extent_map *existing,
+				struct extent_map *em)
+{
+	u64 start_diff;
+	u64 new_end;
+	int ret = 0;
+	int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
+
+	if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
+		goto invalid;
+
+	if (!real_blocks && em->block_start != existing->block_start)
+		goto invalid;
+
+	new_end = max(existing->start + existing->len, em->start + em->len);
+
+	if (existing->start >= em->start) {
+		if (em->start + em->len < existing->start)
+			goto invalid;
+
+		start_diff = existing->start - em->start;
+		if (real_blocks && em->block_start + start_diff !=
+		    existing->block_start)
+			goto invalid;
+
+		em->len = new_end - em->start;
+
+		remove_extent_mapping(em_tree, existing);
+		/* free for the tree */
+		free_extent_map(existing);
+		ret = add_extent_mapping(em_tree, em);
+
+	} else if (em->start > existing->start) {
+
+		if (existing->start + existing->len < em->start)
+			goto invalid;
+
+		start_diff = em->start - existing->start;
+		if (real_blocks && existing->block_start + start_diff !=
+		    em->block_start)
+			goto invalid;
+
+		remove_extent_mapping(em_tree, existing);
+		em->block_start = existing->block_start;
+		em->start = existing->start;
+		em->len = new_end - existing->start;
+		free_extent_map(existing);
+
+		ret = add_extent_mapping(em_tree, em);
+	} else {
+		goto invalid;
+	}
+	return ret;
+
+invalid:
+	printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
+	       existing->start, existing->len, existing->block_start,
+	       em->start, em->len, em->block_start);
+	return -EIO;
+}
+
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 				    size_t pg_offset, u64 start, u64 len,
 				    int create)
@@ -2267,12 +2344,35 @@ insert:
 	err = 0;
 	spin_lock(&em_tree->lock);
 	ret = add_extent_mapping(em_tree, em);
+
+	/* it is possible that someone inserted the extent into the tree
+	 * while we had the lock dropped.  It is also possible that
+	 * an overlapping map exists in the tree
+	 */
 	if (ret == -EEXIST) {
-		free_extent_map(em);
-		em = lookup_extent_mapping(em_tree, start, len);
-		if (!em) {
-			err = -EIO;
-			printk("failing to insert %Lu %Lu\n", start, len);
+		struct extent_map *existing;
+		existing = lookup_extent_mapping(em_tree, start, len);
+		if (!existing) {
+			existing = lookup_extent_mapping(em_tree, em->start,
+							 em->len);
+			if (existing) {
+				err = merge_extent_mapping(em_tree, existing,
+							   em);
+				free_extent_map(existing);
+				if (err) {
+					free_extent_map(em);
+					em = NULL;
+				}
+			} else {
+				err = -EIO;
+				printk("failing to insert %Lu %Lu\n",
+				       start, len);
+				free_extent_map(em);
+				em = NULL;
+			}
+		} else {
+			free_extent_map(em);
+			em = existing;
 		}
 	}
 	spin_unlock(&em_tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e6417a573d44..0e658c1d8211 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -883,6 +883,9 @@ again:
 	spin_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, logical, *length);
 	spin_unlock(&em_tree->lock);
+	if (!em) {
+		printk("unable to find logical %Lu\n", logical);
+	}
 	BUG_ON(!em);
 
 	BUG_ON(em->start > logical || em->start + em->len < logical);
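
As a closing illustration of the merge_extent_mapping() rules added
above: for real (on-disk) extents, two overlapping mappings may only be
merged when their disk blocks line up, i.e. the later mapping's
block_start sits exactly start_diff bytes past the earlier one's.  A
hedged, self-contained sketch of just that alignment test (the function
name and values below are ours, for illustration only):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t u64;

	/* returns nonzero when b's disk start is offset from a's by the
	 * same delta as their file offsets (assumes b_start >= a_start) */
	static int blocks_line_up(u64 a_start, u64 a_block,
				  u64 b_start, u64 b_block)
	{
		u64 start_diff = b_start - a_start;

		return a_block + start_diff == b_block;
	}

	int main(void)
	{
		/* file [100,150) at disk 1000 vs file [120,200) at disk
		 * 1020: the blocks line up, so the maps can be merged */
		printf("%d\n", blocks_line_up(100, 1000, 120, 1020)); /* 1 */
		/* same file ranges but disk 5000: merge_extent_mapping()
		 * would reject this with -EIO */
		printf("%d\n", blocks_line_up(100, 1000, 120, 5000)); /* 0 */
		return 0;
	}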