diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-04-17 11:29:12 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:01 -0400 |
commit | 3b951516ed703af0f6d82053937655ad69b60864 (patch) | |
tree | 616e648b8683e6bb2996c7ce6bafb8f592244698 /fs | |
parent | 7b859fe7cd383fa5e7e379884a748680d0af7bcc (diff) |
Btrfs: Use the extent map cache to find the logical disk block during data retries
The data read retry code needs to find the logical disk block before it
can resubmit new bios. However, finding this block must not take
the fs_mutex, because that would deadlock with a number of different callers.
This changes the retry code to use the extent map cache instead, but
that requires the extent map cache to have the extent we're looking for.
This is a problem because btrfs_drop_extent_cache drops the entire
extent map instead of only the small part it is invalidating.
The bulk of the code in this patch changes btrfs_drop_extent_cache to
invalidate only a portion of the extent cache, and changes btrfs_get_extent
to deal with the results.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/extent-tree.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 12 | ||||
-rw-r--r-- | fs/btrfs/file.c | 47 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 120 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 3 |
5 files changed, 168 insertions, 15 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 76fd5d7146e1..593011e5d455 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1747,6 +1747,7 @@ again: | |||
1747 | search_start, search_end, hint_byte, ins, | 1747 | search_start, search_end, hint_byte, ins, |
1748 | trans->alloc_exclude_start, | 1748 | trans->alloc_exclude_start, |
1749 | trans->alloc_exclude_nr, data); | 1749 | trans->alloc_exclude_nr, data); |
1750 | |||
1750 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 1751 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
1751 | num_bytes = num_bytes >> 1; | 1752 | num_bytes = num_bytes >> 1; |
1752 | num_bytes = max(num_bytes, min_alloc_size); | 1753 | num_bytes = max(num_bytes, min_alloc_size); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 88322684be6a..21597bea21fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1025,7 +1025,8 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, | |||
1025 | search_again: | 1025 | search_again: |
1026 | node = tree_search(tree, cur_start); | 1026 | node = tree_search(tree, cur_start); |
1027 | if (!node) { | 1027 | if (!node) { |
1028 | *end = (u64)-1; | 1028 | if (!found) |
1029 | *end = (u64)-1; | ||
1029 | goto out; | 1030 | goto out; |
1030 | } | 1031 | } |
1031 | 1032 | ||
@@ -1540,6 +1541,8 @@ static int end_bio_extent_readpage(struct bio *bio, | |||
1540 | start, end, state); | 1541 | start, end, state); |
1541 | if (ret == 0) { | 1542 | if (ret == 0) { |
1542 | state = NULL; | 1543 | state = NULL; |
1544 | uptodate = | ||
1545 | test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
1543 | continue; | 1546 | continue; |
1544 | } | 1547 | } |
1545 | } | 1548 | } |
@@ -1555,10 +1558,11 @@ static int end_bio_extent_readpage(struct bio *bio, | |||
1555 | !(state->state & EXTENT_LOCKED)) | 1558 | !(state->state & EXTENT_LOCKED)) |
1556 | state = NULL; | 1559 | state = NULL; |
1557 | } | 1560 | } |
1558 | if (!state && uptodate) { | 1561 | if (!state) { |
1559 | spin_unlock_irqrestore(&tree->lock, flags); | 1562 | spin_unlock_irqrestore(&tree->lock, flags); |
1560 | set_extent_uptodate(tree, start, end, | 1563 | if (uptodate) |
1561 | GFP_ATOMIC); | 1564 | set_extent_uptodate(tree, start, end, |
1565 | GFP_ATOMIC); | ||
1562 | unlock_extent(tree, start, end, GFP_ATOMIC); | 1566 | unlock_extent(tree, start, end, GFP_ATOMIC); |
1563 | goto next_io; | 1567 | goto next_io; |
1564 | } | 1568 | } |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9fbda6552069..3f5525f0834c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -356,12 +356,23 @@ out_unlock: | |||
356 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) | 356 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) |
357 | { | 357 | { |
358 | struct extent_map *em; | 358 | struct extent_map *em; |
359 | struct extent_map *split = NULL; | ||
360 | struct extent_map *split2 = NULL; | ||
359 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 361 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
360 | u64 len = end - start + 1; | 362 | u64 len = end - start + 1; |
363 | int ret; | ||
364 | int testend = 1; | ||
361 | 365 | ||
362 | if (end == (u64)-1) | 366 | if (end == (u64)-1) { |
363 | len = (u64)-1; | 367 | len = (u64)-1; |
368 | testend = 0; | ||
369 | } | ||
364 | while(1) { | 370 | while(1) { |
371 | if (!split) | ||
372 | split = alloc_extent_map(GFP_NOFS); | ||
373 | if (!split2) | ||
374 | split2 = alloc_extent_map(GFP_NOFS); | ||
375 | |||
365 | spin_lock(&em_tree->lock); | 376 | spin_lock(&em_tree->lock); |
366 | em = lookup_extent_mapping(em_tree, start, len); | 377 | em = lookup_extent_mapping(em_tree, start, len); |
367 | if (!em) { | 378 | if (!em) { |
@@ -369,6 +380,36 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) | |||
369 | break; | 380 | break; |
370 | } | 381 | } |
371 | remove_extent_mapping(em_tree, em); | 382 | remove_extent_mapping(em_tree, em); |
383 | |||
384 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | ||
385 | em->start < start) { | ||
386 | split->start = em->start; | ||
387 | split->len = start - em->start; | ||
388 | split->block_start = em->block_start; | ||
389 | split->bdev = em->bdev; | ||
390 | split->flags = em->flags; | ||
391 | ret = add_extent_mapping(em_tree, split); | ||
392 | BUG_ON(ret); | ||
393 | free_extent_map(split); | ||
394 | split = split2; | ||
395 | split2 = NULL; | ||
396 | } | ||
397 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | ||
398 | testend && em->start + em->len > start + len) { | ||
399 | u64 diff = start + len - em->start; | ||
400 | |||
401 | split->start = start + len; | ||
402 | split->len = em->start + em->len - (start + len); | ||
403 | split->bdev = em->bdev; | ||
404 | split->flags = em->flags; | ||
405 | |||
406 | split->block_start = em->block_start + diff; | ||
407 | |||
408 | ret = add_extent_mapping(em_tree, split); | ||
409 | BUG_ON(ret); | ||
410 | free_extent_map(split); | ||
411 | split = NULL; | ||
412 | } | ||
372 | spin_unlock(&em_tree->lock); | 413 | spin_unlock(&em_tree->lock); |
373 | 414 | ||
374 | /* once for us */ | 415 | /* once for us */ |
@@ -376,6 +417,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) | |||
376 | /* once for the tree*/ | 417 | /* once for the tree*/ |
377 | free_extent_map(em); | 418 | free_extent_map(em); |
378 | } | 419 | } |
420 | if (split) | ||
421 | free_extent_map(split); | ||
422 | if (split2) | ||
423 | free_extent_map(split2); | ||
379 | return 0; | 424 | return 0; |
380 | } | 425 | } |
381 | 426 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5632ea760077..40f8da884090 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) | |||
122 | if (alloc_hint == EXTENT_MAP_INLINE) | 122 | if (alloc_hint == EXTENT_MAP_INLINE) |
123 | goto out; | 123 | goto out; |
124 | 124 | ||
125 | BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); | ||
126 | |||
125 | while(num_bytes > 0) { | 127 | while(num_bytes > 0) { |
126 | cur_alloc_size = min(num_bytes, root->fs_info->max_extent); | 128 | cur_alloc_size = min(num_bytes, root->fs_info->max_extent); |
127 | ret = btrfs_alloc_extent(trans, root, cur_alloc_size, | 129 | ret = btrfs_alloc_extent(trans, root, cur_alloc_size, |
@@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) | |||
140 | ins.offset); | 142 | ins.offset); |
141 | inode->i_blocks += ins.offset >> 9; | 143 | inode->i_blocks += ins.offset >> 9; |
142 | btrfs_check_file(root, inode); | 144 | btrfs_check_file(root, inode); |
145 | if (num_bytes < cur_alloc_size) { | ||
146 | printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, | ||
147 | cur_alloc_size); | ||
148 | break; | ||
149 | } | ||
143 | num_bytes -= cur_alloc_size; | 150 | num_bytes -= cur_alloc_size; |
144 | alloc_hint = ins.objectid + ins.offset; | 151 | alloc_hint = ins.objectid + ins.offset; |
145 | start += cur_alloc_size; | 152 | start += cur_alloc_size; |
@@ -427,6 +434,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | |||
427 | struct extent_map *em; | 434 | struct extent_map *em; |
428 | struct inode *inode = page->mapping->host; | 435 | struct inode *inode = page->mapping->host; |
429 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 436 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
437 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
430 | struct bio *bio; | 438 | struct bio *bio; |
431 | int num_copies; | 439 | int num_copies; |
432 | int ret; | 440 | int ret; |
@@ -434,7 +442,6 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | |||
434 | 442 | ||
435 | ret = get_state_private(failure_tree, start, &private); | 443 | ret = get_state_private(failure_tree, start, &private); |
436 | if (ret) { | 444 | if (ret) { |
437 | size_t pg_offset = start - page_offset(page); | ||
438 | failrec = kmalloc(sizeof(*failrec), GFP_NOFS); | 445 | failrec = kmalloc(sizeof(*failrec), GFP_NOFS); |
439 | if (!failrec) | 446 | if (!failrec) |
440 | return -ENOMEM; | 447 | return -ENOMEM; |
@@ -442,8 +449,13 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio, | |||
442 | failrec->len = end - start + 1; | 449 | failrec->len = end - start + 1; |
443 | failrec->last_mirror = 0; | 450 | failrec->last_mirror = 0; |
444 | 451 | ||
445 | em = btrfs_get_extent(inode, NULL, pg_offset, start, | 452 | spin_lock(&em_tree->lock); |
446 | failrec->len, 0); | 453 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
454 | if (em->start > start || em->start + em->len < start) { | ||
455 | free_extent_map(em); | ||
456 | em = NULL; | ||
457 | } | ||
458 | spin_unlock(&em_tree->lock); | ||
447 | 459 | ||
448 | if (!em || IS_ERR(em)) { | 460 | if (!em || IS_ERR(em)) { |
449 | kfree(failrec); | 461 | kfree(failrec); |
@@ -559,6 +571,8 @@ zeroit: | |||
559 | flush_dcache_page(page); | 571 | flush_dcache_page(page); |
560 | kunmap_atomic(kaddr, KM_IRQ0); | 572 | kunmap_atomic(kaddr, KM_IRQ0); |
561 | local_irq_restore(flags); | 573 | local_irq_restore(flags); |
574 | if (private == 0) | ||
575 | return 0; | ||
562 | return -EIO; | 576 | return -EIO; |
563 | } | 577 | } |
564 | 578 | ||
@@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, | |||
908 | int pending_del_nr = 0; | 922 | int pending_del_nr = 0; |
909 | int pending_del_slot = 0; | 923 | int pending_del_slot = 0; |
910 | int extent_type = -1; | 924 | int extent_type = -1; |
925 | u64 mask = root->sectorsize - 1; | ||
911 | 926 | ||
912 | btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1); | 927 | btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); |
913 | path = btrfs_alloc_path(); | 928 | path = btrfs_alloc_path(); |
914 | path->reada = -1; | 929 | path->reada = -1; |
915 | BUG_ON(!path); | 930 | BUG_ON(!path); |
@@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
1212 | hole_start, 0, 0, | 1227 | hole_start, 0, 0, |
1213 | hole_size); | 1228 | hole_size); |
1214 | btrfs_drop_extent_cache(inode, hole_start, | 1229 | btrfs_drop_extent_cache(inode, hole_start, |
1215 | hole_size - 1); | 1230 | (u64)-1); |
1216 | btrfs_check_file(root, inode); | 1231 | btrfs_check_file(root, inode); |
1217 | } | 1232 | } |
1218 | btrfs_end_transaction(trans, root); | 1233 | btrfs_end_transaction(trans, root); |
@@ -2083,6 +2098,68 @@ out_unlock: | |||
2083 | return err; | 2098 | return err; |
2084 | } | 2099 | } |
2085 | 2100 | ||
2101 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | ||
2102 | struct extent_map *existing, | ||
2103 | struct extent_map *em) | ||
2104 | { | ||
2105 | u64 start_diff; | ||
2106 | u64 new_end; | ||
2107 | int ret = 0; | ||
2108 | int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE; | ||
2109 | |||
2110 | if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE) | ||
2111 | goto invalid; | ||
2112 | |||
2113 | if (!real_blocks && em->block_start != existing->block_start) | ||
2114 | goto invalid; | ||
2115 | |||
2116 | new_end = max(existing->start + existing->len, em->start + em->len); | ||
2117 | |||
2118 | if (existing->start >= em->start) { | ||
2119 | if (em->start + em->len < existing->start) | ||
2120 | goto invalid; | ||
2121 | |||
2122 | start_diff = existing->start - em->start; | ||
2123 | if (real_blocks && em->block_start + start_diff != | ||
2124 | existing->block_start) | ||
2125 | goto invalid; | ||
2126 | |||
2127 | em->len = new_end - em->start; | ||
2128 | |||
2129 | remove_extent_mapping(em_tree, existing); | ||
2130 | /* free for the tree */ | ||
2131 | free_extent_map(existing); | ||
2132 | ret = add_extent_mapping(em_tree, em); | ||
2133 | |||
2134 | } else if (em->start > existing->start) { | ||
2135 | |||
2136 | if (existing->start + existing->len < em->start) | ||
2137 | goto invalid; | ||
2138 | |||
2139 | start_diff = em->start - existing->start; | ||
2140 | if (real_blocks && existing->block_start + start_diff != | ||
2141 | em->block_start) | ||
2142 | goto invalid; | ||
2143 | |||
2144 | remove_extent_mapping(em_tree, existing); | ||
2145 | em->block_start = existing->block_start; | ||
2146 | em->start = existing->start; | ||
2147 | em->len = new_end - existing->start; | ||
2148 | free_extent_map(existing); | ||
2149 | |||
2150 | ret = add_extent_mapping(em_tree, em); | ||
2151 | } else { | ||
2152 | goto invalid; | ||
2153 | } | ||
2154 | return ret; | ||
2155 | |||
2156 | invalid: | ||
2157 | printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n", | ||
2158 | existing->start, existing->len, existing->block_start, | ||
2159 | em->start, em->len, em->block_start); | ||
2160 | return -EIO; | ||
2161 | } | ||
2162 | |||
2086 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 2163 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
2087 | size_t pg_offset, u64 start, u64 len, | 2164 | size_t pg_offset, u64 start, u64 len, |
2088 | int create) | 2165 | int create) |
@@ -2267,12 +2344,35 @@ insert: | |||
2267 | err = 0; | 2344 | err = 0; |
2268 | spin_lock(&em_tree->lock); | 2345 | spin_lock(&em_tree->lock); |
2269 | ret = add_extent_mapping(em_tree, em); | 2346 | ret = add_extent_mapping(em_tree, em); |
2347 | |||
2348 | /* it is possible that someone inserted the extent into the tree | ||
2349 | * while we had the lock dropped. It is also possible that | ||
2350 | * an overlapping map exists in the tree | ||
2351 | */ | ||
2270 | if (ret == -EEXIST) { | 2352 | if (ret == -EEXIST) { |
2271 | free_extent_map(em); | 2353 | struct extent_map *existing; |
2272 | em = lookup_extent_mapping(em_tree, start, len); | 2354 | existing = lookup_extent_mapping(em_tree, start, len); |
2273 | if (!em) { | 2355 | if (!existing) { |
2274 | err = -EIO; | 2356 | existing = lookup_extent_mapping(em_tree, em->start, |
2275 | printk("failing to insert %Lu %Lu\n", start, len); | 2357 | em->len); |
2358 | if (existing) { | ||
2359 | err = merge_extent_mapping(em_tree, existing, | ||
2360 | em); | ||
2361 | free_extent_map(existing); | ||
2362 | if (err) { | ||
2363 | free_extent_map(em); | ||
2364 | em = NULL; | ||
2365 | } | ||
2366 | } else { | ||
2367 | err = -EIO; | ||
2368 | printk("failing to insert %Lu %Lu\n", | ||
2369 | start, len); | ||
2370 | free_extent_map(em); | ||
2371 | em = NULL; | ||
2372 | } | ||
2373 | } else { | ||
2374 | free_extent_map(em); | ||
2375 | em = existing; | ||
2276 | } | 2376 | } |
2277 | } | 2377 | } |
2278 | spin_unlock(&em_tree->lock); | 2378 | spin_unlock(&em_tree->lock); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e6417a573d44..0e658c1d8211 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -883,6 +883,9 @@ again: | |||
883 | spin_lock(&em_tree->lock); | 883 | spin_lock(&em_tree->lock); |
884 | em = lookup_extent_mapping(em_tree, logical, *length); | 884 | em = lookup_extent_mapping(em_tree, logical, *length); |
885 | spin_unlock(&em_tree->lock); | 885 | spin_unlock(&em_tree->lock); |
886 | if (!em) { | ||
887 | printk("unable to find logical %Lu\n", logical); | ||
888 | } | ||
886 | BUG_ON(!em); | 889 | BUG_ON(!em); |
887 | 890 | ||
888 | BUG_ON(em->start > logical || em->start + em->len < logical); | 891 | BUG_ON(em->start > logical || em->start + em->len < logical); |