author    Chris Mason <chris.mason@oracle.com>    2008-04-17 11:29:12 -0400
committer Chris Mason <chris.mason@oracle.com>    2008-09-25 11:04:01 -0400
commit    3b951516ed703af0f6d82053937655ad69b60864 (patch)
tree      616e648b8683e6bb2996c7ce6bafb8f592244698 /fs/btrfs/inode.c
parent    7b859fe7cd383fa5e7e379884a748680d0af7bcc (diff)
Btrfs: Use the extent map cache to find the logical disk block during data retries
The data read retry code needs to find the logical disk block before it
can resubmit new bios.  But, finding this block isn't allowed to take
the fs_mutex because that will deadlock with a number of different callers.

This changes the retry code to use the extent map cache instead, but
that requires the extent map cache to have the extent we're looking for.
This is a problem because btrfs_drop_extent_cache just drops the entire
extent instead of the little tiny part it is invalidating.

The bulk of the code in this patch changes btrfs_drop_extent_cache to
invalidate only a portion of the extent cache, and changes btrfs_get_extent
to deal with the results.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
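
To make "invalidate only a portion of the extent cache" concrete, here is a
minimal userspace C sketch of the idea, not btrfs code: struct cached_extent
and trim_cached_extent are made-up, simplified stand-ins for struct extent_map
and the reworked btrfs_drop_extent_cache.  Dropping the range [start, end]
keeps the pieces of a cached extent on either side instead of discarding the
whole entry.

/*
 * Minimal userspace sketch of partial extent-cache invalidation.
 * Not btrfs code: cached_extent and trim_cached_extent are made-up
 * stand-ins for illustration only.
 */
#include <stdio.h>
#include <stdint.h>

struct cached_extent {
	uint64_t start;		/* first byte covered by this entry */
	uint64_t len;		/* number of bytes covered */
};

/*
 * Drop the range [start, end] (inclusive, as in the btrfs call sites)
 * from 'em'.  Writes the surviving pieces into 'out' and returns how
 * many there are: 0, 1, or 2.
 */
static int trim_cached_extent(const struct cached_extent *em,
			      uint64_t start, uint64_t end,
			      struct cached_extent *out)
{
	uint64_t em_end = em->start + em->len;	/* one past the last byte */
	int n = 0;

	if (end < em->start || start >= em_end) {
		out[n++] = *em;			/* no overlap: keep whole */
		return n;
	}
	if (em->start < start) {		/* piece before the hole */
		out[n].start = em->start;
		out[n].len = start - em->start;
		n++;
	}
	if (end + 1 < em_end) {			/* piece after the hole */
		out[n].start = end + 1;
		out[n].len = em_end - (end + 1);
		n++;
	}
	return n;
}

int main(void)
{
	struct cached_extent em = { .start = 0, .len = 16384 };
	struct cached_extent out[2];
	int i, n = trim_cached_extent(&em, 4096, 8191, out);

	/* expect two pieces: [0, 4096) and [8192, 16384) */
	for (i = 0; i < n; i++)
		printf("kept [%llu, %llu)\n",
		       (unsigned long long)out[i].start,
		       (unsigned long long)(out[i].start + out[i].len));
	return 0;
}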
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c | 120
1 file changed, 110 insertions(+), 10 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5632ea760077..40f8da884090 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 	if (alloc_hint == EXTENT_MAP_INLINE)
 		goto out;
 
+	BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
+
 	while(num_bytes > 0) {
 		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
 		ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
@@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 				       ins.offset);
 		inode->i_blocks += ins.offset >> 9;
 		btrfs_check_file(root, inode);
+		if (num_bytes < cur_alloc_size) {
+			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
+			       cur_alloc_size);
+			break;
+		}
 		num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
@@ -427,6 +434,7 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 	struct extent_map *em;
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct bio *bio;
 	int num_copies;
 	int ret;
@@ -434,7 +442,6 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 
 	ret = get_state_private(failure_tree, start, &private);
 	if (ret) {
-		size_t pg_offset = start - page_offset(page);
 		failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
 		if (!failrec)
 			return -ENOMEM;
@@ -442,8 +449,13 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
 		failrec->len = end - start + 1;
 		failrec->last_mirror = 0;
 
-		em = btrfs_get_extent(inode, NULL, pg_offset, start,
-				      failrec->len, 0);
+		spin_lock(&em_tree->lock);
+		em = lookup_extent_mapping(em_tree, start, failrec->len);
+		if (em->start > start || em->start + em->len < start) {
+			free_extent_map(em);
+			em = NULL;
+		}
+		spin_unlock(&em_tree->lock);
 
 		if (!em || IS_ERR(em)) {
 			kfree(failrec);
@@ -559,6 +571,8 @@ zeroit:
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_IRQ0);
 	local_irq_restore(flags);
+	if (private == 0)
+		return 0;
 	return -EIO;
 }
 
@@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 	int pending_del_nr = 0;
 	int pending_del_slot = 0;
 	int extent_type = -1;
+	u64 mask = root->sectorsize - 1;
 
-	btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
+	btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
 	path = btrfs_alloc_path();
 	path->reada = -1;
 	BUG_ON(!path);
@@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 					       hole_start, 0, 0,
 					       hole_size);
 			btrfs_drop_extent_cache(inode, hole_start,
-						hole_size - 1);
+						(u64)-1);
 			btrfs_check_file(root, inode);
 		}
 		btrfs_end_transaction(trans, root);
@@ -2083,6 +2098,68 @@ out_unlock:
 	return err;
 }
 
+static int merge_extent_mapping(struct extent_map_tree *em_tree,
+				struct extent_map *existing,
+				struct extent_map *em)
+{
+	u64 start_diff;
+	u64 new_end;
+	int ret = 0;
+	int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
+
+	if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
+		goto invalid;
+
+	if (!real_blocks && em->block_start != existing->block_start)
+		goto invalid;
+
+	new_end = max(existing->start + existing->len, em->start + em->len);
+
+	if (existing->start >= em->start) {
+		if (em->start + em->len < existing->start)
+			goto invalid;
+
+		start_diff = existing->start - em->start;
+		if (real_blocks && em->block_start + start_diff !=
+		    existing->block_start)
+			goto invalid;
+
+		em->len = new_end - em->start;
+
+		remove_extent_mapping(em_tree, existing);
+		/* free for the tree */
+		free_extent_map(existing);
+		ret = add_extent_mapping(em_tree, em);
+
+	} else if (em->start > existing->start) {
+
+		if (existing->start + existing->len < em->start)
+			goto invalid;
+
+		start_diff = em->start - existing->start;
+		if (real_blocks && existing->block_start + start_diff !=
+		    em->block_start)
+			goto invalid;
+
+		remove_extent_mapping(em_tree, existing);
+		em->block_start = existing->block_start;
+		em->start = existing->start;
+		em->len = new_end - existing->start;
+		free_extent_map(existing);
+
+		ret = add_extent_mapping(em_tree, em);
+	} else {
+		goto invalid;
+	}
+	return ret;
+
+invalid:
+	printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
+	       existing->start, existing->len, existing->block_start,
+	       em->start, em->len, em->block_start);
+	return -EIO;
+}
+
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 				    size_t pg_offset, u64 start, u64 len,
 				    int create)
@@ -2267,12 +2344,35 @@ insert:
 	err = 0;
 	spin_lock(&em_tree->lock);
 	ret = add_extent_mapping(em_tree, em);
+
+	/* it is possible that someone inserted the extent into the tree
+	 * while we had the lock dropped.  It is also possible that
+	 * an overlapping map exists in the tree
+	 */
 	if (ret == -EEXIST) {
-		free_extent_map(em);
-		em = lookup_extent_mapping(em_tree, start, len);
-		if (!em) {
-			err = -EIO;
-			printk("failing to insert %Lu %Lu\n", start, len);
+		struct extent_map *existing;
+		existing = lookup_extent_mapping(em_tree, start, len);
+		if (!existing) {
+			existing = lookup_extent_mapping(em_tree, em->start,
+							 em->len);
+			if (existing) {
+				err = merge_extent_mapping(em_tree, existing,
+							   em);
+				free_extent_map(existing);
+				if (err) {
+					free_extent_map(em);
+					em = NULL;
+				}
+			} else {
+				err = -EIO;
+				printk("failing to insert %Lu %Lu\n",
+				       start, len);
+				free_extent_map(em);
+				em = NULL;
+			}
+		} else {
+			free_extent_map(em);
+			em = existing;
 		}
 	}
 	spin_unlock(&em_tree->lock);
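
The merge path above only combines two mappings when their byte ranges touch
and their disk locations line up, i.e. block_start must be skewed by exactly
the same amount as start.  A small userspace sketch of that arithmetic with a
worked example follows; struct map and merge() are made-up stand-ins, and only
the existing->start >= em->start branch is shown, without the tree bookkeeping.

/*
 * Userspace sketch of the overlap test merge_extent_mapping() relies on.
 * Made-up types: struct map stands in for struct extent_map.
 */
#include <stdio.h>
#include <stdint.h>

struct map {
	uint64_t start;		/* logical file offset */
	uint64_t len;		/* length in bytes */
	uint64_t block_start;	/* on-disk byte offset */
};

/* Grow 'em' to also cover 'existing'; -1 if they cannot be merged. */
static int merge(const struct map *existing, struct map *em)
{
	uint64_t new_end, start_diff;

	if (em->start + em->len < existing->start)
		return -1;		/* ranges do not touch */

	start_diff = existing->start - em->start;
	if (em->block_start + start_diff != existing->block_start)
		return -1;		/* disk blocks are not contiguous */

	new_end = existing->start + existing->len;
	if (em->start + em->len > new_end)
		new_end = em->start + em->len;
	em->len = new_end - em->start;	/* em now covers both ranges */
	return 0;
}

int main(void)
{
	/* existing: file [8192, 16384) at disk 1056768
	 * em:       file [4096, 12288) at disk 1052672
	 * both are skewed by the same 4096 bytes, so they merge */
	struct map existing = { 8192, 8192, 1056768 };
	struct map em = { 4096, 8192, 1052672 };

	if (merge(&existing, &em) == 0)	/* expect file [4096, 16384) */
		printf("merged: file [%llu, %llu) at disk %llu\n",
		       (unsigned long long)em.start,
		       (unsigned long long)(em.start + em.len),
		       (unsigned long long)em.block_start);
	return 0;
}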