aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-09-11 12:27:37 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-11 13:31:07 -0400
commita1ed835e1ab5795f91b198d08c43e2f56848dcf3 (patch)
treeac3b370823fa76c5be7698e3663306badbbd622d
parent8b62b72b26bcd72082c4a69d179dd906bcc22200 (diff)
Btrfs: Fix extent replacement race
Data COW means that whenever we write to a file, we replace any old extent pointers with new ones. There was a window where a readpage might find the old extent pointers on disk and cache them in the extent_map tree in RAM in the middle of a given write replacing them. Even though both the readpage and the write had their respective bytes in the file locked, the extent readpage inserts may cover more bytes than it had locked down. This commit closes the race by keeping the new extent pinned in the extent map tree until after the on-disk btree is properly set up with the new extent pointers. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent_map.c50
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/file.c8
-rw-r--r--fs/btrfs/inode.c25
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/btrfs/tree-log.c2
7 files changed, 80 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 38eeb6c49c8a..1ceab8b4d6dc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2292,7 +2292,7 @@ extern struct file_operations btrfs_file_operations;
2292int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2292int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2293 struct btrfs_root *root, struct inode *inode, 2293 struct btrfs_root *root, struct inode *inode,
2294 u64 start, u64 end, u64 locked_end, 2294 u64 start, u64 end, u64 locked_end,
2295 u64 inline_limit, u64 *hint_block); 2295 u64 inline_limit, u64 *hint_block, int drop_cache);
2296int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2296int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2297 struct btrfs_root *root, 2297 struct btrfs_root *root,
2298 struct inode *inode, u64 start, u64 end); 2298 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 72e9fa3c31f5..5bc7a0d325e7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
198 return 0; 198 return 0;
199} 199}
200 200
201int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
202{
203 int ret = 0;
204 struct extent_map *merge = NULL;
205 struct rb_node *rb;
206 struct extent_map *em;
207
208 write_lock(&tree->lock);
209 em = lookup_extent_mapping(tree, start, len);
210
211 WARN_ON(em->start != start || !em);
212
213 if (!em)
214 goto out;
215
216 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
217
218 if (em->start != 0) {
219 rb = rb_prev(&em->rb_node);
220 if (rb)
221 merge = rb_entry(rb, struct extent_map, rb_node);
222 if (rb && mergable_maps(merge, em)) {
223 em->start = merge->start;
224 em->len += merge->len;
225 em->block_len += merge->block_len;
226 em->block_start = merge->block_start;
227 merge->in_tree = 0;
228 rb_erase(&merge->rb_node, &tree->map);
229 free_extent_map(merge);
230 }
231 }
232
233 rb = rb_next(&em->rb_node);
234 if (rb)
235 merge = rb_entry(rb, struct extent_map, rb_node);
236 if (rb && mergable_maps(em, merge)) {
237 em->len += merge->len;
238 em->block_len += merge->len;
239 rb_erase(&merge->rb_node, &tree->map);
240 merge->in_tree = 0;
241 free_extent_map(merge);
242 }
243
244 free_extent_map(em);
245out:
246 write_unlock(&tree->lock);
247 return ret;
248
249}
250
201/** 251/**
202 * add_extent_mapping - add new extent map to the extent tree 252 * add_extent_mapping - add new extent map to the extent tree
203 * @tree: tree to insert new map in 253 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6216dfbcf9be..d3d442f4bbbd 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -59,4 +59,5 @@ struct extent_map *alloc_extent_map(gfp_t mask);
59void free_extent_map(struct extent_map *em); 59void free_extent_map(struct extent_map *em);
60int __init extent_map_init(void); 60int __init extent_map_init(void);
61void extent_map_exit(void); 61void extent_map_exit(void);
62int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
62#endif 63#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ef66c3d989b9..4123db9d5141 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -177,10 +177,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
177 } 177 }
178 flags = em->flags; 178 flags = em->flags;
179 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 179 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
180 write_unlock(&em_tree->lock);
181 if (em->start <= start && 180 if (em->start <= start &&
182 (!testend || em->start + em->len >= start + len)) { 181 (!testend || em->start + em->len >= start + len)) {
183 free_extent_map(em); 182 free_extent_map(em);
183 write_unlock(&em_tree->lock);
184 break; 184 break;
185 } 185 }
186 if (start < em->start) { 186 if (start < em->start) {
@@ -190,6 +190,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
190 start = em->start + em->len; 190 start = em->start + em->len;
191 } 191 }
192 free_extent_map(em); 192 free_extent_map(em);
193 write_unlock(&em_tree->lock);
193 continue; 194 continue;
194 } 195 }
195 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 196 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -269,7 +270,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
269noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 270noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
270 struct btrfs_root *root, struct inode *inode, 271 struct btrfs_root *root, struct inode *inode,
271 u64 start, u64 end, u64 locked_end, 272 u64 start, u64 end, u64 locked_end,
272 u64 inline_limit, u64 *hint_byte) 273 u64 inline_limit, u64 *hint_byte, int drop_cache)
273{ 274{
274 u64 extent_end = 0; 275 u64 extent_end = 0;
275 u64 search_start = start; 276 u64 search_start = start;
@@ -294,7 +295,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
294 int ret; 295 int ret;
295 296
296 inline_limit = 0; 297 inline_limit = 0;
297 btrfs_drop_extent_cache(inode, start, end - 1, 0); 298 if (drop_cache)
299 btrfs_drop_extent_cache(inode, start, end - 1, 0);
298 300
299 path = btrfs_alloc_path(); 301 path = btrfs_alloc_path();
300 if (!path) 302 if (!path)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 739a245e25d6..233fe6f26120 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -232,7 +232,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
232 } 232 }
233 233
234 ret = btrfs_drop_extents(trans, root, inode, start, 234 ret = btrfs_drop_extents(trans, root, inode, start,
235 aligned_end, aligned_end, start, &hint_byte); 235 aligned_end, aligned_end, start,
236 &hint_byte, 1);
236 BUG_ON(ret); 237 BUG_ON(ret);
237 238
238 if (isize > actual_end) 239 if (isize > actual_end)
@@ -241,7 +242,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
241 inline_len, compressed_size, 242 inline_len, compressed_size,
242 compressed_pages); 243 compressed_pages);
243 BUG_ON(ret); 244 BUG_ON(ret);
244 btrfs_drop_extent_cache(inode, start, aligned_end, 0); 245 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
245 return 0; 246 return 0;
246} 247}
247 248
@@ -1455,9 +1456,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 BUG_ON(!path); 1456 BUG_ON(!path);
1456 1457
1457 path->leave_spinning = 1; 1458 path->leave_spinning = 1;
1459
1460 /*
1461 * we may be replacing one extent in the tree with another.
1462 * The new extent is pinned in the extent map, and we don't want
1463 * to drop it from the cache until it is completely in the btree.
1464 *
1465 * So, tell btrfs_drop_extents to leave this extent in the cache.
1466 * the caller is expected to unpin it and allow it to be merged
1467 * with the others.
1468 */
1458 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1469 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1459 file_pos + num_bytes, locked_end, 1470 file_pos + num_bytes, locked_end,
1460 file_pos, &hint); 1471 file_pos, &hint, 0);
1461 BUG_ON(ret); 1472 BUG_ON(ret);
1462 1473
1463 ins.objectid = inode->i_ino; 1474 ins.objectid = inode->i_ino;
@@ -1485,7 +1496,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1485 btrfs_mark_buffer_dirty(leaf); 1496 btrfs_mark_buffer_dirty(leaf);
1486 1497
1487 inode_add_bytes(inode, num_bytes); 1498 inode_add_bytes(inode, num_bytes);
1488 btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
1489 1499
1490 ins.objectid = disk_bytenr; 1500 ins.objectid = disk_bytenr;
1491 ins.offset = disk_num_bytes; 1501 ins.offset = disk_num_bytes;
@@ -1596,6 +1606,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1596 ordered_extent->len, 1606 ordered_extent->len,
1597 compressed, 0, 0, 1607 compressed, 0, 0,
1598 BTRFS_FILE_EXTENT_REG); 1608 BTRFS_FILE_EXTENT_REG);
1609 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1610 ordered_extent->file_offset,
1611 ordered_extent->len);
1599 BUG_ON(ret); 1612 BUG_ON(ret);
1600 } 1613 }
1601 unlock_extent(io_tree, ordered_extent->file_offset, 1614 unlock_extent(io_tree, ordered_extent->file_offset,
@@ -2940,7 +2953,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2940 cur_offset, 2953 cur_offset,
2941 cur_offset + hole_size, 2954 cur_offset + hole_size,
2942 block_end, 2955 block_end,
2943 cur_offset, &hint_byte); 2956 cur_offset, &hint_byte, 1);
2944 if (err) 2957 if (err)
2945 break; 2958 break;
2946 err = btrfs_insert_file_extent(trans, root, 2959 err = btrfs_insert_file_extent(trans, root,
@@ -5086,6 +5099,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5086 0, 0, 0, 5099 0, 0, 0,
5087 BTRFS_FILE_EXTENT_PREALLOC); 5100 BTRFS_FILE_EXTENT_PREALLOC);
5088 BUG_ON(ret); 5101 BUG_ON(ret);
5102 btrfs_drop_extent_cache(inode, cur_offset,
5103 cur_offset + ins.offset -1, 0);
5089 num_bytes -= ins.offset; 5104 num_bytes -= ins.offset;
5090 cur_offset += ins.offset; 5105 cur_offset += ins.offset;
5091 alloc_hint = ins.objectid + ins.offset; 5106 alloc_hint = ins.objectid + ins.offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9f4db848db10..e2d8e90259b0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -597,9 +597,8 @@ again:
597 clear_page_dirty_for_io(page); 597 clear_page_dirty_for_io(page);
598 598
599 btrfs_set_extent_delalloc(inode, page_start, page_end); 599 btrfs_set_extent_delalloc(inode, page_start, page_end);
600
601 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
602 set_page_dirty(page); 600 set_page_dirty(page);
601 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
603 unlock_page(page); 602 unlock_page(page);
604 page_cache_release(page); 603 page_cache_release(page);
605 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 604 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -977,7 +976,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
977 976
978 /* punch hole in destination first */ 977 /* punch hole in destination first */
979 btrfs_drop_extents(trans, root, inode, off, off + len, 978 btrfs_drop_extents(trans, root, inode, off, off + len,
980 off + len, 0, &hint_byte); 979 off + len, 0, &hint_byte, 1);
981 980
982 /* clone data */ 981 /* clone data */
983 key.objectid = src->i_ino; 982 key.objectid = src->i_ino;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d91b0de7c502..8661a7381b39 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
534 saved_nbytes = inode_get_bytes(inode); 534 saved_nbytes = inode_get_bytes(inode);
535 /* drop any overlapping extents */ 535 /* drop any overlapping extents */
536 ret = btrfs_drop_extents(trans, root, inode, 536 ret = btrfs_drop_extents(trans, root, inode,
537 start, extent_end, extent_end, start, &alloc_hint); 537 start, extent_end, extent_end, start, &alloc_hint, 1);
538 BUG_ON(ret); 538 BUG_ON(ret);
539 539
540 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||