aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2014-03-28 17:07:27 -0400
committerChris Mason <clm@fb.com>2014-04-06 20:34:37 -0400
commita26e8c9f75b0bfd8cccc9e8f110737b136eb5994 (patch)
tree678d0c7e3611739b89ca851c868ab8892a70effa /fs/btrfs
parent573a075567f0174551e2fad2a3164afd2af788f2 (diff)
Btrfs: don't clear uptodate if the eb is under IO
So I have an awful exercise script that will run snapshot, balance and send/receive in parallel. This sometimes would crash spectacularly and when it came back up the fs would be completely hosed. Turns out this is because of a bad interaction of balance and send/receive. Send will hold onto its entire path for the whole send, but its blocks could get relocated out from underneath it, and because it doesn't old tree locks theres nothing to keep this from happening. So it will go to read in a slot with an old transid, and we could have re-allocated this block for something else and it could have a completely different transid. But because we think it is invalid we clear uptodate and re-read in the block. If we do this before we actually write out the new block we could write back stale data to the fs, and boom we're screwed. Now we definitely need to fix this disconnect between send and balance, but we really really need to not allow ourselves to accidently read in stale data over new data. So make sure we check if the extent buffer is not under io before clearing uptodate, this will kick back EIO to the caller instead of reading in stale data and keep us from corrupting the fs. Thanks, Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/disk-io.c20
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/transaction.h2
6 files changed, 27 insertions, 3 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d9698fda2d12..98fe70193397 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
329{ 329{
330 struct extent_state *cached_state = NULL; 330 struct extent_state *cached_state = NULL;
331 int ret; 331 int ret;
332 bool need_lock = (current->journal_info ==
333 (void *)BTRFS_SEND_TRANS_STUB);
332 334
333 if (!parent_transid || btrfs_header_generation(eb) == parent_transid) 335 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
334 return 0; 336 return 0;
@@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
336 if (atomic) 338 if (atomic)
337 return -EAGAIN; 339 return -EAGAIN;
338 340
341 if (need_lock) {
342 btrfs_tree_read_lock(eb);
343 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
344 }
345
339 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 346 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
340 0, &cached_state); 347 0, &cached_state);
341 if (extent_buffer_uptodate(eb) && 348 if (extent_buffer_uptodate(eb) &&
@@ -347,10 +354,21 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
347 "found %llu\n", 354 "found %llu\n",
348 eb->start, parent_transid, btrfs_header_generation(eb)); 355 eb->start, parent_transid, btrfs_header_generation(eb));
349 ret = 1; 356 ret = 1;
350 clear_extent_buffer_uptodate(eb); 357
358 /*
359 * Things reading via commit roots that don't have normal protection,
360 * like send, can have a really old block in cache that may point at a
361 * block that has been free'd and re-allocated. So don't clear uptodate
362 * if we find an eb that is under IO (dirty/writeback) because we could
363 * end up reading in the stale data and then writing it back out and
364 * making everybody very sad.
365 */
366 if (!extent_buffer_under_io(eb))
367 clear_extent_buffer_uptodate(eb);
351out: 368out:
352 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 369 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
353 &cached_state, GFP_NOFS); 370 &cached_state, GFP_NOFS);
371 btrfs_tree_read_unlock_blocking(eb);
354 return ret; 372 return ret;
355} 373}
356 374
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd78c84821c8..d35a3ca15fb5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4315,7 +4315,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4315 kmem_cache_free(extent_buffer_cache, eb); 4315 kmem_cache_free(extent_buffer_cache, eb);
4316} 4316}
4317 4317
4318static int extent_buffer_under_io(struct extent_buffer *eb) 4318int extent_buffer_under_io(struct extent_buffer *eb)
4319{ 4319{
4320 return (atomic_read(&eb->io_pages) || 4320 return (atomic_read(&eb->io_pages) ||
4321 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || 4321 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 58b27e5ab521..c488b45237bf 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb);
320int set_extent_buffer_uptodate(struct extent_buffer *eb); 320int set_extent_buffer_uptodate(struct extent_buffer *eb);
321int clear_extent_buffer_uptodate(struct extent_buffer *eb); 321int clear_extent_buffer_uptodate(struct extent_buffer *eb);
322int extent_buffer_uptodate(struct extent_buffer *eb); 322int extent_buffer_uptodate(struct extent_buffer *eb);
323int extent_buffer_under_io(struct extent_buffer *eb);
323int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 324int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
324 unsigned long min_len, char **map, 325 unsigned long min_len, char **map,
325 unsigned long *map_start, 326 unsigned long *map_start,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 9b6da9d55f9a..d00534b78382 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5718,7 +5718,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5718 NULL); 5718 NULL);
5719 sort_clone_roots = 1; 5719 sort_clone_roots = 1;
5720 5720
5721 current->journal_info = (void *)BTRFS_SEND_TRANS_STUB;
5721 ret = send_subvol(sctx); 5722 ret = send_subvol(sctx);
5723 current->journal_info = NULL;
5722 if (ret < 0) 5724 if (ret < 0)
5723 goto out; 5725 goto out;
5724 5726
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a04707f740d6..038177cfabbb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -375,7 +375,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
375 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) 375 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
376 return ERR_PTR(-EROFS); 376 return ERR_PTR(-EROFS);
377 377
378 if (current->journal_info) { 378 if (current->journal_info &&
379 current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
379 WARN_ON(type & TRANS_EXTWRITERS); 380 WARN_ON(type & TRANS_EXTWRITERS);
380 h = current->journal_info; 381 h = current->journal_info;
381 h->use_count++; 382 h->use_count++;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 6ac037e9f9f0..2bcba89d952e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -78,6 +78,8 @@ struct btrfs_transaction {
78#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ 78#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \
79 __TRANS_ATTACH) 79 __TRANS_ATTACH)
80 80
81#define BTRFS_SEND_TRANS_STUB 1
82
81struct btrfs_trans_handle { 83struct btrfs_trans_handle {
82 u64 transid; 84 u64 transid;
83 u64 bytes_reserved; 85 u64 bytes_reserved;