aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-05-16 18:50:58 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-05-16 18:50:58 -0400
commit: c7309e88a694acbe9e42655f02b9dd37c7931424 (patch)
tree: d4d6e55e33bf34f7759c8be1cc52b938aff68813 /fs/btrfs
parent: 518af3cb8ccaf32057db6046e241ec393d6c7b98 (diff)
parent: 062c19e9dd692b8a78e3532f71c290520a2ab437 (diff)
Merge branch 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "The first commit is a fix from Filipe for a very old extent buffer reuse race that triggered a BUG_ON. It hasn't come up often; I looked through old logs at FB and we hit it a handful of times over the last year. The rest are other corner cases he hit during testing." * 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: fix race when reusing stale extent buffers that leads to BUG_ON; Btrfs: fix race between block group creation and their cache writeout; Btrfs: fix panic when starting bg cache writeout after IO error; Btrfs: fix crash after inode cache writeback failure
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent-tree.c 31
-rw-r--r-- fs/btrfs/extent_io.c 19
-rw-r--r-- fs/btrfs/free-space-cache.c 14
-rw-r--r-- fs/btrfs/ordered-data.c 14
4 files changed, 68 insertions, 10 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0ec8e228b89f..7effed6f2fa6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
3180 btrfs_mark_buffer_dirty(leaf); 3180 btrfs_mark_buffer_dirty(leaf);
3181fail: 3181fail:
3182 btrfs_release_path(path); 3182 btrfs_release_path(path);
3183 if (ret)
3184 btrfs_abort_transaction(trans, root, ret);
3185 return ret; 3183 return ret;
3186 3184
3187} 3185}
@@ -3487,8 +3485,30 @@ again:
3487 ret = 0; 3485 ret = 0;
3488 } 3486 }
3489 } 3487 }
3490 if (!ret) 3488 if (!ret) {
3491 ret = write_one_cache_group(trans, root, path, cache); 3489 ret = write_one_cache_group(trans, root, path, cache);
3490 /*
3491 * Our block group might still be attached to the list
3492 * of new block groups in the transaction handle of some
3493 * other task (struct btrfs_trans_handle->new_bgs). This
3494 * means its block group item isn't yet in the extent
3495 * tree. If this happens ignore the error, as we will
3496 * try again later in the critical section of the
3497 * transaction commit.
3498 */
3499 if (ret == -ENOENT) {
3500 ret = 0;
3501 spin_lock(&cur_trans->dirty_bgs_lock);
3502 if (list_empty(&cache->dirty_list)) {
3503 list_add_tail(&cache->dirty_list,
3504 &cur_trans->dirty_bgs);
3505 btrfs_get_block_group(cache);
3506 }
3507 spin_unlock(&cur_trans->dirty_bgs_lock);
3508 } else if (ret) {
3509 btrfs_abort_transaction(trans, root, ret);
3510 }
3511 }
3492 3512
3493 /* if its not on the io list, we need to put the block group */ 3513 /* if its not on the io list, we need to put the block group */
3494 if (should_put) 3514 if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3597 ret = 0; 3617 ret = 0;
3598 } 3618 }
3599 } 3619 }
3600 if (!ret) 3620 if (!ret) {
3601 ret = write_one_cache_group(trans, root, path, cache); 3621 ret = write_one_cache_group(trans, root, path, cache);
3622 if (ret)
3623 btrfs_abort_transaction(trans, root, ret);
3624 }
3602 3625
3603 /* if its not on the io list, we need to put the block group */ 3626 /* if its not on the io list, we need to put the block group */
3604 if (should_put) 3627 if (should_put)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 43af5a61ad25..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4772 start >> PAGE_CACHE_SHIFT); 4772 start >> PAGE_CACHE_SHIFT);
4773 if (eb && atomic_inc_not_zero(&eb->refs)) { 4773 if (eb && atomic_inc_not_zero(&eb->refs)) {
4774 rcu_read_unlock(); 4774 rcu_read_unlock();
4775 /*
4776 * Lock our eb's refs_lock to avoid races with
4777 * free_extent_buffer. When we get our eb it might be flagged
4778 * with EXTENT_BUFFER_STALE and another task running
4779 * free_extent_buffer might have seen that flag set,
4780 * eb->refs == 2, that the buffer isn't under IO (dirty and
4781 * writeback flags not set) and it's still in the tree (flag
4782 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
4783 * of decrementing the extent buffer's reference count twice.
4784 * So here we could race and increment the eb's reference count,
4785 * clear its stale flag, mark it as dirty and drop our reference
4786 * before the other task finishes executing free_extent_buffer,
4787 * which would later result in an attempt to free an extent
4788 * buffer that is dirty.
4789 */
4790 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4791 spin_lock(&eb->refs_lock);
4792 spin_unlock(&eb->refs_lock);
4793 }
4775 mark_extent_buffer_accessed(eb, NULL); 4794 mark_extent_buffer_accessed(eb, NULL);
4776 return eb; 4795 return eb;
4777 } 4796 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5e020d76fd07..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3467 int ret; 3467 int ret;
3468 struct btrfs_io_ctl io_ctl; 3468 struct btrfs_io_ctl io_ctl;
3469 bool release_metadata = true;
3469 3470
3470 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3471 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
3471 return 0; 3472 return 0;
@@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3473 memset(&io_ctl, 0, sizeof(io_ctl)); 3474 memset(&io_ctl, 0, sizeof(io_ctl));
3474 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, 3475 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
3475 trans, path, 0); 3476 trans, path, 0);
3476 if (!ret) 3477 if (!ret) {
3478 /*
3479 * At this point writepages() didn't error out, so our metadata
3480 * reservation is released when the writeback finishes, at
3481 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3482 * with or without an error.
3483 */
3484 release_metadata = false;
3477 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); 3485 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
3486 }
3478 3487
3479 if (ret) { 3488 if (ret) {
3480 btrfs_delalloc_release_metadata(inode, inode->i_size); 3489 if (release_metadata)
3490 btrfs_delalloc_release_metadata(inode, inode->i_size);
3481#ifdef DEBUG 3491#ifdef DEBUG
3482 btrfs_err(root->fs_info, 3492 btrfs_err(root->fs_info,
3483 "failed to write free ino cache for root %llu", 3493 "failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 722int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
723{ 723{
724 int ret = 0; 724 int ret = 0;
725 int ret_wb = 0;
725 u64 end; 726 u64 end;
726 u64 orig_end; 727 u64 orig_end;
727 struct btrfs_ordered_extent *ordered; 728 struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
741 if (ret) 742 if (ret)
742 return ret; 743 return ret;
743 744
744 ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); 745 /*
745 if (ret) 746 * If we have a writeback error don't return immediately. Wait first
746 return ret; 747 * for any ordered extents that haven't completed yet. This is to make
748 * sure no one can dirty the same page ranges and call writepages()
749 * before the ordered extents complete - to avoid failures (-EEXIST)
750 * when adding the new ordered extents to the ordered tree.
751 */
752 ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
747 753
748 end = orig_end; 754 end = orig_end;
749 while (1) { 755 while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
767 break; 773 break;
768 end--; 774 end--;
769 } 775 }
770 return ret; 776 return ret_wb ? ret_wb : ret;
771} 777}
772 778
773/* 779/*