path: root/fs/btrfs
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/delayed-inode.c    |   2
-rw-r--r--  fs/btrfs/extent-tree.c      | 121
-rw-r--r--  fs/btrfs/extent_io.c        |  73
-rw-r--r--  fs/btrfs/free-space-cache.c |  26
-rw-r--r--  fs/btrfs/inode.c            |  21
-rw-r--r--  fs/btrfs/ioctl.c            |   3
-rw-r--r--  fs/btrfs/ordered-data.c     |  14
-rw-r--r--  fs/btrfs/volumes.c          |  15
8 files changed, 187 insertions(+), 88 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cde698a07d21..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
 	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
 	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+	BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
 	inode->i_version = btrfs_stack_inode_sequence(inode_item);
 	inode->i_rdev = 0;
 	*rdev = btrfs_stack_inode_rdev(inode_item);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..7effed6f2fa6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3178,10 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
 	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(path);
 fail:
-	if (ret)
-		btrfs_abort_transaction(trans, root, ret);
+	btrfs_release_path(path);
 	return ret;
 
 }
@@ -3305,8 +3303,7 @@ again:
 
 	spin_lock(&block_group->lock);
 	if (block_group->cached != BTRFS_CACHE_FINISHED ||
-	    !btrfs_test_opt(root, SPACE_CACHE) ||
-	    block_group->delalloc_bytes) {
+	    !btrfs_test_opt(root, SPACE_CACHE)) {
 		/*
 		 * don't bother trying to write stuff out _if_
 		 * a) we're not cached,
@@ -3408,17 +3405,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 	int loops = 0;
 
 	spin_lock(&cur_trans->dirty_bgs_lock);
-	if (!list_empty(&cur_trans->dirty_bgs)) {
-		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+	if (list_empty(&cur_trans->dirty_bgs)) {
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+		return 0;
 	}
+	list_splice_init(&cur_trans->dirty_bgs, &dirty);
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
 again:
-	if (list_empty(&dirty)) {
-		btrfs_free_path(path);
-		return 0;
-	}
-
 	/*
 	 * make sure all the block groups on our dirty list actually
 	 * exist
@@ -3431,18 +3425,16 @@ again:
 		return -ENOMEM;
 	}
 
+	/*
+	 * cache_write_mutex is here only to save us from balance or automatic
+	 * removal of empty block groups deleting this block group while we are
+	 * writing out the cache
+	 */
+	mutex_lock(&trans->transaction->cache_write_mutex);
 	while (!list_empty(&dirty)) {
 		cache = list_first_entry(&dirty,
 					 struct btrfs_block_group_cache,
 					 dirty_list);
-
-		/*
-		 * cache_write_mutex is here only to save us from balance
-		 * deleting this block group while we are writing out the
-		 * cache
-		 */
-		mutex_lock(&trans->transaction->cache_write_mutex);
-
 		/*
 		 * this can happen if something re-dirties a block
 		 * group that is already under IO. Just wait for it to
@@ -3493,9 +3485,30 @@ again:
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
-		mutex_unlock(&trans->transaction->cache_write_mutex);
+			/*
+			 * Our block group might still be attached to the list
+			 * of new block groups in the transaction handle of some
+			 * other task (struct btrfs_trans_handle->new_bgs). This
+			 * means its block group item isn't yet in the extent
+			 * tree. If this happens ignore the error, as we will
+			 * try again later in the critical section of the
+			 * transaction commit.
+			 */
+			if (ret == -ENOENT) {
+				ret = 0;
+				spin_lock(&cur_trans->dirty_bgs_lock);
+				if (list_empty(&cache->dirty_list)) {
+					list_add_tail(&cache->dirty_list,
+						      &cur_trans->dirty_bgs);
+					btrfs_get_block_group(cache);
+				}
+				spin_unlock(&cur_trans->dirty_bgs_lock);
+			} else if (ret) {
+				btrfs_abort_transaction(trans, root, ret);
+			}
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -3503,7 +3516,16 @@ again:
 
 		if (ret)
 			break;
+
+		/*
+		 * Avoid blocking other tasks for too long. It might even save
+		 * us from writing caches for block groups that are going to be
+		 * removed.
+		 */
+		mutex_unlock(&trans->transaction->cache_write_mutex);
+		mutex_lock(&trans->transaction->cache_write_mutex);
 	}
+	mutex_unlock(&trans->transaction->cache_write_mutex);
 
 	/*
 	 * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3536,15 @@ again:
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
 		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+		/*
+		 * dirty_bgs_lock protects us from concurrent block group
+		 * deletes too (not just cache_write_mutex).
+		 */
+		if (!list_empty(&dirty)) {
+			spin_unlock(&cur_trans->dirty_bgs_lock);
+			goto again;
+		}
 		spin_unlock(&cur_trans->dirty_bgs_lock);
-		goto again;
 	}
 
 	btrfs_free_path(path);
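The hunks above drain a shared dirty list by splicing it to a private list under a spinlock, write each entry while holding cache_write_mutex, drop and retake that mutex between entries so other tasks are not starved, and re-splice anything that was re-dirtied meanwhile. Below is a minimal standalone user-space sketch of that drain/yield/retry shape, not btrfs code; the names (work_lock, cache_mutex, dirty_list, write_dirty_items) are hypothetical, and the real function adds IO handling, error paths and a loops counter omitted here.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	int id;
};

static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;   /* role of dirty_bgs_lock */
static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER; /* role of cache_write_mutex */
static struct item *dirty_list;                                  /* role of cur_trans->dirty_bgs */

static void add_dirty(int id)
{
	struct item *it = malloc(sizeof(*it));

	if (!it)
		return;
	it->id = id;
	pthread_mutex_lock(&work_lock);
	it->next = dirty_list;
	dirty_list = it;
	pthread_mutex_unlock(&work_lock);
}

static void write_dirty_items(void)
{
	struct item *local;

	/* splice the shared list to a private one under the list lock */
	pthread_mutex_lock(&work_lock);
	local = dirty_list;
	dirty_list = NULL;
	pthread_mutex_unlock(&work_lock);
	if (!local)
		return;
again:
	pthread_mutex_lock(&cache_mutex);
	while (local) {
		struct item *it = local;

		local = it->next;
		printf("writing item %d\n", it->id);
		free(it);
		/* drop and retake the mutex so other tasks are not blocked for long */
		pthread_mutex_unlock(&cache_mutex);
		pthread_mutex_lock(&cache_mutex);
	}
	pthread_mutex_unlock(&cache_mutex);

	/* anything re-dirtied while we were writing gets another pass */
	pthread_mutex_lock(&work_lock);
	local = dirty_list;
	dirty_list = NULL;
	pthread_mutex_unlock(&work_lock);
	if (local)
		goto again;
}

int main(void)
{
	add_dirty(1);
	add_dirty(2);
	write_dirty_items();
	return 0;
}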
@@ -3588,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
+			if (ret)
+				btrfs_abort_transaction(trans, root, ret);
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -7537,7 +7569,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
  * returns the key for the extent through ins, and a tree buffer for
  * the first block of the extent through buf.
  *
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
  */
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
@@ -7548,6 +7580,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	struct btrfs_block_rsv *block_rsv;
 	struct extent_buffer *buf;
+	struct btrfs_delayed_extent_op *extent_op;
 	u64 flags = 0;
 	int ret;
 	u32 blocksize = root->nodesize;
@@ -7568,13 +7601,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_reserve_extent(root, blocksize, blocksize,
 				   empty_size, hint, &ins, 0, 0);
-	if (ret) {
-		unuse_block_rsv(root->fs_info, block_rsv, blocksize);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto out_unuse;
 
 	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
-	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_free_reserved;
+	}
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
@@ -7584,9 +7618,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	BUG_ON(parent > 0);
 
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-		struct btrfs_delayed_extent_op *extent_op;
 		extent_op = btrfs_alloc_delayed_extent_op();
-		BUG_ON(!extent_op); /* -ENOMEM */
+		if (!extent_op) {
+			ret = -ENOMEM;
+			goto out_free_buf;
+		}
 		if (key)
 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
 		else
@@ -7601,13 +7637,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		extent_op->level = level;
 
 		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
-						 ins.objectid,
-						 ins.offset, parent, root_objectid,
-						 level, BTRFS_ADD_DELAYED_EXTENT,
+						 ins.objectid, ins.offset,
+						 parent, root_objectid, level,
+						 BTRFS_ADD_DELAYED_EXTENT,
 						 extent_op, 0);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret)
+			goto out_free_delayed;
 	}
 	return buf;
+
+out_free_delayed:
+	btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+	free_extent_buffer(buf);
+out_free_reserved:
+	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+	return ERR_PTR(ret);
 }
 
 struct walk_control {
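The btrfs_alloc_tree_block() rework above replaces BUG_ON() on allocation failures with a chain of exit labels that release resources in the reverse order they were acquired. The following is a standalone sketch of that unwind pattern with hypothetical resources (plain malloc instead of the btrfs reservation and delayed-op APIs), not the actual kernel code:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct buffer {
	char *data;
};

static struct buffer *alloc_tree_block_sketch(size_t size)
{
	struct buffer *buf;
	char *reserved;
	char *op;
	int ret;

	reserved = malloc(size);          /* step 1: reserve space */
	if (!reserved) {
		ret = -ENOMEM;
		goto out;
	}

	buf = malloc(sizeof(*buf));       /* step 2: the buffer itself */
	if (!buf) {
		ret = -ENOMEM;
		goto out_free_reserved;
	}
	buf->data = reserved;

	op = malloc(64);                  /* step 3: a delayed-op record */
	if (!op) {
		ret = -ENOMEM;
		goto out_free_buf;
	}
	free(op);                         /* the success path would queue it instead */
	return buf;

out_free_buf:
	free(buf);
out_free_reserved:
	free(reserved);
out:
	errno = -ret;
	return NULL;
}

int main(void)
{
	struct buffer *b = alloc_tree_block_sketch(4096);

	if (!b) {
		perror("alloc_tree_block_sketch");
		return 1;
	}
	free(b->data);
	free(b);
	return 0;
}

The design point is that every failure site only needs to know where to jump, and each label frees exactly one resource before falling through to the next, so no failure path can leak or double-free.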
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 782f3bc4651d..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 	do {
 		index--;
 		page = eb->pages[index];
-		if (page && mapped) {
+		if (!page)
+			continue;
+		if (mapped)
 			spin_lock(&page->mapping->private_lock);
+		/*
+		 * We do this since we'll remove the pages after we've
+		 * removed the eb from the radix tree, so we could race
+		 * and have this page now attached to the new eb. So
+		 * only clear page_private if it's still connected to
+		 * this eb.
+		 */
+		if (PagePrivate(page) &&
+		    page->private == (unsigned long)eb) {
+			BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+			BUG_ON(PageDirty(page));
+			BUG_ON(PageWriteback(page));
 			/*
-			 * We do this since we'll remove the pages after we've
-			 * removed the eb from the radix tree, so we could race
-			 * and have this page now attached to the new eb. So
-			 * only clear page_private if it's still connected to
-			 * this eb.
+			 * We need to make sure we haven't be attached
+			 * to a new eb.
 			 */
-			if (PagePrivate(page) &&
-			    page->private == (unsigned long)eb) {
-				BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-				BUG_ON(PageDirty(page));
-				BUG_ON(PageWriteback(page));
-				/*
-				 * We need to make sure we haven't be attached
-				 * to a new eb.
-				 */
-				ClearPagePrivate(page);
-				set_page_private(page, 0);
-				/* One for the page private */
-				page_cache_release(page);
-			}
-			spin_unlock(&page->mapping->private_lock);
-
-		}
-		if (page) {
-			/* One for when we alloced the page */
+			ClearPagePrivate(page);
+			set_page_private(page, 0);
+			/* One for the page private */
 			page_cache_release(page);
 		}
+
+		if (mapped)
+			spin_unlock(&page->mapping->private_lock);
+
+		/* One for when we alloced the page */
+		page_cache_release(page);
 	} while (index != 0);
 }
@@ -4771,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 			       start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
+		/*
+		 * Lock our eb's refs_lock to avoid races with
+		 * free_extent_buffer. When we get our eb it might be flagged
+		 * with EXTENT_BUFFER_STALE and another task running
+		 * free_extent_buffer might have seen that flag set,
+		 * eb->refs == 2, that the buffer isn't under IO (dirty and
+		 * writeback flags not set) and it's still in the tree (flag
+		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+		 * of decrementing the extent buffer's reference count twice.
+		 * So here we could race and increment the eb's reference count,
+		 * clear its stale flag, mark it as dirty and drop our reference
+		 * before the other task finishes executing free_extent_buffer,
+		 * which would later result in an attempt to free an extent
+		 * buffer that is dirty.
+		 */
+		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+			spin_lock(&eb->refs_lock);
+			spin_unlock(&eb->refs_lock);
+		}
 		mark_extent_buffer_accessed(eb, NULL);
 		return eb;
 	}
@@ -4870,6 +4890,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			mark_extent_buffer_accessed(exists, p);
 			goto free_eb;
 		}
+		exists = NULL;
 
 		/*
 		 * Do this so attach doesn't complain and we need to
@@ -4933,12 +4954,12 @@ again:
 	return eb;
 
 free_eb:
+	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	for (i = 0; i < num_pages; i++) {
 		if (eb->pages[i])
 			unlock_page(eb->pages[i]);
 	}
 
-	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	btrfs_release_extent_buffer(eb);
 	return exists;
 }
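The find_extent_buffer() change above uses a lock/unlock pair purely as a synchronization barrier: taking and immediately dropping refs_lock cannot complete until any free_extent_buffer() critical section already holding that lock has finished, so the stale buffer is never reused while it is being torn down. Here is a minimal standalone user-space sketch of that idiom; the names (struct eb_sketch, lookup) are hypothetical and the atomic/flag handling is greatly simplified compared with the kernel code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct eb_sketch {
	pthread_mutex_t refs_lock;
	int refs;
	bool stale;
};

static void lookup(struct eb_sketch *eb)
{
	__atomic_add_fetch(&eb->refs, 1, __ATOMIC_SEQ_CST); /* like atomic_inc_not_zero() */
	if (eb->stale) {
		/*
		 * Take and immediately drop refs_lock: this cannot return
		 * until a concurrent release path holding the lock has
		 * finished its critical section.
		 */
		pthread_mutex_lock(&eb->refs_lock);
		pthread_mutex_unlock(&eb->refs_lock);
	}
	eb->stale = false; /* now safe to clear the flag and reuse the buffer */
}

int main(void)
{
	struct eb_sketch eb = {
		.refs_lock = PTHREAD_MUTEX_INITIALIZER,
		.refs = 1,
		.stale = true,
	};

	lookup(&eb);
	printf("refs=%d stale=%d\n", eb.refs, eb.stale);
	return 0;
}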
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81fa75a8e1f3..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 
 	mapping_set_gfp_mask(inode->i_mapping,
 			mapping_gfp_mask(inode->i_mapping) &
-			~(GFP_NOFS & ~__GFP_HIGHMEM));
+			~(__GFP_FS | __GFP_HIGHMEM));
 
 	return inode;
 }
@@ -1218,7 +1218,7 @@ out:
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount. This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	int must_iput = 0;
 
 	if (!i_size_read(inode))
-		return -1;
+		return -EIO;
 
 	WARN_ON(io_ctl->pages);
 	ret = io_ctl_init(io_ctl, inode, root, 1);
 	if (ret)
-		return -1;
+		return ret;
 
 	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
 		down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	}
 
 	/* Lock all pages first so we can lock the extent safely. */
-	io_ctl_prepare_pages(io_ctl, inode, 0);
+	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+	if (ret)
+		goto out;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state);
@@ -3464,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	int ret;
 	struct btrfs_io_ctl io_ctl;
+	bool release_metadata = true;
 
 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 		return 0;
@@ -3471,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	memset(&io_ctl, 0, sizeof(io_ctl));
 	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
 				      trans, path, 0);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * At this point writepages() didn't error out, so our metadata
+		 * reservation is released when the writeback finishes, at
+		 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+		 * with or without an error.
+		 */
+		release_metadata = false;
 		ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+	}
 
 	if (ret) {
-		btrfs_delalloc_release_metadata(inode, inode->i_size);
+		if (release_metadata)
+			btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
 		btrfs_err(root->fs_info,
 			"failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada4d24ed11b..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3632,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
 	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
 
+	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
+	inode->i_generation = BTRFS_I(inode)->generation;
+	inode->i_rdev = 0;
+	rdev = btrfs_inode_rdev(leaf, inode_item);
+
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
 	/*
 	 * If we were modified in the current generation and evicted from memory
 	 * and then re-read we need to do a full sync since we don't have any
 	 * idea about which extents were modified before we were evicted from
 	 * cache.
+	 *
+	 * This is required for both inode re-read from disk and delayed inode
+	 * in delayed_nodes_tree.
 	 */
 	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
 		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			&BTRFS_I(inode)->runtime_flags);
 
-	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
-	inode->i_generation = BTRFS_I(inode)->generation;
-	inode->i_rdev = 0;
-	rdev = btrfs_inode_rdev(leaf, inode_item);
-
-	BTRFS_I(inode)->index_cnt = (u64)-1;
-	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-cache_index:
 	path->slots[0]++;
 	if (inode->i_nlink != 1 ||
 	    path->slots[0] >= btrfs_header_nritems(leaf))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b05653f182c2..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 			"Attempt to delete subvolume %llu during send",
 			dest->root_key.objectid);
 		err = -EPERM;
-		goto out_dput;
+		goto out_unlock_inode;
 	}
 
 	d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
 				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
 		spin_unlock(&dest->root_item_lock);
 	}
+out_unlock_inode:
 	mutex_unlock(&inode->i_mutex);
 	if (!err) {
 		shrink_dcache_sb(root->fs_info->sb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
 	int ret = 0;
+	int ret_wb = 0;
 	u64 end;
 	u64 orig_end;
 	struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	if (ret)
 		return ret;
 
-	ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-	if (ret)
-		return ret;
+	/*
+	 * If we have a writeback error don't return immediately. Wait first
+	 * for any ordered extents that haven't completed yet. This is to make
+	 * sure no one can dirty the same page ranges and call writepages()
+	 * before the ordered extents complete - to avoid failures (-EEXIST)
+	 * when adding the new ordered extents to the ordered tree.
+	 */
+	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
 	end = orig_end;
 	while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 			break;
 		end--;
 	}
-	return ret;
+	return ret_wb ? ret_wb : ret;
 }
 
 /*
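The change above captures the writeback error in ret_wb instead of returning early, keeps waiting for the outstanding ordered extents, and only reports the error afterwards, giving the writeback error priority in the return value. A minimal standalone sketch of that deferred-error shape (the helpers wait_writeback_sketch and wait_ordered_sketch are hypothetical stand-ins, not the kernel APIs):

#include <stdio.h>

static int wait_writeback_sketch(void) { return -5; /* pretend -EIO */ }
static int wait_ordered_sketch(void)   { return 0; }

static int wait_range_sketch(void)
{
	int ret = 0;
	int ret_wb;

	ret_wb = wait_writeback_sketch(); /* remember the error, do not return yet */
	ret = wait_ordered_sketch();      /* still drain the remaining work */
	return ret_wb ? ret_wb : ret;     /* the earlier writeback error wins */
}

int main(void)
{
	printf("wait_range_sketch() = %d\n", wait_range_sketch());
	return 0;
}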
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bcd2a007517..96aebf3bcd5b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
 	struct extent_map *em;
 	struct list_head *search_list = &trans->transaction->pending_chunks;
 	int ret = 0;
+	u64 physical_start = *start;
 
 again:
 	list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
 		for (i = 0; i < map->num_stripes; i++) {
 			if (map->stripes[i].dev != device)
 				continue;
-			if (map->stripes[i].physical >= *start + len ||
+			if (map->stripes[i].physical >= physical_start + len ||
 			    map->stripes[i].physical + em->orig_block_len <=
-			    *start)
+			    physical_start)
 				continue;
 			*start = map->stripes[i].physical +
 				em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
 			 */
 			if (contains_pending_extent(trans, device,
 						    &search_start,
-						    hole_size))
-				hole_size = 0;
+						    hole_size)) {
+				if (key.offset >= search_start) {
+					hole_size = key.offset - search_start;
+				} else {
+					WARN_ON_ONCE(1);
+					hole_size = 0;
+				}
+			}
 
 			if (hole_size > max_hole_size) {
 				max_hole_start = search_start;
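In contains_pending_extent() the *start parameter is both an input (the candidate hole start used in the overlap tests) and an output (advanced past pending chunks), so once the loop updates it the remaining comparisons must keep using the original value; the fix snapshots it into physical_start. A standalone sketch of that in/out-parameter pitfall with hypothetical data (not the btrfs structures):

#include <stdbool.h>
#include <stdio.h>

struct stripe_sketch {
	unsigned long long physical;
	unsigned long long len;
};

static bool overlaps_pending(unsigned long long *start, unsigned long long len,
			     const struct stripe_sketch *s, int nr)
{
	unsigned long long physical_start = *start; /* snapshot the input */
	bool ret = false;
	int i;

	for (i = 0; i < nr; i++) {
		/* compare against the snapshot, never the updated output */
		if (s[i].physical >= physical_start + len ||
		    s[i].physical + s[i].len <= physical_start)
			continue;
		/* advance the caller's search start past the overlapping chunk */
		*start = s[i].physical + s[i].len;
		ret = true;
	}
	return ret;
}

int main(void)
{
	struct stripe_sketch s[] = { { 4096, 4096 }, { 16384, 4096 } };
	unsigned long long start = 0;

	printf("overlap=%d new start=%llu\n",
	       overlaps_pending(&start, 32768, s, 2), start);
	return 0;
}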