Diffstat (limited to 'fs/btrfs')
 -rw-r--r--  fs/btrfs/delayed-inode.c     |   2
 -rw-r--r--  fs/btrfs/extent-tree.c       | 121
 -rw-r--r--  fs/btrfs/extent_io.c         |  73
 -rw-r--r--  fs/btrfs/free-space-cache.c  |  26
 -rw-r--r--  fs/btrfs/inode.c             |  21
 -rw-r--r--  fs/btrfs/ioctl.c             |   3
 -rw-r--r--  fs/btrfs/ordered-data.c      |  14
 -rw-r--r--  fs/btrfs/volumes.c           |  15
 8 files changed, 187 insertions(+), 88 deletions(-)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index cde698a07d21..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1802,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
 	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
 	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+	BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
 	inode->i_version = btrfs_stack_inode_sequence(inode_item);
 	inode->i_rdev = 0;
 	*rdev = btrfs_stack_inode_rdev(inode_item);
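
The hunk above makes btrfs_fill_inode() copy the on-disk transid into BTRFS_I(inode)->last_trans when an inode is filled from a delayed node, matching what btrfs_read_locked_inode() already does for inodes read from disk. The consumer of that field is the full-sync check shown in the fs/btrfs/inode.c hunk later in this diff; excerpted here for context (kernel-context code, not standalone):

	/*
	 * An fsync of an inode modified in the still-running transaction
	 * cannot rely on the log tree alone, so last_trans must survive
	 * re-reads of the inode (from disk or from a delayed node).
	 */
	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);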
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1eef4ee01d1a..7effed6f2fa6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3178,10 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
 	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(path);
 fail:
-	if (ret)
-		btrfs_abort_transaction(trans, root, ret);
+	btrfs_release_path(path);
 	return ret;
 
 }
@@ -3305,8 +3303,7 @@ again:
 
 	spin_lock(&block_group->lock);
 	if (block_group->cached != BTRFS_CACHE_FINISHED ||
-	    !btrfs_test_opt(root, SPACE_CACHE) ||
-	    block_group->delalloc_bytes) {
+	    !btrfs_test_opt(root, SPACE_CACHE)) {
 		/*
 		 * don't bother trying to write stuff out _if_
 		 * a) we're not cached,
@@ -3408,17 +3405,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 	int loops = 0;
 
 	spin_lock(&cur_trans->dirty_bgs_lock);
-	if (!list_empty(&cur_trans->dirty_bgs)) {
-		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+	if (list_empty(&cur_trans->dirty_bgs)) {
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+		return 0;
 	}
+	list_splice_init(&cur_trans->dirty_bgs, &dirty);
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
 again:
-	if (list_empty(&dirty)) {
-		btrfs_free_path(path);
-		return 0;
-	}
-
 	/*
 	 * make sure all the block groups on our dirty list actually
 	 * exist
@@ -3431,18 +3425,16 @@ again:
 		return -ENOMEM;
 	}
 
+	/*
+	 * cache_write_mutex is here only to save us from balance or automatic
+	 * removal of empty block groups deleting this block group while we are
+	 * writing out the cache
+	 */
+	mutex_lock(&trans->transaction->cache_write_mutex);
 	while (!list_empty(&dirty)) {
 		cache = list_first_entry(&dirty,
 					 struct btrfs_block_group_cache,
 					 dirty_list);
-
-		/*
-		 * cache_write_mutex is here only to save us from balance
-		 * deleting this block group while we are writing out the
-		 * cache
-		 */
-		mutex_lock(&trans->transaction->cache_write_mutex);
-
 		/*
 		 * this can happen if something re-dirties a block
 		 * group that is already under IO.  Just wait for it to
@@ -3493,9 +3485,30 @@ again:
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
-		mutex_unlock(&trans->transaction->cache_write_mutex);
+			/*
+			 * Our block group might still be attached to the list
+			 * of new block groups in the transaction handle of some
+			 * other task (struct btrfs_trans_handle->new_bgs). This
+			 * means its block group item isn't yet in the extent
+			 * tree. If this happens ignore the error, as we will
+			 * try again later in the critical section of the
+			 * transaction commit.
+			 */
+			if (ret == -ENOENT) {
+				ret = 0;
+				spin_lock(&cur_trans->dirty_bgs_lock);
+				if (list_empty(&cache->dirty_list)) {
+					list_add_tail(&cache->dirty_list,
+						      &cur_trans->dirty_bgs);
+					btrfs_get_block_group(cache);
+				}
+				spin_unlock(&cur_trans->dirty_bgs_lock);
+			} else if (ret) {
+				btrfs_abort_transaction(trans, root, ret);
+			}
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -3503,7 +3516,16 @@ again:
 
 		if (ret)
 			break;
+
+		/*
+		 * Avoid blocking other tasks for too long. It might even save
+		 * us from writing caches for block groups that are going to be
+		 * removed.
+		 */
+		mutex_unlock(&trans->transaction->cache_write_mutex);
+		mutex_lock(&trans->transaction->cache_write_mutex);
 	}
+	mutex_unlock(&trans->transaction->cache_write_mutex);
 
 	/*
 	 * go through delayed refs for all the stuff we've just kicked off
@@ -3514,8 +3536,15 @@ again:
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
 		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+		/*
+		 * dirty_bgs_lock protects us from concurrent block group
+		 * deletes too (not just cache_write_mutex).
+		 */
+		if (!list_empty(&dirty)) {
+			spin_unlock(&cur_trans->dirty_bgs_lock);
+			goto again;
+		}
 		spin_unlock(&cur_trans->dirty_bgs_lock);
-		goto again;
 	}
 
 	btrfs_free_path(path);
@@ -3588,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
+			if (ret)
+				btrfs_abort_transaction(trans, root, ret);
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -7537,7 +7569,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
  * returns the key for the extent through ins, and a tree buffer for
  * the first block of the extent through buf.
  *
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
  */
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
@@ -7548,6 +7580,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_key ins;
 	struct btrfs_block_rsv *block_rsv;
 	struct extent_buffer *buf;
+	struct btrfs_delayed_extent_op *extent_op;
 	u64 flags = 0;
 	int ret;
 	u32 blocksize = root->nodesize;
@@ -7568,13 +7601,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_reserve_extent(root, blocksize, blocksize,
 				   empty_size, hint, &ins, 0, 0);
-	if (ret) {
-		unuse_block_rsv(root->fs_info, block_rsv, blocksize);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto out_unuse;
 
 	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
-	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_free_reserved;
+	}
 
 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
@@ -7584,9 +7618,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 	BUG_ON(parent > 0);
 
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-		struct btrfs_delayed_extent_op *extent_op;
 		extent_op = btrfs_alloc_delayed_extent_op();
-		BUG_ON(!extent_op); /* -ENOMEM */
+		if (!extent_op) {
+			ret = -ENOMEM;
+			goto out_free_buf;
+		}
 		if (key)
 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
 		else
@@ -7601,13 +7637,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		extent_op->level = level;
 
 		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
-						ins.objectid,
-						ins.offset, parent, root_objectid,
-						level, BTRFS_ADD_DELAYED_EXTENT,
-						extent_op, 0);
-		BUG_ON(ret); /* -ENOMEM */
+						 ins.objectid, ins.offset,
+						 parent, root_objectid, level,
+						 BTRFS_ADD_DELAYED_EXTENT,
+						 extent_op, 0);
+		if (ret)
+			goto out_free_delayed;
 	}
 	return buf;
+
+out_free_delayed:
+	btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+	free_extent_buffer(buf);
+out_free_reserved:
+	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+	return ERR_PTR(ret);
 }
 
 struct walk_control {
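
The btrfs_alloc_tree_block() hunks above replace three BUG_ON() calls with a staged unwind: each failure jumps to a label that releases only what was already acquired (delayed op, buffer, reserved extent, block reservation), in reverse order of acquisition. A minimal user-space sketch of the same ladder, abbreviated to three stages, with malloc() standing in for the hypothetical acquisitions (illustration only, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical resources standing in for the block reservation, the
 * reserved extent and the buffer; each out_* label undoes exactly the
 * steps that succeeded before the failure. */
static int alloc_tree_block_like(void)
{
	int ret;
	char *rsv, *extent, *buf;

	rsv = malloc(16);		/* stands in for use_block_rsv() */
	if (!rsv)
		return -1;

	extent = malloc(16);		/* stands in for btrfs_reserve_extent() */
	if (!extent) {
		ret = -1;
		goto out_unuse;
	}

	buf = malloc(16);		/* stands in for btrfs_init_new_buffer() */
	if (!buf) {
		ret = -1;
		goto out_free_reserved;
	}

	free(buf);
	free(extent);
	free(rsv);
	return 0;

out_free_reserved:
	free(extent);
out_unuse:
	free(rsv);
	return ret;
}

int main(void)
{
	printf("ret = %d\n", alloc_tree_block_like());
	return 0;
}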
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 782f3bc4651d..c32d226bfecc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4560,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 	do {
 		index--;
 		page = eb->pages[index];
-		if (page && mapped) {
+		if (!page)
+			continue;
+		if (mapped)
 			spin_lock(&page->mapping->private_lock);
+		/*
+		 * We do this since we'll remove the pages after we've
+		 * removed the eb from the radix tree, so we could race
+		 * and have this page now attached to the new eb.  So
+		 * only clear page_private if it's still connected to
+		 * this eb.
+		 */
+		if (PagePrivate(page) &&
+		    page->private == (unsigned long)eb) {
+			BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+			BUG_ON(PageDirty(page));
+			BUG_ON(PageWriteback(page));
 			/*
-			 * We do this since we'll remove the pages after we've
-			 * removed the eb from the radix tree, so we could race
-			 * and have this page now attached to the new eb.  So
-			 * only clear page_private if it's still connected to
-			 * this eb.
+			 * We need to make sure we haven't be attached
+			 * to a new eb.
			 */
-			if (PagePrivate(page) &&
-			    page->private == (unsigned long)eb) {
-				BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-				BUG_ON(PageDirty(page));
-				BUG_ON(PageWriteback(page));
-				/*
-				 * We need to make sure we haven't be attached
-				 * to a new eb.
-				 */
-				ClearPagePrivate(page);
-				set_page_private(page, 0);
-				/* One for the page private */
-				page_cache_release(page);
-			}
-			spin_unlock(&page->mapping->private_lock);
-
-		}
-		if (page) {
-			/* One for when we alloced the page */
+			ClearPagePrivate(page);
+			set_page_private(page, 0);
+			/* One for the page private */
 			page_cache_release(page);
 		}
+
+		if (mapped)
+			spin_unlock(&page->mapping->private_lock);
+
+		/* One for when we alloced the page */
+		page_cache_release(page);
 	} while (index != 0);
 }
 
@@ -4771,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 			       start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
+		/*
+		 * Lock our eb's refs_lock to avoid races with
+		 * free_extent_buffer. When we get our eb it might be flagged
+		 * with EXTENT_BUFFER_STALE and another task running
+		 * free_extent_buffer might have seen that flag set,
+		 * eb->refs == 2, that the buffer isn't under IO (dirty and
+		 * writeback flags not set) and it's still in the tree (flag
+		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+		 * of decrementing the extent buffer's reference count twice.
+		 * So here we could race and increment the eb's reference count,
+		 * clear its stale flag, mark it as dirty and drop our reference
+		 * before the other task finishes executing free_extent_buffer,
+		 * which would later result in an attempt to free an extent
+		 * buffer that is dirty.
+		 */
+		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+			spin_lock(&eb->refs_lock);
+			spin_unlock(&eb->refs_lock);
+		}
 		mark_extent_buffer_accessed(eb, NULL);
 		return eb;
 	}
@@ -4870,6 +4890,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			mark_extent_buffer_accessed(exists, p);
 			goto free_eb;
 		}
+		exists = NULL;
 
 		/*
 		 * Do this so attach doesn't complain and we need to
@@ -4933,12 +4954,12 @@ again:
 	return eb;
 
 free_eb:
+	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	for (i = 0; i < num_pages; i++) {
 		if (eb->pages[i])
 			unlock_page(eb->pages[i]);
 	}
 
-	WARN_ON(!atomic_dec_and_test(&eb->refs));
 	btrfs_release_extent_buffer(eb);
 	return exists;
 }
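
The find_extent_buffer() hunk relies on an empty lock/unlock pair: taking eb->refs_lock and immediately dropping it does no work of its own, but it cannot complete while free_extent_buffer() holds the lock, so any in-flight release of a stale buffer is ordered before this lookup proceeds. A minimal pthread sketch of the same serialization trick (hypothetical names, illustration only; compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

/* The empty lock/unlock pair below mimics the refs_lock trick: it
 * cannot return until any task currently inside the lock-protected
 * release path has finished, ordering the two tasks. */
static pthread_spinlock_t refs_lock;

static void wait_for_concurrent_release(void)
{
	pthread_spin_lock(&refs_lock);
	pthread_spin_unlock(&refs_lock);
}

int main(void)
{
	pthread_spin_init(&refs_lock, PTHREAD_PROCESS_PRIVATE);
	wait_for_concurrent_release();
	puts("serialized");
	pthread_spin_destroy(&refs_lock);
	return 0;
}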
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81fa75a8e1f3..9dbe5b548fa6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 
 	mapping_set_gfp_mask(inode->i_mapping,
 			mapping_gfp_mask(inode->i_mapping) &
-			~(GFP_NOFS & ~__GFP_HIGHMEM));
+			~(__GFP_FS | __GFP_HIGHMEM));
 
 	return inode;
 }
@@ -1218,7 +1218,7 @@ out:
  *
  * This function writes out a free space cache struct to disk for quick recovery
  * on mount.  This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
  */
 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 				   struct btrfs_free_space_ctl *ctl,
@@ -1235,12 +1235,12 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	int must_iput = 0;
 
 	if (!i_size_read(inode))
-		return -1;
+		return -EIO;
 
 	WARN_ON(io_ctl->pages);
 	ret = io_ctl_init(io_ctl, inode, root, 1);
 	if (ret)
-		return -1;
+		return ret;
 
 	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
 		down_write(&block_group->data_rwsem);
@@ -1258,7 +1258,9 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 	}
 
 	/* Lock all pages first so we can lock the extent safely. */
-	io_ctl_prepare_pages(io_ctl, inode, 0);
+	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+	if (ret)
+		goto out;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state);
@@ -3464,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	int ret;
 	struct btrfs_io_ctl io_ctl;
+	bool release_metadata = true;
 
 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 		return 0;
@@ -3471,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	memset(&io_ctl, 0, sizeof(io_ctl));
 	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
 				      trans, path, 0);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * At this point writepages() didn't error out, so our metadata
+		 * reservation is released when the writeback finishes, at
+		 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+		 * with or without an error.
+		 */
+		release_metadata = false;
 		ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+	}
 
 	if (ret) {
-		btrfs_delalloc_release_metadata(inode, inode->i_size);
+		if (release_metadata)
+			btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
 		btrfs_err(root->fs_info,
 			"failed to write free ino cache for root %llu",
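
In kernels of this era GFP_NOFS is (__GFP_WAIT | __GFP_IO), so the old mask expression ~(GFP_NOFS & ~__GFP_HIGHMEM) cleared the wait and IO bits while leaving __GFP_FS set - the opposite of what a free-space-cache mapping needs, since allocations against it must not re-enter the filesystem. The fixed expression clears exactly __GFP_FS and __GFP_HIGHMEM. A user-space sketch of the bit arithmetic, with made-up flag values (the real constants live in include/linux/gfp.h and differ between kernel versions):

#include <stdio.h>

/* Illustrative bit values only, not the real __GFP_* constants. */
#define __GFP_HIGHMEM 0x02u
#define __GFP_WAIT    0x10u
#define __GFP_IO      0x40u
#define __GFP_FS      0x80u
#define GFP_NOFS      (__GFP_WAIT | __GFP_IO)

int main(void)
{
	unsigned int mask = __GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM;

	/* Old expression: clears WAIT and IO but leaves __GFP_FS set,
	 * so page cache allocations could still recurse into the fs. */
	printf("old: %#x\n", mask & ~(GFP_NOFS & ~__GFP_HIGHMEM));

	/* Fixed expression: clears exactly __GFP_FS and __GFP_HIGHMEM. */
	printf("new: %#x\n", mask & ~(__GFP_FS | __GFP_HIGHMEM));
	return 0;
}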
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada4d24ed11b..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3632,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
 	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
 
+	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
+	inode->i_generation = BTRFS_I(inode)->generation;
+	inode->i_rdev = 0;
+	rdev = btrfs_inode_rdev(leaf, inode_item);
+
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
 	/*
 	 * If we were modified in the current generation and evicted from memory
 	 * and then re-read we need to do a full sync since we don't have any
 	 * idea about which extents were modified before we were evicted from
 	 * cache.
+	 *
+	 * This is required for both inode re-read from disk and delayed inode
+	 * in delayed_nodes_tree.
 	 */
 	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
 		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			&BTRFS_I(inode)->runtime_flags);
 
-	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
-	inode->i_generation = BTRFS_I(inode)->generation;
-	inode->i_rdev = 0;
-	rdev = btrfs_inode_rdev(leaf, inode_item);
-
-	BTRFS_I(inode)->index_cnt = (u64)-1;
-	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-cache_index:
 	path->slots[0]++;
 	if (inode->i_nlink != 1 ||
 	    path->slots[0] >= btrfs_header_nritems(leaf))
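
btrfs_read_locked_inode() jumps to the cache_index: label when the inode was already populated from a delayed node (by btrfs_fill_inode(), patched in the first hunk of this diff), so everything between the label and the full-sync check used to be skipped on that path. The hunk above moves the plain field initialization before the label and leaves the NEEDS_FULL_SYNC check after it, so both paths run the check. A tiny standalone illustration of why placement relative to the label matters (hypothetical code, not the kernel function):

#include <stdio.h>

/* Code placed after a label runs on both the fall-through path and
 * the goto path; code placed before it runs only on fall-through.
 * The inode.c hunk moves the full-sync check after cache_index: for
 * exactly this reason. */
static void read_inode(int from_delayed_node)
{
	if (from_delayed_node)
		goto cache_index;

	puts("fill fields from the on-disk item");	/* fall-through only */

cache_index:
	puts("check whether a full fsync is needed");	/* both paths */
}

int main(void)
{
	read_inode(0);
	read_inode(1);
	return 0;
}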
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b05653f182c2..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2410,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 			"Attempt to delete subvolume %llu during send",
 			dest->root_key.objectid);
 		err = -EPERM;
-		goto out_dput;
+		goto out_unlock_inode;
 	}
 
 	d_invalidate(dentry);
@@ -2505,6 +2505,7 @@ out_up_write:
 			      root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
 		spin_unlock(&dest->root_item_lock);
 	}
+out_unlock_inode:
 	mutex_unlock(&inode->i_mutex);
 	if (!err) {
 		shrink_dcache_sb(root->fs_info->sb);
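
The two ioctl.c hunks are a lock-balance fix: the -EPERM path taken while a send is in progress used to jump to out_dput, skipping the mutex_unlock() of the inode's i_mutex acquired earlier in the function, so the new out_unlock_inode: label routes it through the unlock. A minimal user-space sketch of the labelled-unlock pattern (hypothetical names; compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

/* Every error taken after the lock is acquired must exit through a
 * label that releases it - the shape the ioctl.c hunk adopts. */
static pthread_mutex_t i_mutex = PTHREAD_MUTEX_INITIALIZER;

static int snap_destroy_like(int sending)
{
	int err = 0;

	pthread_mutex_lock(&i_mutex);
	if (sending) {
		err = -1;		/* -EPERM in the real code */
		goto out_unlock_inode;	/* was: goto out_dput (leaked the lock) */
	}
	/* ... destroy the snapshot ... */
out_unlock_inode:
	pthread_mutex_unlock(&i_mutex);
	return err;
}

int main(void)
{
	printf("%d %d\n", snap_destroy_like(0), snap_destroy_like(1));
	return 0;
}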
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 157cc54fc634..760c4a5e096b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
 	int ret = 0;
+	int ret_wb = 0;
 	u64 end;
 	u64 orig_end;
 	struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	if (ret)
 		return ret;
 
-	ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-	if (ret)
-		return ret;
+	/*
+	 * If we have a writeback error don't return immediately. Wait first
+	 * for any ordered extents that haven't completed yet. This is to make
+	 * sure no one can dirty the same page ranges and call writepages()
+	 * before the ordered extents complete - to avoid failures (-EEXIST)
+	 * when adding the new ordered extents to the ordered tree.
+	 */
+	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
 	end = orig_end;
 	while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 			break;
 		end--;
 	}
-	return ret;
+	return ret_wb ? ret_wb : ret;
 }
 
 /*
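
The ordered-data.c change stops btrfs_wait_ordered_range() from returning as soon as filemap_fdatawait_range() reports a writeback error; it records the error in ret_wb, still waits for every ordered extent, and only then reports the writeback error with priority over any later one. A tiny sketch of the record-now, report-later shape (illustration only, hypothetical helper):

#include <stdio.h>

/* Record the first failure but keep waiting, then give the earlier
 * error priority - the shape of the btrfs_wait_ordered_range() change. */
static int flush_and_wait(int wb_err, int ordered_err)
{
	int ret_wb = wb_err;	/* stands in for filemap_fdatawait_range() */
	int ret = ordered_err;	/* stands in for ordered extent completion */

	/* waiting on ordered extents happens even when ret_wb != 0 */
	return ret_wb ? ret_wb : ret;
}

int main(void)
{
	printf("%d\n", flush_and_wait(-5, 0));	/* writeback error wins */
	printf("%d\n", flush_and_wait(0, -2));	/* otherwise ordered error */
	return 0;
}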
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bcd2a007517..96aebf3bcd5b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1058,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
 	struct extent_map *em;
 	struct list_head *search_list = &trans->transaction->pending_chunks;
 	int ret = 0;
+	u64 physical_start = *start;
 
 again:
 	list_for_each_entry(em, search_list, list) {
@@ -1068,9 +1069,9 @@ again:
 		for (i = 0; i < map->num_stripes; i++) {
 			if (map->stripes[i].dev != device)
 				continue;
-			if (map->stripes[i].physical >= *start + len ||
+			if (map->stripes[i].physical >= physical_start + len ||
 			    map->stripes[i].physical + em->orig_block_len <=
-			    *start)
+			    physical_start)
 				continue;
 			*start = map->stripes[i].physical +
 				em->orig_block_len;
@@ -1193,8 +1194,14 @@ again:
 			 */
 			if (contains_pending_extent(trans, device,
 						    &search_start,
-						    hole_size))
-				hole_size = 0;
+						    hole_size)) {
+				if (key.offset >= search_start) {
+					hole_size = key.offset - search_start;
+				} else {
+					WARN_ON_ONCE(1);
+					hole_size = 0;
+				}
+			}
 
 			if (hole_size > max_hole_size) {
 				max_hole_start = search_start;
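
The last volumes.c hunk makes the dev-extent hole search less wasteful when a pending chunk overlaps a candidate hole: contains_pending_extent() may have advanced search_start past the pending stripes, and instead of discarding the hole entirely, the remaining gap up to the next allocated dev extent (key.offset) is kept as a candidate. A standalone sketch of the clamping arithmetic (hypothetical helper, not the kernel function):

#include <stdio.h>

typedef unsigned long long u64;

/* After a pending chunk pushes search_start forward, keep whatever
 * hole remains before the next allocated extent instead of zero. */
static u64 remaining_hole(u64 search_start, u64 next_extent_start)
{
	if (next_extent_start >= search_start)
		return next_extent_start - search_start;
	return 0;	/* WARN_ON_ONCE(1) in the real code */
}

int main(void)
{
	printf("%llu\n", remaining_hole(4096, 16384));	/* 12288 bytes left */
	printf("%llu\n", remaining_hole(16384, 4096));	/* full overlap: 0 */
	return 0;
}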
