diff options
author | Josef Bacik <jbacik@fusionio.com> | 2013-05-29 14:54:47 -0400 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2013-06-14 11:30:06 -0400 |
commit | d52be818e618bd252601b340ca6df760d77410e8 (patch) | |
tree | 7d1fc410b90a96487cb3a949891fe2f6a5e96f87 /fs/btrfs/inode.c | |
parent | c6adc9cc082e3cffda153999c9b9f8a8baaaaf45 (diff) |
Btrfs: simplify unlink reservations
Dave pointed out a problem where, if you filled up a file system as much as
possible, you couldn't remove any files. The whole unlink reservation thing is
convoluted because it tries to guess whether or not it will need to add space
in order to unlink something, and it has all these odd uncommented cases where
it simply does not try. So to fix this I've added a way to conditionally steal
from the global reserve if we can't make our normal reservation: if we have
more than half the space in the global reserve free, we will go ahead and steal
from the global reserve. With this patch Dave's reproducer now works and I can
rm all the files on the file system. Thanks,
Reported-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 212 |
1 files changed, 22 insertions, 190 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 51520755f4dc..c0e95b1554a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3679,53 +3679,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
3679 | } | 3679 | } |
3680 | return ret; | 3680 | return ret; |
3681 | } | 3681 | } |
3682 | |||
3683 | |||
3684 | /* helper to check if there is any shared block in the path */ | ||
3685 | static int check_path_shared(struct btrfs_root *root, | ||
3686 | struct btrfs_path *path) | ||
3687 | { | ||
3688 | struct extent_buffer *eb; | ||
3689 | int level; | ||
3690 | u64 refs = 1; | ||
3691 | |||
3692 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
3693 | int ret; | ||
3694 | |||
3695 | if (!path->nodes[level]) | ||
3696 | break; | ||
3697 | eb = path->nodes[level]; | ||
3698 | if (!btrfs_block_can_be_shared(root, eb)) | ||
3699 | continue; | ||
3700 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1, | ||
3701 | &refs, NULL); | ||
3702 | if (refs > 1) | ||
3703 | return 1; | ||
3704 | } | ||
3705 | return 0; | ||
3706 | } | ||
3707 | 3682 | ||
3708 | /* | 3683 | /* |
3709 | * helper to start transaction for unlink and rmdir. | 3684 | * helper to start transaction for unlink and rmdir. |
3710 | * | 3685 | * |
3711 | * unlink and rmdir are special in btrfs, they do not always free space. | 3686 | * unlink and rmdir are special in btrfs, they do not always free space, so |
3712 | * so in enospc case, we should make sure they will free space before | 3687 | * if we cannot make our reservations the normal way try and see if there is |
3713 | * allowing them to use the global metadata reservation. | 3688 | * plenty of slack room in the global reserve to migrate, otherwise we cannot |
3689 | * allow the unlink to occur. | ||
3714 | */ | 3690 | */ |
3715 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | 3691 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) |
3716 | struct dentry *dentry) | ||
3717 | { | 3692 | { |
3718 | struct btrfs_trans_handle *trans; | 3693 | struct btrfs_trans_handle *trans; |
3719 | struct btrfs_root *root = BTRFS_I(dir)->root; | 3694 | struct btrfs_root *root = BTRFS_I(dir)->root; |
3720 | struct btrfs_path *path; | ||
3721 | struct btrfs_dir_item *di; | ||
3722 | struct inode *inode = dentry->d_inode; | ||
3723 | u64 index; | ||
3724 | int check_link = 1; | ||
3725 | int err = -ENOSPC; | ||
3726 | int ret; | 3695 | int ret; |
3727 | u64 ino = btrfs_ino(inode); | ||
3728 | u64 dir_ino = btrfs_ino(dir); | ||
3729 | 3696 | ||
3730 | /* | 3697 | /* |
3731 | * 1 for the possible orphan item | 3698 | * 1 for the possible orphan item |
@@ -3738,158 +3705,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
3738 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 3705 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
3739 | return trans; | 3706 | return trans; |
3740 | 3707 | ||
3741 | if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 3708 | if (PTR_ERR(trans) == -ENOSPC) { |
3742 | return ERR_PTR(-ENOSPC); | 3709 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); |
3743 | |||
3744 | /* check if there is someone else holds reference */ | ||
3745 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) | ||
3746 | return ERR_PTR(-ENOSPC); | ||
3747 | |||
3748 | if (atomic_read(&inode->i_count) > 2) | ||
3749 | return ERR_PTR(-ENOSPC); | ||
3750 | |||
3751 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
3752 | return ERR_PTR(-ENOSPC); | ||
3753 | |||
3754 | path = btrfs_alloc_path(); | ||
3755 | if (!path) { | ||
3756 | root->fs_info->enospc_unlink = 0; | ||
3757 | return ERR_PTR(-ENOMEM); | ||
3758 | } | ||
3759 | 3710 | ||
3760 | /* 1 for the orphan item */ | 3711 | trans = btrfs_start_transaction(root, 0); |
3761 | trans = btrfs_start_transaction(root, 1); | 3712 | if (IS_ERR(trans)) |
3762 | if (IS_ERR(trans)) { | 3713 | return trans; |
3763 | btrfs_free_path(path); | 3714 | ret = btrfs_cond_migrate_bytes(root->fs_info, |
3764 | root->fs_info->enospc_unlink = 0; | 3715 | &root->fs_info->trans_block_rsv, |
3765 | return trans; | 3716 | num_bytes, 5); |
3766 | } | 3717 | if (ret) { |
3767 | 3718 | btrfs_end_transaction(trans, root); | |
3768 | path->skip_locking = 1; | 3719 | return ERR_PTR(ret); |
3769 | path->search_commit_root = 1; | ||
3770 | |||
3771 | ret = btrfs_lookup_inode(trans, root, path, | ||
3772 | &BTRFS_I(dir)->location, 0); | ||
3773 | if (ret < 0) { | ||
3774 | err = ret; | ||
3775 | goto out; | ||
3776 | } | ||
3777 | if (ret == 0) { | ||
3778 | if (check_path_shared(root, path)) | ||
3779 | goto out; | ||
3780 | } else { | ||
3781 | check_link = 0; | ||
3782 | } | ||
3783 | btrfs_release_path(path); | ||
3784 | |||
3785 | ret = btrfs_lookup_inode(trans, root, path, | ||
3786 | &BTRFS_I(inode)->location, 0); | ||
3787 | if (ret < 0) { | ||
3788 | err = ret; | ||
3789 | goto out; | ||
3790 | } | ||
3791 | if (ret == 0) { | ||
3792 | if (check_path_shared(root, path)) | ||
3793 | goto out; | ||
3794 | } else { | ||
3795 | check_link = 0; | ||
3796 | } | ||
3797 | btrfs_release_path(path); | ||
3798 | |||
3799 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
3800 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
3801 | ino, (u64)-1, 0); | ||
3802 | if (ret < 0) { | ||
3803 | err = ret; | ||
3804 | goto out; | ||
3805 | } | 3720 | } |
3806 | BUG_ON(ret == 0); /* Corruption */ | ||
3807 | if (check_path_shared(root, path)) | ||
3808 | goto out; | ||
3809 | btrfs_release_path(path); | ||
3810 | } | ||
3811 | |||
3812 | if (!check_link) { | ||
3813 | err = 0; | ||
3814 | goto out; | ||
3815 | } | ||
3816 | |||
3817 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, | ||
3818 | dentry->d_name.name, dentry->d_name.len, 0); | ||
3819 | if (IS_ERR(di)) { | ||
3820 | err = PTR_ERR(di); | ||
3821 | goto out; | ||
3822 | } | ||
3823 | if (di) { | ||
3824 | if (check_path_shared(root, path)) | ||
3825 | goto out; | ||
3826 | } else { | ||
3827 | err = 0; | ||
3828 | goto out; | ||
3829 | } | ||
3830 | btrfs_release_path(path); | ||
3831 | |||
3832 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, | ||
3833 | dentry->d_name.len, ino, dir_ino, 0, | ||
3834 | &index); | ||
3835 | if (ret) { | ||
3836 | err = ret; | ||
3837 | goto out; | ||
3838 | } | ||
3839 | |||
3840 | if (check_path_shared(root, path)) | ||
3841 | goto out; | ||
3842 | |||
3843 | btrfs_release_path(path); | ||
3844 | |||
3845 | /* | ||
3846 | * This is a commit root search, if we can lookup inode item and other | ||
3847 | * relative items in the commit root, it means the transaction of | ||
3848 | * dir/file creation has been committed, and the dir index item that we | ||
3849 | * delay to insert has also been inserted into the commit root. So | ||
3850 | * we needn't worry about the delayed insertion of the dir index item | ||
3851 | * here. | ||
3852 | */ | ||
3853 | di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, | ||
3854 | dentry->d_name.name, dentry->d_name.len, 0); | ||
3855 | if (IS_ERR(di)) { | ||
3856 | err = PTR_ERR(di); | ||
3857 | goto out; | ||
3858 | } | ||
3859 | BUG_ON(ret == -ENOENT); | ||
3860 | if (check_path_shared(root, path)) | ||
3861 | goto out; | ||
3862 | |||
3863 | err = 0; | ||
3864 | out: | ||
3865 | btrfs_free_path(path); | ||
3866 | /* Migrate the orphan reservation over */ | ||
3867 | if (!err) | ||
3868 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
3869 | &root->fs_info->global_block_rsv, | ||
3870 | trans->bytes_reserved); | ||
3871 | |||
3872 | if (err) { | ||
3873 | btrfs_end_transaction(trans, root); | ||
3874 | root->fs_info->enospc_unlink = 0; | ||
3875 | return ERR_PTR(err); | ||
3876 | } | ||
3877 | |||
3878 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
3879 | return trans; | ||
3880 | } | ||
3881 | |||
3882 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
3883 | struct btrfs_root *root) | ||
3884 | { | ||
3885 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { | ||
3886 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3887 | trans->bytes_reserved); | ||
3888 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3721 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
3889 | BUG_ON(!root->fs_info->enospc_unlink); | 3722 | trans->bytes_reserved = num_bytes; |
3890 | root->fs_info->enospc_unlink = 0; | ||
3891 | } | 3723 | } |
3892 | btrfs_end_transaction(trans, root); | 3724 | return trans; |
3893 | } | 3725 | } |
3894 | 3726 | ||
3895 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 3727 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
@@ -3899,7 +3731,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3899 | struct inode *inode = dentry->d_inode; | 3731 | struct inode *inode = dentry->d_inode; |
3900 | int ret; | 3732 | int ret; |
3901 | 3733 | ||
3902 | trans = __unlink_start_trans(dir, dentry); | 3734 | trans = __unlink_start_trans(dir); |
3903 | if (IS_ERR(trans)) | 3735 | if (IS_ERR(trans)) |
3904 | return PTR_ERR(trans); | 3736 | return PTR_ERR(trans); |
3905 | 3737 | ||
@@ -3917,7 +3749,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3917 | } | 3749 | } |
3918 | 3750 | ||
3919 | out: | 3751 | out: |
3920 | __unlink_end_trans(trans, root); | 3752 | btrfs_end_transaction(trans, root); |
3921 | btrfs_btree_balance_dirty(root); | 3753 | btrfs_btree_balance_dirty(root); |
3922 | return ret; | 3754 | return ret; |
3923 | } | 3755 | } |
@@ -4014,7 +3846,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4014 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | 3846 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) |
4015 | return -EPERM; | 3847 | return -EPERM; |
4016 | 3848 | ||
4017 | trans = __unlink_start_trans(dir, dentry); | 3849 | trans = __unlink_start_trans(dir); |
4018 | if (IS_ERR(trans)) | 3850 | if (IS_ERR(trans)) |
4019 | return PTR_ERR(trans); | 3851 | return PTR_ERR(trans); |
4020 | 3852 | ||
@@ -4036,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4036 | if (!err) | 3868 | if (!err) |
4037 | btrfs_i_size_write(inode, 0); | 3869 | btrfs_i_size_write(inode, 0); |
4038 | out: | 3870 | out: |
4039 | __unlink_end_trans(trans, root); | 3871 | btrfs_end_transaction(trans, root); |
4040 | btrfs_btree_balance_dirty(root); | 3872 | btrfs_btree_balance_dirty(root); |
4041 | 3873 | ||
4042 | return err; | 3874 | return err; |