about summary refs log tree commit diff stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fusionio.com> 2013-05-29 14:54:47 -0400
committer Josef Bacik <jbacik@fusionio.com> 2013-06-14 11:30:06 -0400
commit d52be818e618bd252601b340ca6df760d77410e8 (patch)
tree 7d1fc410b90a96487cb3a949891fe2f6a5e96f87 /fs/btrfs/inode.c
parent c6adc9cc082e3cffda153999c9b9f8a8baaaaf45 (diff)
Btrfs: simplify unlink reservations
Dave pointed out a problem where, if you filled up a file system as much as possible, you couldn't remove any files. The whole unlink reservation thing is convoluted because it tries to guess whether or not it's going to add space to unlink something, and has all these odd uncommented cases where it simply does not try. So to fix this I've added a way to conditionally steal from the global reserve if we can't make our normal reservation: if more than half the space in the global reserve is free, we go ahead and steal from the global reserve. With this patch Dave's reproducer now works and I can rm all the files on the file system. Thanks,

Reported-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 212
1 files changed, 22 insertions, 190 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51520755f4dc..c0e95b1554a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3679,53 +3679,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3679 } 3679 }
3680 return ret; 3680 return ret;
3681} 3681}
3682
3683
3684/* helper to check if there is any shared block in the path */
3685static int check_path_shared(struct btrfs_root *root,
3686 struct btrfs_path *path)
3687{
3688 struct extent_buffer *eb;
3689 int level;
3690 u64 refs = 1;
3691
3692 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
3693 int ret;
3694
3695 if (!path->nodes[level])
3696 break;
3697 eb = path->nodes[level];
3698 if (!btrfs_block_can_be_shared(root, eb))
3699 continue;
3700 ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
3701 &refs, NULL);
3702 if (refs > 1)
3703 return 1;
3704 }
3705 return 0;
3706}
3707 3682
3708/* 3683/*
3709 * helper to start transaction for unlink and rmdir. 3684 * helper to start transaction for unlink and rmdir.
3710 * 3685 *
3711 * unlink and rmdir are special in btrfs, they do not always free space. 3686 * unlink and rmdir are special in btrfs, they do not always free space, so
3712 * so in enospc case, we should make sure they will free space before 3687 * if we cannot make our reservations the normal way try and see if there is
3713 * allowing them to use the global metadata reservation. 3688 * plenty of slack room in the global reserve to migrate, otherwise we cannot
3689 * allow the unlink to occur.
3714 */ 3690 */
3715static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, 3691static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3716 struct dentry *dentry)
3717{ 3692{
3718 struct btrfs_trans_handle *trans; 3693 struct btrfs_trans_handle *trans;
3719 struct btrfs_root *root = BTRFS_I(dir)->root; 3694 struct btrfs_root *root = BTRFS_I(dir)->root;
3720 struct btrfs_path *path;
3721 struct btrfs_dir_item *di;
3722 struct inode *inode = dentry->d_inode;
3723 u64 index;
3724 int check_link = 1;
3725 int err = -ENOSPC;
3726 int ret; 3695 int ret;
3727 u64 ino = btrfs_ino(inode);
3728 u64 dir_ino = btrfs_ino(dir);
3729 3696
3730 /* 3697 /*
3731 * 1 for the possible orphan item 3698 * 1 for the possible orphan item
@@ -3738,158 +3705,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3738 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3705 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3739 return trans; 3706 return trans;
3740 3707
3741 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 3708 if (PTR_ERR(trans) == -ENOSPC) {
3742 return ERR_PTR(-ENOSPC); 3709 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3743
3744 /* check if there is someone else holds reference */
3745 if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
3746 return ERR_PTR(-ENOSPC);
3747
3748 if (atomic_read(&inode->i_count) > 2)
3749 return ERR_PTR(-ENOSPC);
3750
3751 if (xchg(&root->fs_info->enospc_unlink, 1))
3752 return ERR_PTR(-ENOSPC);
3753
3754 path = btrfs_alloc_path();
3755 if (!path) {
3756 root->fs_info->enospc_unlink = 0;
3757 return ERR_PTR(-ENOMEM);
3758 }
3759 3710
3760 /* 1 for the orphan item */ 3711 trans = btrfs_start_transaction(root, 0);
3761 trans = btrfs_start_transaction(root, 1); 3712 if (IS_ERR(trans))
3762 if (IS_ERR(trans)) { 3713 return trans;
3763 btrfs_free_path(path); 3714 ret = btrfs_cond_migrate_bytes(root->fs_info,
3764 root->fs_info->enospc_unlink = 0; 3715 &root->fs_info->trans_block_rsv,
3765 return trans; 3716 num_bytes, 5);
3766 } 3717 if (ret) {
3767 3718 btrfs_end_transaction(trans, root);
3768 path->skip_locking = 1; 3719 return ERR_PTR(ret);
3769 path->search_commit_root = 1;
3770
3771 ret = btrfs_lookup_inode(trans, root, path,
3772 &BTRFS_I(dir)->location, 0);
3773 if (ret < 0) {
3774 err = ret;
3775 goto out;
3776 }
3777 if (ret == 0) {
3778 if (check_path_shared(root, path))
3779 goto out;
3780 } else {
3781 check_link = 0;
3782 }
3783 btrfs_release_path(path);
3784
3785 ret = btrfs_lookup_inode(trans, root, path,
3786 &BTRFS_I(inode)->location, 0);
3787 if (ret < 0) {
3788 err = ret;
3789 goto out;
3790 }
3791 if (ret == 0) {
3792 if (check_path_shared(root, path))
3793 goto out;
3794 } else {
3795 check_link = 0;
3796 }
3797 btrfs_release_path(path);
3798
3799 if (ret == 0 && S_ISREG(inode->i_mode)) {
3800 ret = btrfs_lookup_file_extent(trans, root, path,
3801 ino, (u64)-1, 0);
3802 if (ret < 0) {
3803 err = ret;
3804 goto out;
3805 } 3720 }
3806 BUG_ON(ret == 0); /* Corruption */
3807 if (check_path_shared(root, path))
3808 goto out;
3809 btrfs_release_path(path);
3810 }
3811
3812 if (!check_link) {
3813 err = 0;
3814 goto out;
3815 }
3816
3817 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3818 dentry->d_name.name, dentry->d_name.len, 0);
3819 if (IS_ERR(di)) {
3820 err = PTR_ERR(di);
3821 goto out;
3822 }
3823 if (di) {
3824 if (check_path_shared(root, path))
3825 goto out;
3826 } else {
3827 err = 0;
3828 goto out;
3829 }
3830 btrfs_release_path(path);
3831
3832 ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
3833 dentry->d_name.len, ino, dir_ino, 0,
3834 &index);
3835 if (ret) {
3836 err = ret;
3837 goto out;
3838 }
3839
3840 if (check_path_shared(root, path))
3841 goto out;
3842
3843 btrfs_release_path(path);
3844
3845 /*
3846 * This is a commit root search, if we can lookup inode item and other
3847 * relative items in the commit root, it means the transaction of
3848 * dir/file creation has been committed, and the dir index item that we
3849 * delay to insert has also been inserted into the commit root. So
3850 * we needn't worry about the delayed insertion of the dir index item
3851 * here.
3852 */
3853 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
3854 dentry->d_name.name, dentry->d_name.len, 0);
3855 if (IS_ERR(di)) {
3856 err = PTR_ERR(di);
3857 goto out;
3858 }
3859 BUG_ON(ret == -ENOENT);
3860 if (check_path_shared(root, path))
3861 goto out;
3862
3863 err = 0;
3864out:
3865 btrfs_free_path(path);
3866 /* Migrate the orphan reservation over */
3867 if (!err)
3868 err = btrfs_block_rsv_migrate(trans->block_rsv,
3869 &root->fs_info->global_block_rsv,
3870 trans->bytes_reserved);
3871
3872 if (err) {
3873 btrfs_end_transaction(trans, root);
3874 root->fs_info->enospc_unlink = 0;
3875 return ERR_PTR(err);
3876 }
3877
3878 trans->block_rsv = &root->fs_info->global_block_rsv;
3879 return trans;
3880}
3881
3882static void __unlink_end_trans(struct btrfs_trans_handle *trans,
3883 struct btrfs_root *root)
3884{
3885 if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
3886 btrfs_block_rsv_release(root, trans->block_rsv,
3887 trans->bytes_reserved);
3888 trans->block_rsv = &root->fs_info->trans_block_rsv; 3721 trans->block_rsv = &root->fs_info->trans_block_rsv;
3889 BUG_ON(!root->fs_info->enospc_unlink); 3722 trans->bytes_reserved = num_bytes;
3890 root->fs_info->enospc_unlink = 0;
3891 } 3723 }
3892 btrfs_end_transaction(trans, root); 3724 return trans;
3893} 3725}
3894 3726
3895static int btrfs_unlink(struct inode *dir, struct dentry *dentry) 3727static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -3899,7 +3731,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3899 struct inode *inode = dentry->d_inode; 3731 struct inode *inode = dentry->d_inode;
3900 int ret; 3732 int ret;
3901 3733
3902 trans = __unlink_start_trans(dir, dentry); 3734 trans = __unlink_start_trans(dir);
3903 if (IS_ERR(trans)) 3735 if (IS_ERR(trans))
3904 return PTR_ERR(trans); 3736 return PTR_ERR(trans);
3905 3737
@@ -3917,7 +3749,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3917 } 3749 }
3918 3750
3919out: 3751out:
3920 __unlink_end_trans(trans, root); 3752 btrfs_end_transaction(trans, root);
3921 btrfs_btree_balance_dirty(root); 3753 btrfs_btree_balance_dirty(root);
3922 return ret; 3754 return ret;
3923} 3755}
@@ -4014,7 +3846,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4014 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) 3846 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
4015 return -EPERM; 3847 return -EPERM;
4016 3848
4017 trans = __unlink_start_trans(dir, dentry); 3849 trans = __unlink_start_trans(dir);
4018 if (IS_ERR(trans)) 3850 if (IS_ERR(trans))
4019 return PTR_ERR(trans); 3851 return PTR_ERR(trans);
4020 3852
@@ -4036,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4036 if (!err) 3868 if (!err)
4037 btrfs_i_size_write(inode, 0); 3869 btrfs_i_size_write(inode, 0);
4038out: 3870out:
4039 __unlink_end_trans(trans, root); 3871 btrfs_end_transaction(trans, root);
4040 btrfs_btree_balance_dirty(root); 3872 btrfs_btree_balance_dirty(root);
4041 3873
4042 return err; 3874 return err;