aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c501
1 files changed, 220 insertions, 281 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4f9d16b70d3d..6d1b93c8aafb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -42,6 +42,7 @@
42#include <linux/mount.h> 42#include <linux/mount.h>
43#include <linux/btrfs.h> 43#include <linux/btrfs.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h>
45#include "compat.h" 46#include "compat.h"
46#include "ctree.h" 47#include "ctree.h"
47#include "disk-io.h" 48#include "disk-io.h"
@@ -57,6 +58,7 @@
57#include "free-space-cache.h" 58#include "free-space-cache.h"
58#include "inode-map.h" 59#include "inode-map.h"
59#include "backref.h" 60#include "backref.h"
61#include "hash.h"
60 62
61struct btrfs_iget_args { 63struct btrfs_iget_args {
62 u64 ino; 64 u64 ino;
@@ -701,8 +703,12 @@ retry:
701 async_extent->nr_pages = 0; 703 async_extent->nr_pages = 0;
702 async_extent->pages = NULL; 704 async_extent->pages = NULL;
703 705
704 if (ret == -ENOSPC) 706 if (ret == -ENOSPC) {
707 unlock_extent(io_tree, async_extent->start,
708 async_extent->start +
709 async_extent->ram_size - 1);
705 goto retry; 710 goto retry;
711 }
706 goto out_free; 712 goto out_free;
707 } 713 }
708 714
@@ -1529,6 +1535,46 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1529 spin_unlock(&BTRFS_I(inode)->lock); 1535 spin_unlock(&BTRFS_I(inode)->lock);
1530} 1536}
1531 1537
1538static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1539 struct inode *inode)
1540{
1541 spin_lock(&root->delalloc_lock);
1542 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1543 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1544 &root->delalloc_inodes);
1545 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1546 &BTRFS_I(inode)->runtime_flags);
1547 root->nr_delalloc_inodes++;
1548 if (root->nr_delalloc_inodes == 1) {
1549 spin_lock(&root->fs_info->delalloc_root_lock);
1550 BUG_ON(!list_empty(&root->delalloc_root));
1551 list_add_tail(&root->delalloc_root,
1552 &root->fs_info->delalloc_roots);
1553 spin_unlock(&root->fs_info->delalloc_root_lock);
1554 }
1555 }
1556 spin_unlock(&root->delalloc_lock);
1557}
1558
1559static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1560 struct inode *inode)
1561{
1562 spin_lock(&root->delalloc_lock);
1563 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1564 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1565 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1566 &BTRFS_I(inode)->runtime_flags);
1567 root->nr_delalloc_inodes--;
1568 if (!root->nr_delalloc_inodes) {
1569 spin_lock(&root->fs_info->delalloc_root_lock);
1570 BUG_ON(list_empty(&root->delalloc_root));
1571 list_del_init(&root->delalloc_root);
1572 spin_unlock(&root->fs_info->delalloc_root_lock);
1573 }
1574 }
1575 spin_unlock(&root->delalloc_lock);
1576}
1577
1532/* 1578/*
1533 * extent_io.c set_bit_hook, used to track delayed allocation 1579 * extent_io.c set_bit_hook, used to track delayed allocation
1534 * bytes in this file, and to maintain the list of inodes that 1580 * bytes in this file, and to maintain the list of inodes that
@@ -1561,16 +1607,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
1561 spin_lock(&BTRFS_I(inode)->lock); 1607 spin_lock(&BTRFS_I(inode)->lock);
1562 BTRFS_I(inode)->delalloc_bytes += len; 1608 BTRFS_I(inode)->delalloc_bytes += len;
1563 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, 1609 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1564 &BTRFS_I(inode)->runtime_flags)) { 1610 &BTRFS_I(inode)->runtime_flags))
1565 spin_lock(&root->fs_info->delalloc_lock); 1611 btrfs_add_delalloc_inodes(root, inode);
1566 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1567 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1568 &root->fs_info->delalloc_inodes);
1569 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1570 &BTRFS_I(inode)->runtime_flags);
1571 }
1572 spin_unlock(&root->fs_info->delalloc_lock);
1573 }
1574 spin_unlock(&BTRFS_I(inode)->lock); 1612 spin_unlock(&BTRFS_I(inode)->lock);
1575 } 1613 }
1576} 1614}
@@ -1604,7 +1642,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1604 btrfs_delalloc_release_metadata(inode, len); 1642 btrfs_delalloc_release_metadata(inode, len);
1605 1643
1606 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1644 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1607 && do_list) 1645 && do_list && !(state->state & EXTENT_NORESERVE))
1608 btrfs_free_reserved_data_space(inode, len); 1646 btrfs_free_reserved_data_space(inode, len);
1609 1647
1610 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, 1648 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@@ -1613,15 +1651,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1613 BTRFS_I(inode)->delalloc_bytes -= len; 1651 BTRFS_I(inode)->delalloc_bytes -= len;
1614 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1652 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1615 test_bit(BTRFS_INODE_IN_DELALLOC_LIST, 1653 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1616 &BTRFS_I(inode)->runtime_flags)) { 1654 &BTRFS_I(inode)->runtime_flags))
1617 spin_lock(&root->fs_info->delalloc_lock); 1655 btrfs_del_delalloc_inode(root, inode);
1618 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1619 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1620 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1621 &BTRFS_I(inode)->runtime_flags);
1622 }
1623 spin_unlock(&root->fs_info->delalloc_lock);
1624 }
1625 spin_unlock(&BTRFS_I(inode)->lock); 1656 spin_unlock(&BTRFS_I(inode)->lock);
1626 } 1657 }
1627} 1658}
@@ -2263,11 +2294,6 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
2263 return 0; 2294 return 0;
2264 return PTR_ERR(root); 2295 return PTR_ERR(root);
2265 } 2296 }
2266 if (btrfs_root_refs(&root->root_item) == 0) {
2267 srcu_read_unlock(&fs_info->subvol_srcu, index);
2268 /* parse ENOENT to 0 */
2269 return 0;
2270 }
2271 2297
2272 /* step 2: get inode */ 2298 /* step 2: get inode */
2273 key.objectid = backref->inum; 2299 key.objectid = backref->inum;
@@ -3215,13 +3241,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3215 /* 1 for the orphan item deletion. */ 3241 /* 1 for the orphan item deletion. */
3216 trans = btrfs_start_transaction(root, 1); 3242 trans = btrfs_start_transaction(root, 1);
3217 if (IS_ERR(trans)) { 3243 if (IS_ERR(trans)) {
3244 iput(inode);
3218 ret = PTR_ERR(trans); 3245 ret = PTR_ERR(trans);
3219 goto out; 3246 goto out;
3220 } 3247 }
3221 ret = btrfs_orphan_add(trans, inode); 3248 ret = btrfs_orphan_add(trans, inode);
3222 btrfs_end_transaction(trans, root); 3249 btrfs_end_transaction(trans, root);
3223 if (ret) 3250 if (ret) {
3251 iput(inode);
3224 goto out; 3252 goto out;
3253 }
3225 3254
3226 ret = btrfs_truncate(inode); 3255 ret = btrfs_truncate(inode);
3227 if (ret) 3256 if (ret)
@@ -3274,8 +3303,17 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3274{ 3303{
3275 u32 nritems = btrfs_header_nritems(leaf); 3304 u32 nritems = btrfs_header_nritems(leaf);
3276 struct btrfs_key found_key; 3305 struct btrfs_key found_key;
3306 static u64 xattr_access = 0;
3307 static u64 xattr_default = 0;
3277 int scanned = 0; 3308 int scanned = 0;
3278 3309
3310 if (!xattr_access) {
3311 xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
3312 strlen(POSIX_ACL_XATTR_ACCESS));
3313 xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
3314 strlen(POSIX_ACL_XATTR_DEFAULT));
3315 }
3316
3279 slot++; 3317 slot++;
3280 while (slot < nritems) { 3318 while (slot < nritems) {
3281 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3319 btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -3285,8 +3323,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3285 return 0; 3323 return 0;
3286 3324
3287 /* we found an xattr, assume we've got an acl */ 3325 /* we found an xattr, assume we've got an acl */
3288 if (found_key.type == BTRFS_XATTR_ITEM_KEY) 3326 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3289 return 1; 3327 if (found_key.offset == xattr_access ||
3328 found_key.offset == xattr_default)
3329 return 1;
3330 }
3290 3331
3291 /* 3332 /*
3292 * we found a key greater than an xattr key, there can't 3333 * we found a key greater than an xattr key, there can't
@@ -3660,53 +3701,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3660 } 3701 }
3661 return ret; 3702 return ret;
3662} 3703}
3663
3664
3665/* helper to check if there is any shared block in the path */
3666static int check_path_shared(struct btrfs_root *root,
3667 struct btrfs_path *path)
3668{
3669 struct extent_buffer *eb;
3670 int level;
3671 u64 refs = 1;
3672
3673 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
3674 int ret;
3675
3676 if (!path->nodes[level])
3677 break;
3678 eb = path->nodes[level];
3679 if (!btrfs_block_can_be_shared(root, eb))
3680 continue;
3681 ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
3682 &refs, NULL);
3683 if (refs > 1)
3684 return 1;
3685 }
3686 return 0;
3687}
3688 3704
3689/* 3705/*
3690 * helper to start transaction for unlink and rmdir. 3706 * helper to start transaction for unlink and rmdir.
3691 * 3707 *
3692 * unlink and rmdir are special in btrfs, they do not always free space. 3708 * unlink and rmdir are special in btrfs, they do not always free space, so
3693 * so in enospc case, we should make sure they will free space before 3709 * if we cannot make our reservations the normal way try and see if there is
3694 * allowing them to use the global metadata reservation. 3710 * plenty of slack room in the global reserve to migrate, otherwise we cannot
3711 * allow the unlink to occur.
3695 */ 3712 */
3696static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, 3713static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3697 struct dentry *dentry)
3698{ 3714{
3699 struct btrfs_trans_handle *trans; 3715 struct btrfs_trans_handle *trans;
3700 struct btrfs_root *root = BTRFS_I(dir)->root; 3716 struct btrfs_root *root = BTRFS_I(dir)->root;
3701 struct btrfs_path *path;
3702 struct btrfs_dir_item *di;
3703 struct inode *inode = dentry->d_inode;
3704 u64 index;
3705 int check_link = 1;
3706 int err = -ENOSPC;
3707 int ret; 3717 int ret;
3708 u64 ino = btrfs_ino(inode);
3709 u64 dir_ino = btrfs_ino(dir);
3710 3718
3711 /* 3719 /*
3712 * 1 for the possible orphan item 3720 * 1 for the possible orphan item
@@ -3719,158 +3727,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3719 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3727 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3720 return trans; 3728 return trans;
3721 3729
3722 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 3730 if (PTR_ERR(trans) == -ENOSPC) {
3723 return ERR_PTR(-ENOSPC); 3731 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3724
3725 /* check if there is someone else holds reference */
3726 if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
3727 return ERR_PTR(-ENOSPC);
3728
3729 if (atomic_read(&inode->i_count) > 2)
3730 return ERR_PTR(-ENOSPC);
3731
3732 if (xchg(&root->fs_info->enospc_unlink, 1))
3733 return ERR_PTR(-ENOSPC);
3734
3735 path = btrfs_alloc_path();
3736 if (!path) {
3737 root->fs_info->enospc_unlink = 0;
3738 return ERR_PTR(-ENOMEM);
3739 }
3740 3732
3741 /* 1 for the orphan item */ 3733 trans = btrfs_start_transaction(root, 0);
3742 trans = btrfs_start_transaction(root, 1); 3734 if (IS_ERR(trans))
3743 if (IS_ERR(trans)) { 3735 return trans;
3744 btrfs_free_path(path); 3736 ret = btrfs_cond_migrate_bytes(root->fs_info,
3745 root->fs_info->enospc_unlink = 0; 3737 &root->fs_info->trans_block_rsv,
3746 return trans; 3738 num_bytes, 5);
3747 } 3739 if (ret) {
3748 3740 btrfs_end_transaction(trans, root);
3749 path->skip_locking = 1; 3741 return ERR_PTR(ret);
3750 path->search_commit_root = 1;
3751
3752 ret = btrfs_lookup_inode(trans, root, path,
3753 &BTRFS_I(dir)->location, 0);
3754 if (ret < 0) {
3755 err = ret;
3756 goto out;
3757 }
3758 if (ret == 0) {
3759 if (check_path_shared(root, path))
3760 goto out;
3761 } else {
3762 check_link = 0;
3763 }
3764 btrfs_release_path(path);
3765
3766 ret = btrfs_lookup_inode(trans, root, path,
3767 &BTRFS_I(inode)->location, 0);
3768 if (ret < 0) {
3769 err = ret;
3770 goto out;
3771 }
3772 if (ret == 0) {
3773 if (check_path_shared(root, path))
3774 goto out;
3775 } else {
3776 check_link = 0;
3777 }
3778 btrfs_release_path(path);
3779
3780 if (ret == 0 && S_ISREG(inode->i_mode)) {
3781 ret = btrfs_lookup_file_extent(trans, root, path,
3782 ino, (u64)-1, 0);
3783 if (ret < 0) {
3784 err = ret;
3785 goto out;
3786 } 3742 }
3787 BUG_ON(ret == 0); /* Corruption */
3788 if (check_path_shared(root, path))
3789 goto out;
3790 btrfs_release_path(path);
3791 }
3792
3793 if (!check_link) {
3794 err = 0;
3795 goto out;
3796 }
3797
3798 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3799 dentry->d_name.name, dentry->d_name.len, 0);
3800 if (IS_ERR(di)) {
3801 err = PTR_ERR(di);
3802 goto out;
3803 }
3804 if (di) {
3805 if (check_path_shared(root, path))
3806 goto out;
3807 } else {
3808 err = 0;
3809 goto out;
3810 }
3811 btrfs_release_path(path);
3812
3813 ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
3814 dentry->d_name.len, ino, dir_ino, 0,
3815 &index);
3816 if (ret) {
3817 err = ret;
3818 goto out;
3819 }
3820
3821 if (check_path_shared(root, path))
3822 goto out;
3823
3824 btrfs_release_path(path);
3825
3826 /*
3827 * This is a commit root search, if we can lookup inode item and other
3828 * relative items in the commit root, it means the transaction of
3829 * dir/file creation has been committed, and the dir index item that we
3830 * delay to insert has also been inserted into the commit root. So
3831 * we needn't worry about the delayed insertion of the dir index item
3832 * here.
3833 */
3834 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
3835 dentry->d_name.name, dentry->d_name.len, 0);
3836 if (IS_ERR(di)) {
3837 err = PTR_ERR(di);
3838 goto out;
3839 }
3840 BUG_ON(ret == -ENOENT);
3841 if (check_path_shared(root, path))
3842 goto out;
3843
3844 err = 0;
3845out:
3846 btrfs_free_path(path);
3847 /* Migrate the orphan reservation over */
3848 if (!err)
3849 err = btrfs_block_rsv_migrate(trans->block_rsv,
3850 &root->fs_info->global_block_rsv,
3851 trans->bytes_reserved);
3852
3853 if (err) {
3854 btrfs_end_transaction(trans, root);
3855 root->fs_info->enospc_unlink = 0;
3856 return ERR_PTR(err);
3857 }
3858
3859 trans->block_rsv = &root->fs_info->global_block_rsv;
3860 return trans;
3861}
3862
3863static void __unlink_end_trans(struct btrfs_trans_handle *trans,
3864 struct btrfs_root *root)
3865{
3866 if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
3867 btrfs_block_rsv_release(root, trans->block_rsv,
3868 trans->bytes_reserved);
3869 trans->block_rsv = &root->fs_info->trans_block_rsv; 3743 trans->block_rsv = &root->fs_info->trans_block_rsv;
3870 BUG_ON(!root->fs_info->enospc_unlink); 3744 trans->bytes_reserved = num_bytes;
3871 root->fs_info->enospc_unlink = 0;
3872 } 3745 }
3873 btrfs_end_transaction(trans, root); 3746 return trans;
3874} 3747}
3875 3748
3876static int btrfs_unlink(struct inode *dir, struct dentry *dentry) 3749static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -3880,7 +3753,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3880 struct inode *inode = dentry->d_inode; 3753 struct inode *inode = dentry->d_inode;
3881 int ret; 3754 int ret;
3882 3755
3883 trans = __unlink_start_trans(dir, dentry); 3756 trans = __unlink_start_trans(dir);
3884 if (IS_ERR(trans)) 3757 if (IS_ERR(trans))
3885 return PTR_ERR(trans); 3758 return PTR_ERR(trans);
3886 3759
@@ -3898,7 +3771,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3898 } 3771 }
3899 3772
3900out: 3773out:
3901 __unlink_end_trans(trans, root); 3774 btrfs_end_transaction(trans, root);
3902 btrfs_btree_balance_dirty(root); 3775 btrfs_btree_balance_dirty(root);
3903 return ret; 3776 return ret;
3904} 3777}
@@ -3995,7 +3868,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3995 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) 3868 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3996 return -EPERM; 3869 return -EPERM;
3997 3870
3998 trans = __unlink_start_trans(dir, dentry); 3871 trans = __unlink_start_trans(dir);
3999 if (IS_ERR(trans)) 3872 if (IS_ERR(trans))
4000 return PTR_ERR(trans); 3873 return PTR_ERR(trans);
4001 3874
@@ -4017,7 +3890,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4017 if (!err) 3890 if (!err)
4018 btrfs_i_size_write(inode, 0); 3891 btrfs_i_size_write(inode, 0);
4019out: 3892out:
4020 __unlink_end_trans(trans, root); 3893 btrfs_end_transaction(trans, root);
4021 btrfs_btree_balance_dirty(root); 3894 btrfs_btree_balance_dirty(root);
4022 3895
4023 return err; 3896 return err;
@@ -4395,6 +4268,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4395 u64 hole_size; 4268 u64 hole_size;
4396 int err = 0; 4269 int err = 0;
4397 4270
4271 /*
4272 * If our size started in the middle of a page we need to zero out the
4273 * rest of the page before we expand the i_size, otherwise we could
4274 * expose stale data.
4275 */
4276 err = btrfs_truncate_page(inode, oldsize, 0, 0);
4277 if (err)
4278 return err;
4279
4398 if (size <= hole_start) 4280 if (size <= hole_start)
4399 return 0; 4281 return 0;
4400 4282
@@ -4822,11 +4704,6 @@ static int fixup_tree_root_location(struct btrfs_root *root,
4822 goto out; 4704 goto out;
4823 } 4705 }
4824 4706
4825 if (btrfs_root_refs(&new_root->root_item) == 0) {
4826 err = -ENOENT;
4827 goto out;
4828 }
4829
4830 *sub_root = new_root; 4707 *sub_root = new_root;
4831 location->objectid = btrfs_root_dirid(&new_root->root_item); 4708 location->objectid = btrfs_root_dirid(&new_root->root_item);
4832 location->type = BTRFS_INODE_ITEM_KEY; 4709 location->type = BTRFS_INODE_ITEM_KEY;
@@ -5092,8 +4969,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5092 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4969 if (!(inode->i_sb->s_flags & MS_RDONLY))
5093 ret = btrfs_orphan_cleanup(sub_root); 4970 ret = btrfs_orphan_cleanup(sub_root);
5094 up_read(&root->fs_info->cleanup_work_sem); 4971 up_read(&root->fs_info->cleanup_work_sem);
5095 if (ret) 4972 if (ret) {
4973 iput(inode);
5096 inode = ERR_PTR(ret); 4974 inode = ERR_PTR(ret);
4975 }
5097 } 4976 }
5098 4977
5099 return inode; 4978 return inode;
@@ -6501,10 +6380,10 @@ out:
6501 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6380 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6502 * block must be cow'd 6381 * block must be cow'd
6503 */ 6382 */
6504static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, 6383noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6505 struct inode *inode, u64 offset, u64 *len, 6384 struct inode *inode, u64 offset, u64 *len,
6506 u64 *orig_start, u64 *orig_block_len, 6385 u64 *orig_start, u64 *orig_block_len,
6507 u64 *ram_bytes) 6386 u64 *ram_bytes)
6508{ 6387{
6509 struct btrfs_path *path; 6388 struct btrfs_path *path;
6510 int ret; 6389 int ret;
@@ -6518,7 +6397,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6518 u64 num_bytes; 6397 u64 num_bytes;
6519 int slot; 6398 int slot;
6520 int found_type; 6399 int found_type;
6521 6400 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6522 path = btrfs_alloc_path(); 6401 path = btrfs_alloc_path();
6523 if (!path) 6402 if (!path)
6524 return -ENOMEM; 6403 return -ENOMEM;
@@ -6558,18 +6437,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6558 /* not a regular extent, must cow */ 6437 /* not a regular extent, must cow */
6559 goto out; 6438 goto out;
6560 } 6439 }
6440
6441 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6442 goto out;
6443
6561 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6444 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6445 if (disk_bytenr == 0)
6446 goto out;
6447
6448 if (btrfs_file_extent_compression(leaf, fi) ||
6449 btrfs_file_extent_encryption(leaf, fi) ||
6450 btrfs_file_extent_other_encoding(leaf, fi))
6451 goto out;
6452
6562 backref_offset = btrfs_file_extent_offset(leaf, fi); 6453 backref_offset = btrfs_file_extent_offset(leaf, fi);
6563 6454
6564 *orig_start = key.offset - backref_offset; 6455 if (orig_start) {
6565 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); 6456 *orig_start = key.offset - backref_offset;
6566 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6457 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6458 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6459 }
6567 6460
6568 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 6461 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6569 if (extent_end < offset + *len) {
6570 /* extent doesn't include our full range, must cow */
6571 goto out;
6572 }
6573 6462
6574 if (btrfs_extent_readonly(root, disk_bytenr)) 6463 if (btrfs_extent_readonly(root, disk_bytenr))
6575 goto out; 6464 goto out;
@@ -6813,8 +6702,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6813 if (IS_ERR(trans)) 6702 if (IS_ERR(trans))
6814 goto must_cow; 6703 goto must_cow;
6815 6704
6816 if (can_nocow_odirect(trans, inode, start, &len, &orig_start, 6705 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6817 &orig_block_len, &ram_bytes) == 1) { 6706 &orig_block_len, &ram_bytes) == 1) {
6818 if (type == BTRFS_ORDERED_PREALLOC) { 6707 if (type == BTRFS_ORDERED_PREALLOC) {
6819 free_extent_map(em); 6708 free_extent_map(em);
6820 em = create_pinned_em(inode, start, len, 6709 em = create_pinned_em(inode, start, len,
@@ -7243,7 +7132,6 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7243{ 7132{
7244 struct btrfs_root *root = BTRFS_I(inode)->root; 7133 struct btrfs_root *root = BTRFS_I(inode)->root;
7245 struct btrfs_dio_private *dip; 7134 struct btrfs_dio_private *dip;
7246 struct bio_vec *bvec = dio_bio->bi_io_vec;
7247 struct bio *io_bio; 7135 struct bio *io_bio;
7248 int skip_sum; 7136 int skip_sum;
7249 int write = rw & REQ_WRITE; 7137 int write = rw & REQ_WRITE;
@@ -7265,16 +7153,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7265 } 7153 }
7266 7154
7267 dip->private = dio_bio->bi_private; 7155 dip->private = dio_bio->bi_private;
7268 io_bio->bi_private = dio_bio->bi_private;
7269 dip->inode = inode; 7156 dip->inode = inode;
7270 dip->logical_offset = file_offset; 7157 dip->logical_offset = file_offset;
7271 7158 dip->bytes = dio_bio->bi_size;
7272 dip->bytes = 0;
7273 do {
7274 dip->bytes += bvec->bv_len;
7275 bvec++;
7276 } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
7277
7278 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; 7159 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
7279 io_bio->bi_private = dip; 7160 io_bio->bi_private = dip;
7280 dip->errors = 0; 7161 dip->errors = 0;
@@ -7373,8 +7254,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7373 atomic_inc(&inode->i_dio_count); 7254 atomic_inc(&inode->i_dio_count);
7374 smp_mb__after_atomic_inc(); 7255 smp_mb__after_atomic_inc();
7375 7256
7257 /*
7258 * The generic stuff only does filemap_write_and_wait_range, which isn't
7259 * enough if we've written compressed pages to this area, so we need to
7260 * call btrfs_wait_ordered_range to make absolutely sure that any
7261 * outstanding dirty pages are on disk.
7262 */
7263 count = iov_length(iov, nr_segs);
7264 btrfs_wait_ordered_range(inode, offset, count);
7265
7376 if (rw & WRITE) { 7266 if (rw & WRITE) {
7377 count = iov_length(iov, nr_segs);
7378 /* 7267 /*
7379 * If the write DIO is beyond the EOF, we need update 7268 * If the write DIO is beyond the EOF, we need update
7380 * the isize, but it is protected by i_mutex. So we can 7269 * the isize, but it is protected by i_mutex. So we can
@@ -7694,16 +7583,12 @@ static int btrfs_truncate(struct inode *inode)
7694{ 7583{
7695 struct btrfs_root *root = BTRFS_I(inode)->root; 7584 struct btrfs_root *root = BTRFS_I(inode)->root;
7696 struct btrfs_block_rsv *rsv; 7585 struct btrfs_block_rsv *rsv;
7697 int ret; 7586 int ret = 0;
7698 int err = 0; 7587 int err = 0;
7699 struct btrfs_trans_handle *trans; 7588 struct btrfs_trans_handle *trans;
7700 u64 mask = root->sectorsize - 1; 7589 u64 mask = root->sectorsize - 1;
7701 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7590 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7702 7591
7703 ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
7704 if (ret)
7705 return ret;
7706
7707 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7592 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
7708 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 7593 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
7709 7594
@@ -7961,9 +7846,9 @@ void btrfs_destroy_inode(struct inode *inode)
7961 */ 7846 */
7962 smp_mb(); 7847 smp_mb();
7963 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { 7848 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
7964 spin_lock(&root->fs_info->ordered_extent_lock); 7849 spin_lock(&root->fs_info->ordered_root_lock);
7965 list_del_init(&BTRFS_I(inode)->ordered_operations); 7850 list_del_init(&BTRFS_I(inode)->ordered_operations);
7966 spin_unlock(&root->fs_info->ordered_extent_lock); 7851 spin_unlock(&root->fs_info->ordered_root_lock);
7967 } 7852 }
7968 7853
7969 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7854 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
@@ -8333,7 +8218,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
8333 * some fairly slow code that needs optimization. This walks the list 8218 * some fairly slow code that needs optimization. This walks the list
8334 * of all the inodes with pending delalloc and forces them to disk. 8219 * of all the inodes with pending delalloc and forces them to disk.
8335 */ 8220 */
8336int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 8221static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8337{ 8222{
8338 struct btrfs_inode *binode; 8223 struct btrfs_inode *binode;
8339 struct inode *inode; 8224 struct inode *inode;
@@ -8342,30 +8227,23 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8342 struct list_head splice; 8227 struct list_head splice;
8343 int ret = 0; 8228 int ret = 0;
8344 8229
8345 if (root->fs_info->sb->s_flags & MS_RDONLY)
8346 return -EROFS;
8347
8348 INIT_LIST_HEAD(&works); 8230 INIT_LIST_HEAD(&works);
8349 INIT_LIST_HEAD(&splice); 8231 INIT_LIST_HEAD(&splice);
8350 8232
8351 spin_lock(&root->fs_info->delalloc_lock); 8233 spin_lock(&root->delalloc_lock);
8352 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 8234 list_splice_init(&root->delalloc_inodes, &splice);
8353 while (!list_empty(&splice)) { 8235 while (!list_empty(&splice)) {
8354 binode = list_entry(splice.next, struct btrfs_inode, 8236 binode = list_entry(splice.next, struct btrfs_inode,
8355 delalloc_inodes); 8237 delalloc_inodes);
8356 8238
8357 list_del_init(&binode->delalloc_inodes); 8239 list_move_tail(&binode->delalloc_inodes,
8358 8240 &root->delalloc_inodes);
8359 inode = igrab(&binode->vfs_inode); 8241 inode = igrab(&binode->vfs_inode);
8360 if (!inode) { 8242 if (!inode) {
8361 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 8243 cond_resched_lock(&root->delalloc_lock);
8362 &binode->runtime_flags);
8363 continue; 8244 continue;
8364 } 8245 }
8365 8246 spin_unlock(&root->delalloc_lock);
8366 list_add_tail(&binode->delalloc_inodes,
8367 &root->fs_info->delalloc_inodes);
8368 spin_unlock(&root->fs_info->delalloc_lock);
8369 8247
8370 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 8248 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8371 if (unlikely(!work)) { 8249 if (unlikely(!work)) {
@@ -8377,16 +8255,39 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8377 &work->work); 8255 &work->work);
8378 8256
8379 cond_resched(); 8257 cond_resched();
8380 spin_lock(&root->fs_info->delalloc_lock); 8258 spin_lock(&root->delalloc_lock);
8381 } 8259 }
8382 spin_unlock(&root->fs_info->delalloc_lock); 8260 spin_unlock(&root->delalloc_lock);
8383 8261
8384 list_for_each_entry_safe(work, next, &works, list) { 8262 list_for_each_entry_safe(work, next, &works, list) {
8385 list_del_init(&work->list); 8263 list_del_init(&work->list);
8386 btrfs_wait_and_free_delalloc_work(work); 8264 btrfs_wait_and_free_delalloc_work(work);
8387 } 8265 }
8266 return 0;
8267out:
8268 list_for_each_entry_safe(work, next, &works, list) {
8269 list_del_init(&work->list);
8270 btrfs_wait_and_free_delalloc_work(work);
8271 }
8272
8273 if (!list_empty_careful(&splice)) {
8274 spin_lock(&root->delalloc_lock);
8275 list_splice_tail(&splice, &root->delalloc_inodes);
8276 spin_unlock(&root->delalloc_lock);
8277 }
8278 return ret;
8279}
8280
8281int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8282{
8283 int ret;
8388 8284
8389 /* the filemap_flush will queue IO into the worker threads, but 8285 if (root->fs_info->sb->s_flags & MS_RDONLY)
8286 return -EROFS;
8287
8288 ret = __start_delalloc_inodes(root, delay_iput);
8289 /*
8290 * the filemap_flush will queue IO into the worker threads, but
8390 * we have to make sure the IO is actually started and that 8291 * we have to make sure the IO is actually started and that
8391 * ordered extents get created before we return 8292 * ordered extents get created before we return
8392 */ 8293 */
@@ -8398,17 +8299,55 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8398 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 8299 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
8399 } 8300 }
8400 atomic_dec(&root->fs_info->async_submit_draining); 8301 atomic_dec(&root->fs_info->async_submit_draining);
8401 return 0; 8302 return ret;
8402out: 8303}
8403 list_for_each_entry_safe(work, next, &works, list) { 8304
8404 list_del_init(&work->list); 8305int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
8405 btrfs_wait_and_free_delalloc_work(work); 8306 int delay_iput)
8307{
8308 struct btrfs_root *root;
8309 struct list_head splice;
8310 int ret;
8311
8312 if (fs_info->sb->s_flags & MS_RDONLY)
8313 return -EROFS;
8314
8315 INIT_LIST_HEAD(&splice);
8316
8317 spin_lock(&fs_info->delalloc_root_lock);
8318 list_splice_init(&fs_info->delalloc_roots, &splice);
8319 while (!list_empty(&splice)) {
8320 root = list_first_entry(&splice, struct btrfs_root,
8321 delalloc_root);
8322 root = btrfs_grab_fs_root(root);
8323 BUG_ON(!root);
8324 list_move_tail(&root->delalloc_root,
8325 &fs_info->delalloc_roots);
8326 spin_unlock(&fs_info->delalloc_root_lock);
8327
8328 ret = __start_delalloc_inodes(root, delay_iput);
8329 btrfs_put_fs_root(root);
8330 if (ret)
8331 goto out;
8332
8333 spin_lock(&fs_info->delalloc_root_lock);
8406 } 8334 }
8335 spin_unlock(&fs_info->delalloc_root_lock);
8407 8336
8337 atomic_inc(&fs_info->async_submit_draining);
8338 while (atomic_read(&fs_info->nr_async_submits) ||
8339 atomic_read(&fs_info->async_delalloc_pages)) {
8340 wait_event(fs_info->async_submit_wait,
8341 (atomic_read(&fs_info->nr_async_submits) == 0 &&
8342 atomic_read(&fs_info->async_delalloc_pages) == 0));
8343 }
8344 atomic_dec(&fs_info->async_submit_draining);
8345 return 0;
8346out:
8408 if (!list_empty_careful(&splice)) { 8347 if (!list_empty_careful(&splice)) {
8409 spin_lock(&root->fs_info->delalloc_lock); 8348 spin_lock(&fs_info->delalloc_root_lock);
8410 list_splice_tail(&splice, &root->fs_info->delalloc_inodes); 8349 list_splice_tail(&splice, &fs_info->delalloc_roots);
8411 spin_unlock(&root->fs_info->delalloc_lock); 8350 spin_unlock(&fs_info->delalloc_root_lock);
8412 } 8351 }
8413 return ret; 8352 return ret;
8414} 8353}