aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c601
1 files changed, 273 insertions, 328 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17f3064b4a3e..021694c08181 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -42,6 +42,7 @@
42#include <linux/mount.h> 42#include <linux/mount.h>
43#include <linux/btrfs.h> 43#include <linux/btrfs.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h>
45#include "compat.h" 46#include "compat.h"
46#include "ctree.h" 47#include "ctree.h"
47#include "disk-io.h" 48#include "disk-io.h"
@@ -57,6 +58,7 @@
57#include "free-space-cache.h" 58#include "free-space-cache.h"
58#include "inode-map.h" 59#include "inode-map.h"
59#include "backref.h" 60#include "backref.h"
61#include "hash.h"
60 62
61struct btrfs_iget_args { 63struct btrfs_iget_args {
62 u64 ino; 64 u64 ino;
@@ -701,8 +703,12 @@ retry:
701 async_extent->nr_pages = 0; 703 async_extent->nr_pages = 0;
702 async_extent->pages = NULL; 704 async_extent->pages = NULL;
703 705
704 if (ret == -ENOSPC) 706 if (ret == -ENOSPC) {
707 unlock_extent(io_tree, async_extent->start,
708 async_extent->start +
709 async_extent->ram_size - 1);
705 goto retry; 710 goto retry;
711 }
706 goto out_free; 712 goto out_free;
707 } 713 }
708 714
@@ -1529,6 +1535,46 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1529 spin_unlock(&BTRFS_I(inode)->lock); 1535 spin_unlock(&BTRFS_I(inode)->lock);
1530} 1536}
1531 1537
1538static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1539 struct inode *inode)
1540{
1541 spin_lock(&root->delalloc_lock);
1542 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1543 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1544 &root->delalloc_inodes);
1545 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1546 &BTRFS_I(inode)->runtime_flags);
1547 root->nr_delalloc_inodes++;
1548 if (root->nr_delalloc_inodes == 1) {
1549 spin_lock(&root->fs_info->delalloc_root_lock);
1550 BUG_ON(!list_empty(&root->delalloc_root));
1551 list_add_tail(&root->delalloc_root,
1552 &root->fs_info->delalloc_roots);
1553 spin_unlock(&root->fs_info->delalloc_root_lock);
1554 }
1555 }
1556 spin_unlock(&root->delalloc_lock);
1557}
1558
1559static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1560 struct inode *inode)
1561{
1562 spin_lock(&root->delalloc_lock);
1563 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1564 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1565 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1566 &BTRFS_I(inode)->runtime_flags);
1567 root->nr_delalloc_inodes--;
1568 if (!root->nr_delalloc_inodes) {
1569 spin_lock(&root->fs_info->delalloc_root_lock);
1570 BUG_ON(list_empty(&root->delalloc_root));
1571 list_del_init(&root->delalloc_root);
1572 spin_unlock(&root->fs_info->delalloc_root_lock);
1573 }
1574 }
1575 spin_unlock(&root->delalloc_lock);
1576}
1577
1532/* 1578/*
1533 * extent_io.c set_bit_hook, used to track delayed allocation 1579 * extent_io.c set_bit_hook, used to track delayed allocation
1534 * bytes in this file, and to maintain the list of inodes that 1580 * bytes in this file, and to maintain the list of inodes that
@@ -1561,16 +1607,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
1561 spin_lock(&BTRFS_I(inode)->lock); 1607 spin_lock(&BTRFS_I(inode)->lock);
1562 BTRFS_I(inode)->delalloc_bytes += len; 1608 BTRFS_I(inode)->delalloc_bytes += len;
1563 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, 1609 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1564 &BTRFS_I(inode)->runtime_flags)) { 1610 &BTRFS_I(inode)->runtime_flags))
1565 spin_lock(&root->fs_info->delalloc_lock); 1611 btrfs_add_delalloc_inodes(root, inode);
1566 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1567 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1568 &root->fs_info->delalloc_inodes);
1569 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1570 &BTRFS_I(inode)->runtime_flags);
1571 }
1572 spin_unlock(&root->fs_info->delalloc_lock);
1573 }
1574 spin_unlock(&BTRFS_I(inode)->lock); 1612 spin_unlock(&BTRFS_I(inode)->lock);
1575 } 1613 }
1576} 1614}
@@ -1604,7 +1642,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1604 btrfs_delalloc_release_metadata(inode, len); 1642 btrfs_delalloc_release_metadata(inode, len);
1605 1643
1606 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1644 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1607 && do_list) 1645 && do_list && !(state->state & EXTENT_NORESERVE))
1608 btrfs_free_reserved_data_space(inode, len); 1646 btrfs_free_reserved_data_space(inode, len);
1609 1647
1610 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, 1648 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@@ -1613,15 +1651,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1613 BTRFS_I(inode)->delalloc_bytes -= len; 1651 BTRFS_I(inode)->delalloc_bytes -= len;
1614 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1652 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1615 test_bit(BTRFS_INODE_IN_DELALLOC_LIST, 1653 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1616 &BTRFS_I(inode)->runtime_flags)) { 1654 &BTRFS_I(inode)->runtime_flags))
1617 spin_lock(&root->fs_info->delalloc_lock); 1655 btrfs_del_delalloc_inode(root, inode);
1618 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1619 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1620 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1621 &BTRFS_I(inode)->runtime_flags);
1622 }
1623 spin_unlock(&root->fs_info->delalloc_lock);
1624 }
1625 spin_unlock(&BTRFS_I(inode)->lock); 1656 spin_unlock(&BTRFS_I(inode)->lock);
1626 } 1657 }
1627} 1658}
@@ -2135,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2135 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) 2166 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2136 continue; 2167 continue;
2137 2168
2138 extent_offset = btrfs_file_extent_offset(leaf, extent); 2169 /*
2139 if (key.offset - extent_offset != offset) 2170 * 'offset' refers to the exact key.offset,
2171 * NOT the 'offset' field in btrfs_extent_data_ref, ie.
2172 * (key.offset - extent_offset).
2173 */
2174 if (key.offset != offset)
2140 continue; 2175 continue;
2141 2176
2177 extent_offset = btrfs_file_extent_offset(leaf, extent);
2142 num_bytes = btrfs_file_extent_num_bytes(leaf, extent); 2178 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2179
2143 if (extent_offset >= old->extent_offset + old->offset + 2180 if (extent_offset >= old->extent_offset + old->offset +
2144 old->len || extent_offset + num_bytes <= 2181 old->len || extent_offset + num_bytes <=
2145 old->extent_offset + old->offset) 2182 old->extent_offset + old->offset)
2146 continue; 2183 continue;
2147 2184
2185 ret = 0;
2148 break; 2186 break;
2149 } 2187 }
2150 2188
@@ -2156,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2156 2194
2157 backref->root_id = root_id; 2195 backref->root_id = root_id;
2158 backref->inum = inum; 2196 backref->inum = inum;
2159 backref->file_pos = offset + extent_offset; 2197 backref->file_pos = offset;
2160 backref->num_bytes = num_bytes; 2198 backref->num_bytes = num_bytes;
2161 backref->extent_offset = extent_offset; 2199 backref->extent_offset = extent_offset;
2162 backref->generation = btrfs_file_extent_generation(leaf, extent); 2200 backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2179,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2179 new->path = path; 2217 new->path = path;
2180 2218
2181 list_for_each_entry_safe(old, tmp, &new->head, list) { 2219 list_for_each_entry_safe(old, tmp, &new->head, list) {
2182 ret = iterate_inodes_from_logical(old->bytenr, fs_info, 2220 ret = iterate_inodes_from_logical(old->bytenr +
2221 old->extent_offset, fs_info,
2183 path, record_one_backref, 2222 path, record_one_backref,
2184 old); 2223 old);
2185 BUG_ON(ret < 0 && ret != -ENOENT); 2224 BUG_ON(ret < 0 && ret != -ENOENT);
@@ -2263,11 +2302,6 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
2263 return 0; 2302 return 0;
2264 return PTR_ERR(root); 2303 return PTR_ERR(root);
2265 } 2304 }
2266 if (btrfs_root_refs(&root->root_item) == 0) {
2267 srcu_read_unlock(&fs_info->subvol_srcu, index);
2268 /* parse ENOENT to 0 */
2269 return 0;
2270 }
2271 2305
2272 /* step 2: get inode */ 2306 /* step 2: get inode */
2273 key.objectid = backref->inum; 2307 key.objectid = backref->inum;
@@ -3215,13 +3249,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3215 /* 1 for the orphan item deletion. */ 3249 /* 1 for the orphan item deletion. */
3216 trans = btrfs_start_transaction(root, 1); 3250 trans = btrfs_start_transaction(root, 1);
3217 if (IS_ERR(trans)) { 3251 if (IS_ERR(trans)) {
3252 iput(inode);
3218 ret = PTR_ERR(trans); 3253 ret = PTR_ERR(trans);
3219 goto out; 3254 goto out;
3220 } 3255 }
3221 ret = btrfs_orphan_add(trans, inode); 3256 ret = btrfs_orphan_add(trans, inode);
3222 btrfs_end_transaction(trans, root); 3257 btrfs_end_transaction(trans, root);
3223 if (ret) 3258 if (ret) {
3259 iput(inode);
3224 goto out; 3260 goto out;
3261 }
3225 3262
3226 ret = btrfs_truncate(inode); 3263 ret = btrfs_truncate(inode);
3227 if (ret) 3264 if (ret)
@@ -3274,8 +3311,17 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3274{ 3311{
3275 u32 nritems = btrfs_header_nritems(leaf); 3312 u32 nritems = btrfs_header_nritems(leaf);
3276 struct btrfs_key found_key; 3313 struct btrfs_key found_key;
3314 static u64 xattr_access = 0;
3315 static u64 xattr_default = 0;
3277 int scanned = 0; 3316 int scanned = 0;
3278 3317
3318 if (!xattr_access) {
3319 xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
3320 strlen(POSIX_ACL_XATTR_ACCESS));
3321 xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
3322 strlen(POSIX_ACL_XATTR_DEFAULT));
3323 }
3324
3279 slot++; 3325 slot++;
3280 while (slot < nritems) { 3326 while (slot < nritems) {
3281 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3327 btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -3285,8 +3331,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3285 return 0; 3331 return 0;
3286 3332
3287 /* we found an xattr, assume we've got an acl */ 3333 /* we found an xattr, assume we've got an acl */
3288 if (found_key.type == BTRFS_XATTR_ITEM_KEY) 3334 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3289 return 1; 3335 if (found_key.offset == xattr_access ||
3336 found_key.offset == xattr_default)
3337 return 1;
3338 }
3290 3339
3291 /* 3340 /*
3292 * we found a key greater than an xattr key, there can't 3341 * we found a key greater than an xattr key, there can't
@@ -3660,53 +3709,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3660 } 3709 }
3661 return ret; 3710 return ret;
3662} 3711}
3663
3664
3665/* helper to check if there is any shared block in the path */
3666static int check_path_shared(struct btrfs_root *root,
3667 struct btrfs_path *path)
3668{
3669 struct extent_buffer *eb;
3670 int level;
3671 u64 refs = 1;
3672
3673 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
3674 int ret;
3675
3676 if (!path->nodes[level])
3677 break;
3678 eb = path->nodes[level];
3679 if (!btrfs_block_can_be_shared(root, eb))
3680 continue;
3681 ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
3682 &refs, NULL);
3683 if (refs > 1)
3684 return 1;
3685 }
3686 return 0;
3687}
3688 3712
3689/* 3713/*
3690 * helper to start transaction for unlink and rmdir. 3714 * helper to start transaction for unlink and rmdir.
3691 * 3715 *
3692 * unlink and rmdir are special in btrfs, they do not always free space. 3716 * unlink and rmdir are special in btrfs, they do not always free space, so
3693 * so in enospc case, we should make sure they will free space before 3717 * if we cannot make our reservations the normal way try and see if there is
3694 * allowing them to use the global metadata reservation. 3718 * plenty of slack room in the global reserve to migrate, otherwise we cannot
3719 * allow the unlink to occur.
3695 */ 3720 */
3696static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, 3721static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3697 struct dentry *dentry)
3698{ 3722{
3699 struct btrfs_trans_handle *trans; 3723 struct btrfs_trans_handle *trans;
3700 struct btrfs_root *root = BTRFS_I(dir)->root; 3724 struct btrfs_root *root = BTRFS_I(dir)->root;
3701 struct btrfs_path *path;
3702 struct btrfs_dir_item *di;
3703 struct inode *inode = dentry->d_inode;
3704 u64 index;
3705 int check_link = 1;
3706 int err = -ENOSPC;
3707 int ret; 3725 int ret;
3708 u64 ino = btrfs_ino(inode);
3709 u64 dir_ino = btrfs_ino(dir);
3710 3726
3711 /* 3727 /*
3712 * 1 for the possible orphan item 3728 * 1 for the possible orphan item
@@ -3719,158 +3735,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3719 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3735 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3720 return trans; 3736 return trans;
3721 3737
3722 if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 3738 if (PTR_ERR(trans) == -ENOSPC) {
3723 return ERR_PTR(-ENOSPC); 3739 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3724 3740
3725 /* check if there is someone else holds reference */ 3741 trans = btrfs_start_transaction(root, 0);
3726 if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) 3742 if (IS_ERR(trans))
3727 return ERR_PTR(-ENOSPC); 3743 return trans;
3728 3744 ret = btrfs_cond_migrate_bytes(root->fs_info,
3729 if (atomic_read(&inode->i_count) > 2) 3745 &root->fs_info->trans_block_rsv,
3730 return ERR_PTR(-ENOSPC); 3746 num_bytes, 5);
3731 3747 if (ret) {
3732 if (xchg(&root->fs_info->enospc_unlink, 1)) 3748 btrfs_end_transaction(trans, root);
3733 return ERR_PTR(-ENOSPC); 3749 return ERR_PTR(ret);
3734
3735 path = btrfs_alloc_path();
3736 if (!path) {
3737 root->fs_info->enospc_unlink = 0;
3738 return ERR_PTR(-ENOMEM);
3739 }
3740
3741 /* 1 for the orphan item */
3742 trans = btrfs_start_transaction(root, 1);
3743 if (IS_ERR(trans)) {
3744 btrfs_free_path(path);
3745 root->fs_info->enospc_unlink = 0;
3746 return trans;
3747 }
3748
3749 path->skip_locking = 1;
3750 path->search_commit_root = 1;
3751
3752 ret = btrfs_lookup_inode(trans, root, path,
3753 &BTRFS_I(dir)->location, 0);
3754 if (ret < 0) {
3755 err = ret;
3756 goto out;
3757 }
3758 if (ret == 0) {
3759 if (check_path_shared(root, path))
3760 goto out;
3761 } else {
3762 check_link = 0;
3763 }
3764 btrfs_release_path(path);
3765
3766 ret = btrfs_lookup_inode(trans, root, path,
3767 &BTRFS_I(inode)->location, 0);
3768 if (ret < 0) {
3769 err = ret;
3770 goto out;
3771 }
3772 if (ret == 0) {
3773 if (check_path_shared(root, path))
3774 goto out;
3775 } else {
3776 check_link = 0;
3777 }
3778 btrfs_release_path(path);
3779
3780 if (ret == 0 && S_ISREG(inode->i_mode)) {
3781 ret = btrfs_lookup_file_extent(trans, root, path,
3782 ino, (u64)-1, 0);
3783 if (ret < 0) {
3784 err = ret;
3785 goto out;
3786 } 3750 }
3787 BUG_ON(ret == 0); /* Corruption */
3788 if (check_path_shared(root, path))
3789 goto out;
3790 btrfs_release_path(path);
3791 }
3792
3793 if (!check_link) {
3794 err = 0;
3795 goto out;
3796 }
3797
3798 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3799 dentry->d_name.name, dentry->d_name.len, 0);
3800 if (IS_ERR(di)) {
3801 err = PTR_ERR(di);
3802 goto out;
3803 }
3804 if (di) {
3805 if (check_path_shared(root, path))
3806 goto out;
3807 } else {
3808 err = 0;
3809 goto out;
3810 }
3811 btrfs_release_path(path);
3812
3813 ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
3814 dentry->d_name.len, ino, dir_ino, 0,
3815 &index);
3816 if (ret) {
3817 err = ret;
3818 goto out;
3819 }
3820
3821 if (check_path_shared(root, path))
3822 goto out;
3823
3824 btrfs_release_path(path);
3825
3826 /*
3827 * This is a commit root search, if we can lookup inode item and other
3828 * relative items in the commit root, it means the transaction of
3829 * dir/file creation has been committed, and the dir index item that we
3830 * delay to insert has also been inserted into the commit root. So
3831 * we needn't worry about the delayed insertion of the dir index item
3832 * here.
3833 */
3834 di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
3835 dentry->d_name.name, dentry->d_name.len, 0);
3836 if (IS_ERR(di)) {
3837 err = PTR_ERR(di);
3838 goto out;
3839 }
3840 BUG_ON(ret == -ENOENT);
3841 if (check_path_shared(root, path))
3842 goto out;
3843
3844 err = 0;
3845out:
3846 btrfs_free_path(path);
3847 /* Migrate the orphan reservation over */
3848 if (!err)
3849 err = btrfs_block_rsv_migrate(trans->block_rsv,
3850 &root->fs_info->global_block_rsv,
3851 trans->bytes_reserved);
3852
3853 if (err) {
3854 btrfs_end_transaction(trans, root);
3855 root->fs_info->enospc_unlink = 0;
3856 return ERR_PTR(err);
3857 }
3858
3859 trans->block_rsv = &root->fs_info->global_block_rsv;
3860 return trans;
3861}
3862
3863static void __unlink_end_trans(struct btrfs_trans_handle *trans,
3864 struct btrfs_root *root)
3865{
3866 if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) {
3867 btrfs_block_rsv_release(root, trans->block_rsv,
3868 trans->bytes_reserved);
3869 trans->block_rsv = &root->fs_info->trans_block_rsv; 3751 trans->block_rsv = &root->fs_info->trans_block_rsv;
3870 BUG_ON(!root->fs_info->enospc_unlink); 3752 trans->bytes_reserved = num_bytes;
3871 root->fs_info->enospc_unlink = 0;
3872 } 3753 }
3873 btrfs_end_transaction(trans, root); 3754 return trans;
3874} 3755}
3875 3756
3876static int btrfs_unlink(struct inode *dir, struct dentry *dentry) 3757static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -3880,7 +3761,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3880 struct inode *inode = dentry->d_inode; 3761 struct inode *inode = dentry->d_inode;
3881 int ret; 3762 int ret;
3882 3763
3883 trans = __unlink_start_trans(dir, dentry); 3764 trans = __unlink_start_trans(dir);
3884 if (IS_ERR(trans)) 3765 if (IS_ERR(trans))
3885 return PTR_ERR(trans); 3766 return PTR_ERR(trans);
3886 3767
@@ -3898,7 +3779,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3898 } 3779 }
3899 3780
3900out: 3781out:
3901 __unlink_end_trans(trans, root); 3782 btrfs_end_transaction(trans, root);
3902 btrfs_btree_balance_dirty(root); 3783 btrfs_btree_balance_dirty(root);
3903 return ret; 3784 return ret;
3904} 3785}
@@ -3995,7 +3876,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3995 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) 3876 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
3996 return -EPERM; 3877 return -EPERM;
3997 3878
3998 trans = __unlink_start_trans(dir, dentry); 3879 trans = __unlink_start_trans(dir);
3999 if (IS_ERR(trans)) 3880 if (IS_ERR(trans))
4000 return PTR_ERR(trans); 3881 return PTR_ERR(trans);
4001 3882
@@ -4017,7 +3898,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4017 if (!err) 3898 if (!err)
4018 btrfs_i_size_write(inode, 0); 3899 btrfs_i_size_write(inode, 0);
4019out: 3900out:
4020 __unlink_end_trans(trans, root); 3901 btrfs_end_transaction(trans, root);
4021 btrfs_btree_balance_dirty(root); 3902 btrfs_btree_balance_dirty(root);
4022 3903
4023 return err; 3904 return err;
@@ -4395,6 +4276,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4395 u64 hole_size; 4276 u64 hole_size;
4396 int err = 0; 4277 int err = 0;
4397 4278
4279 /*
4280 * If our size started in the middle of a page we need to zero out the
4281 * rest of the page before we expand the i_size, otherwise we could
4282 * expose stale data.
4283 */
4284 err = btrfs_truncate_page(inode, oldsize, 0, 0);
4285 if (err)
4286 return err;
4287
4398 if (size <= hole_start) 4288 if (size <= hole_start)
4399 return 0; 4289 return 0;
4400 4290
@@ -4509,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4509 int mask = attr->ia_valid; 4399 int mask = attr->ia_valid;
4510 int ret; 4400 int ret;
4511 4401
4512 if (newsize == oldsize)
4513 return 0;
4514
4515 /* 4402 /*
4516 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a 4403 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
4517 * special case where we need to update the times despite not having 4404 * special case where we need to update the times despite not having
@@ -4822,11 +4709,6 @@ static int fixup_tree_root_location(struct btrfs_root *root,
4822 goto out; 4709 goto out;
4823 } 4710 }
4824 4711
4825 if (btrfs_root_refs(&new_root->root_item) == 0) {
4826 err = -ENOENT;
4827 goto out;
4828 }
4829
4830 *sub_root = new_root; 4712 *sub_root = new_root;
4831 location->objectid = btrfs_root_dirid(&new_root->root_item); 4713 location->objectid = btrfs_root_dirid(&new_root->root_item);
4832 location->type = BTRFS_INODE_ITEM_KEY; 4714 location->type = BTRFS_INODE_ITEM_KEY;
@@ -5092,8 +4974,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5092 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4974 if (!(inode->i_sb->s_flags & MS_RDONLY))
5093 ret = btrfs_orphan_cleanup(sub_root); 4975 ret = btrfs_orphan_cleanup(sub_root);
5094 up_read(&root->fs_info->cleanup_work_sem); 4976 up_read(&root->fs_info->cleanup_work_sem);
5095 if (ret) 4977 if (ret) {
4978 iput(inode);
5096 inode = ERR_PTR(ret); 4979 inode = ERR_PTR(ret);
4980 }
5097 } 4981 }
5098 4982
5099 return inode; 4983 return inode;
@@ -5137,10 +5021,9 @@ unsigned char btrfs_filetype_table[] = {
5137 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 5021 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
5138}; 5022};
5139 5023
5140static int btrfs_real_readdir(struct file *filp, void *dirent, 5024static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5141 filldir_t filldir)
5142{ 5025{
5143 struct inode *inode = file_inode(filp); 5026 struct inode *inode = file_inode(file);
5144 struct btrfs_root *root = BTRFS_I(inode)->root; 5027 struct btrfs_root *root = BTRFS_I(inode)->root;
5145 struct btrfs_item *item; 5028 struct btrfs_item *item;
5146 struct btrfs_dir_item *di; 5029 struct btrfs_dir_item *di;
@@ -5161,29 +5044,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
5161 char tmp_name[32]; 5044 char tmp_name[32];
5162 char *name_ptr; 5045 char *name_ptr;
5163 int name_len; 5046 int name_len;
5164 int is_curr = 0; /* filp->f_pos points to the current index? */ 5047 int is_curr = 0; /* ctx->pos points to the current index? */
5165 5048
5166 /* FIXME, use a real flag for deciding about the key type */ 5049 /* FIXME, use a real flag for deciding about the key type */
5167 if (root->fs_info->tree_root == root) 5050 if (root->fs_info->tree_root == root)
5168 key_type = BTRFS_DIR_ITEM_KEY; 5051 key_type = BTRFS_DIR_ITEM_KEY;
5169 5052
5170 /* special case for "." */ 5053 if (!dir_emit_dots(file, ctx))
5171 if (filp->f_pos == 0) { 5054 return 0;
5172 over = filldir(dirent, ".", 1, 5055
5173 filp->f_pos, btrfs_ino(inode), DT_DIR);
5174 if (over)
5175 return 0;
5176 filp->f_pos = 1;
5177 }
5178 /* special case for .., just use the back ref */
5179 if (filp->f_pos == 1) {
5180 u64 pino = parent_ino(filp->f_path.dentry);
5181 over = filldir(dirent, "..", 2,
5182 filp->f_pos, pino, DT_DIR);
5183 if (over)
5184 return 0;
5185 filp->f_pos = 2;
5186 }
5187 path = btrfs_alloc_path(); 5056 path = btrfs_alloc_path();
5188 if (!path) 5057 if (!path)
5189 return -ENOMEM; 5058 return -ENOMEM;
@@ -5197,7 +5066,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
5197 } 5066 }
5198 5067
5199 btrfs_set_key_type(&key, key_type); 5068 btrfs_set_key_type(&key, key_type);
5200 key.offset = filp->f_pos; 5069 key.offset = ctx->pos;
5201 key.objectid = btrfs_ino(inode); 5070 key.objectid = btrfs_ino(inode);
5202 5071
5203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5072 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -5223,14 +5092,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
5223 break; 5092 break;
5224 if (btrfs_key_type(&found_key) != key_type) 5093 if (btrfs_key_type(&found_key) != key_type)
5225 break; 5094 break;
5226 if (found_key.offset < filp->f_pos) 5095 if (found_key.offset < ctx->pos)
5227 goto next; 5096 goto next;
5228 if (key_type == BTRFS_DIR_INDEX_KEY && 5097 if (key_type == BTRFS_DIR_INDEX_KEY &&
5229 btrfs_should_delete_dir_index(&del_list, 5098 btrfs_should_delete_dir_index(&del_list,
5230 found_key.offset)) 5099 found_key.offset))
5231 goto next; 5100 goto next;
5232 5101
5233 filp->f_pos = found_key.offset; 5102 ctx->pos = found_key.offset;
5234 is_curr = 1; 5103 is_curr = 1;
5235 5104
5236 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 5105 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
@@ -5274,9 +5143,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
5274 over = 0; 5143 over = 0;
5275 goto skip; 5144 goto skip;
5276 } 5145 }
5277 over = filldir(dirent, name_ptr, name_len, 5146 over = !dir_emit(ctx, name_ptr, name_len,
5278 found_key.offset, location.objectid, 5147 location.objectid, d_type);
5279 d_type);
5280 5148
5281skip: 5149skip:
5282 if (name_ptr != tmp_name) 5150 if (name_ptr != tmp_name)
@@ -5295,22 +5163,38 @@ next:
5295 5163
5296 if (key_type == BTRFS_DIR_INDEX_KEY) { 5164 if (key_type == BTRFS_DIR_INDEX_KEY) {
5297 if (is_curr) 5165 if (is_curr)
5298 filp->f_pos++; 5166 ctx->pos++;
5299 ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, 5167 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5300 &ins_list);
5301 if (ret) 5168 if (ret)
5302 goto nopos; 5169 goto nopos;
5303 } 5170 }
5304 5171
5305 /* Reached end of directory/root. Bump pos past the last item. */ 5172 /* Reached end of directory/root. Bump pos past the last item. */
5306 if (key_type == BTRFS_DIR_INDEX_KEY) 5173 ctx->pos++;
5307 /* 5174
5308 * 32-bit glibc will use getdents64, but then strtol - 5175 /*
5309 * so the last number we can serve is this. 5176 * Stop new entries from being returned after we return the last
5310 */ 5177 * entry.
5311 filp->f_pos = 0x7fffffff; 5178 *
5312 else 5179 * New directory entries are assigned a strictly increasing
5313 filp->f_pos++; 5180 * offset. This means that new entries created during readdir
5181 * are *guaranteed* to be seen in the future by that readdir.
5182 * This has broken buggy programs which operate on names as
5183 * they're returned by readdir. Until we re-use freed offsets
5184 * we have this hack to stop new entries from being returned
5185 * under the assumption that they'll never reach this huge
5186 * offset.
5187 *
5188 * This is being careful not to overflow 32bit loff_t unless the
5189 * last entry requires it because doing so has broken 32bit apps
5190 * in the past.
5191 */
5192 if (key_type == BTRFS_DIR_INDEX_KEY) {
5193 if (ctx->pos >= INT_MAX)
5194 ctx->pos = LLONG_MAX;
5195 else
5196 ctx->pos = INT_MAX;
5197 }
5314nopos: 5198nopos:
5315 ret = 0; 5199 ret = 0;
5316err: 5200err:
@@ -6518,10 +6402,10 @@ out:
6518 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6402 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6519 * block must be cow'd 6403 * block must be cow'd
6520 */ 6404 */
6521static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, 6405noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6522 struct inode *inode, u64 offset, u64 *len, 6406 struct inode *inode, u64 offset, u64 *len,
6523 u64 *orig_start, u64 *orig_block_len, 6407 u64 *orig_start, u64 *orig_block_len,
6524 u64 *ram_bytes) 6408 u64 *ram_bytes)
6525{ 6409{
6526 struct btrfs_path *path; 6410 struct btrfs_path *path;
6527 int ret; 6411 int ret;
@@ -6535,7 +6419,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6535 u64 num_bytes; 6419 u64 num_bytes;
6536 int slot; 6420 int slot;
6537 int found_type; 6421 int found_type;
6538 6422 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6539 path = btrfs_alloc_path(); 6423 path = btrfs_alloc_path();
6540 if (!path) 6424 if (!path)
6541 return -ENOMEM; 6425 return -ENOMEM;
@@ -6575,18 +6459,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6575 /* not a regular extent, must cow */ 6459 /* not a regular extent, must cow */
6576 goto out; 6460 goto out;
6577 } 6461 }
6462
6463 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6464 goto out;
6465
6578 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6466 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6467 if (disk_bytenr == 0)
6468 goto out;
6469
6470 if (btrfs_file_extent_compression(leaf, fi) ||
6471 btrfs_file_extent_encryption(leaf, fi) ||
6472 btrfs_file_extent_other_encoding(leaf, fi))
6473 goto out;
6474
6579 backref_offset = btrfs_file_extent_offset(leaf, fi); 6475 backref_offset = btrfs_file_extent_offset(leaf, fi);
6580 6476
6581 *orig_start = key.offset - backref_offset; 6477 if (orig_start) {
6582 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); 6478 *orig_start = key.offset - backref_offset;
6583 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6479 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6480 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6481 }
6584 6482
6585 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 6483 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6586 if (extent_end < offset + *len) {
6587 /* extent doesn't include our full range, must cow */
6588 goto out;
6589 }
6590 6484
6591 if (btrfs_extent_readonly(root, disk_bytenr)) 6485 if (btrfs_extent_readonly(root, disk_bytenr))
6592 goto out; 6486 goto out;
@@ -6830,8 +6724,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6830 if (IS_ERR(trans)) 6724 if (IS_ERR(trans))
6831 goto must_cow; 6725 goto must_cow;
6832 6726
6833 if (can_nocow_odirect(trans, inode, start, &len, &orig_start, 6727 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6834 &orig_block_len, &ram_bytes) == 1) { 6728 &orig_block_len, &ram_bytes) == 1) {
6835 if (type == BTRFS_ORDERED_PREALLOC) { 6729 if (type == BTRFS_ORDERED_PREALLOC) {
6836 free_extent_map(em); 6730 free_extent_map(em);
6837 em = create_pinned_em(inode, start, len, 6731 em = create_pinned_em(inode, start, len,
@@ -7260,7 +7154,6 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7260{ 7154{
7261 struct btrfs_root *root = BTRFS_I(inode)->root; 7155 struct btrfs_root *root = BTRFS_I(inode)->root;
7262 struct btrfs_dio_private *dip; 7156 struct btrfs_dio_private *dip;
7263 struct bio_vec *bvec = dio_bio->bi_io_vec;
7264 struct bio *io_bio; 7157 struct bio *io_bio;
7265 int skip_sum; 7158 int skip_sum;
7266 int write = rw & REQ_WRITE; 7159 int write = rw & REQ_WRITE;
@@ -7282,16 +7175,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7282 } 7175 }
7283 7176
7284 dip->private = dio_bio->bi_private; 7177 dip->private = dio_bio->bi_private;
7285 io_bio->bi_private = dio_bio->bi_private;
7286 dip->inode = inode; 7178 dip->inode = inode;
7287 dip->logical_offset = file_offset; 7179 dip->logical_offset = file_offset;
7288 7180 dip->bytes = dio_bio->bi_size;
7289 dip->bytes = 0;
7290 do {
7291 dip->bytes += bvec->bv_len;
7292 bvec++;
7293 } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
7294
7295 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; 7181 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
7296 io_bio->bi_private = dip; 7182 io_bio->bi_private = dip;
7297 dip->errors = 0; 7183 dip->errors = 0;
@@ -7390,8 +7276,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7390 atomic_inc(&inode->i_dio_count); 7276 atomic_inc(&inode->i_dio_count);
7391 smp_mb__after_atomic_inc(); 7277 smp_mb__after_atomic_inc();
7392 7278
7279 /*
7280 * The generic stuff only does filemap_write_and_wait_range, which isn't
7281 * enough if we've written compressed pages to this area, so we need to
7282 * call btrfs_wait_ordered_range to make absolutely sure that any
7283 * outstanding dirty pages are on disk.
7284 */
7285 count = iov_length(iov, nr_segs);
7286 btrfs_wait_ordered_range(inode, offset, count);
7287
7393 if (rw & WRITE) { 7288 if (rw & WRITE) {
7394 count = iov_length(iov, nr_segs);
7395 /* 7289 /*
7396 * If the write DIO is beyond the EOF, we need update 7290 * If the write DIO is beyond the EOF, we need update
7397 * the isize, but it is protected by i_mutex. So we can 7291 * the isize, but it is protected by i_mutex. So we can
@@ -7510,7 +7404,8 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
7510 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); 7404 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
7511} 7405}
7512 7406
7513static void btrfs_invalidatepage(struct page *page, unsigned long offset) 7407static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7408 unsigned int length)
7514{ 7409{
7515 struct inode *inode = page->mapping->host; 7410 struct inode *inode = page->mapping->host;
7516 struct extent_io_tree *tree; 7411 struct extent_io_tree *tree;
@@ -7710,16 +7605,12 @@ static int btrfs_truncate(struct inode *inode)
7710{ 7605{
7711 struct btrfs_root *root = BTRFS_I(inode)->root; 7606 struct btrfs_root *root = BTRFS_I(inode)->root;
7712 struct btrfs_block_rsv *rsv; 7607 struct btrfs_block_rsv *rsv;
7713 int ret; 7608 int ret = 0;
7714 int err = 0; 7609 int err = 0;
7715 struct btrfs_trans_handle *trans; 7610 struct btrfs_trans_handle *trans;
7716 u64 mask = root->sectorsize - 1; 7611 u64 mask = root->sectorsize - 1;
7717 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7612 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7718 7613
7719 ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
7720 if (ret)
7721 return ret;
7722
7723 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7614 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
7724 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 7615 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
7725 7616
@@ -7977,9 +7868,9 @@ void btrfs_destroy_inode(struct inode *inode)
7977 */ 7868 */
7978 smp_mb(); 7869 smp_mb();
7979 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { 7870 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
7980 spin_lock(&root->fs_info->ordered_extent_lock); 7871 spin_lock(&root->fs_info->ordered_root_lock);
7981 list_del_init(&BTRFS_I(inode)->ordered_operations); 7872 list_del_init(&BTRFS_I(inode)->ordered_operations);
7982 spin_unlock(&root->fs_info->ordered_extent_lock); 7873 spin_unlock(&root->fs_info->ordered_root_lock);
7983 } 7874 }
7984 7875
7985 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7876 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
@@ -8349,7 +8240,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
8349 * some fairly slow code that needs optimization. This walks the list 8240 * some fairly slow code that needs optimization. This walks the list
8350 * of all the inodes with pending delalloc and forces them to disk. 8241 * of all the inodes with pending delalloc and forces them to disk.
8351 */ 8242 */
8352int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 8243static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8353{ 8244{
8354 struct btrfs_inode *binode; 8245 struct btrfs_inode *binode;
8355 struct inode *inode; 8246 struct inode *inode;
@@ -8358,30 +8249,23 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8358 struct list_head splice; 8249 struct list_head splice;
8359 int ret = 0; 8250 int ret = 0;
8360 8251
8361 if (root->fs_info->sb->s_flags & MS_RDONLY)
8362 return -EROFS;
8363
8364 INIT_LIST_HEAD(&works); 8252 INIT_LIST_HEAD(&works);
8365 INIT_LIST_HEAD(&splice); 8253 INIT_LIST_HEAD(&splice);
8366 8254
8367 spin_lock(&root->fs_info->delalloc_lock); 8255 spin_lock(&root->delalloc_lock);
8368 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 8256 list_splice_init(&root->delalloc_inodes, &splice);
8369 while (!list_empty(&splice)) { 8257 while (!list_empty(&splice)) {
8370 binode = list_entry(splice.next, struct btrfs_inode, 8258 binode = list_entry(splice.next, struct btrfs_inode,
8371 delalloc_inodes); 8259 delalloc_inodes);
8372 8260
8373 list_del_init(&binode->delalloc_inodes); 8261 list_move_tail(&binode->delalloc_inodes,
8374 8262 &root->delalloc_inodes);
8375 inode = igrab(&binode->vfs_inode); 8263 inode = igrab(&binode->vfs_inode);
8376 if (!inode) { 8264 if (!inode) {
8377 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, 8265 cond_resched_lock(&root->delalloc_lock);
8378 &binode->runtime_flags);
8379 continue; 8266 continue;
8380 } 8267 }
8381 8268 spin_unlock(&root->delalloc_lock);
8382 list_add_tail(&binode->delalloc_inodes,
8383 &root->fs_info->delalloc_inodes);
8384 spin_unlock(&root->fs_info->delalloc_lock);
8385 8269
8386 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 8270 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8387 if (unlikely(!work)) { 8271 if (unlikely(!work)) {
@@ -8393,16 +8277,39 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8393 &work->work); 8277 &work->work);
8394 8278
8395 cond_resched(); 8279 cond_resched();
8396 spin_lock(&root->fs_info->delalloc_lock); 8280 spin_lock(&root->delalloc_lock);
8397 } 8281 }
8398 spin_unlock(&root->fs_info->delalloc_lock); 8282 spin_unlock(&root->delalloc_lock);
8399 8283
8400 list_for_each_entry_safe(work, next, &works, list) { 8284 list_for_each_entry_safe(work, next, &works, list) {
8401 list_del_init(&work->list); 8285 list_del_init(&work->list);
8402 btrfs_wait_and_free_delalloc_work(work); 8286 btrfs_wait_and_free_delalloc_work(work);
8403 } 8287 }
8288 return 0;
8289out:
8290 list_for_each_entry_safe(work, next, &works, list) {
8291 list_del_init(&work->list);
8292 btrfs_wait_and_free_delalloc_work(work);
8293 }
8294
8295 if (!list_empty_careful(&splice)) {
8296 spin_lock(&root->delalloc_lock);
8297 list_splice_tail(&splice, &root->delalloc_inodes);
8298 spin_unlock(&root->delalloc_lock);
8299 }
8300 return ret;
8301}
8302
8303int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8304{
8305 int ret;
8306
8307 if (root->fs_info->sb->s_flags & MS_RDONLY)
8308 return -EROFS;
8404 8309
8405 /* the filemap_flush will queue IO into the worker threads, but 8310 ret = __start_delalloc_inodes(root, delay_iput);
8311 /*
8312 * the filemap_flush will queue IO into the worker threads, but
8406 * we have to make sure the IO is actually started and that 8313 * we have to make sure the IO is actually started and that
8407 * ordered extents get created before we return 8314 * ordered extents get created before we return
8408 */ 8315 */
@@ -8414,17 +8321,55 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8414 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 8321 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
8415 } 8322 }
8416 atomic_dec(&root->fs_info->async_submit_draining); 8323 atomic_dec(&root->fs_info->async_submit_draining);
8417 return 0; 8324 return ret;
8418out: 8325}
8419 list_for_each_entry_safe(work, next, &works, list) { 8326
8420 list_del_init(&work->list); 8327int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
8421 btrfs_wait_and_free_delalloc_work(work); 8328 int delay_iput)
8329{
8330 struct btrfs_root *root;
8331 struct list_head splice;
8332 int ret;
8333
8334 if (fs_info->sb->s_flags & MS_RDONLY)
8335 return -EROFS;
8336
8337 INIT_LIST_HEAD(&splice);
8338
8339 spin_lock(&fs_info->delalloc_root_lock);
8340 list_splice_init(&fs_info->delalloc_roots, &splice);
8341 while (!list_empty(&splice)) {
8342 root = list_first_entry(&splice, struct btrfs_root,
8343 delalloc_root);
8344 root = btrfs_grab_fs_root(root);
8345 BUG_ON(!root);
8346 list_move_tail(&root->delalloc_root,
8347 &fs_info->delalloc_roots);
8348 spin_unlock(&fs_info->delalloc_root_lock);
8349
8350 ret = __start_delalloc_inodes(root, delay_iput);
8351 btrfs_put_fs_root(root);
8352 if (ret)
8353 goto out;
8354
8355 spin_lock(&fs_info->delalloc_root_lock);
8422 } 8356 }
8357 spin_unlock(&fs_info->delalloc_root_lock);
8423 8358
8359 atomic_inc(&fs_info->async_submit_draining);
8360 while (atomic_read(&fs_info->nr_async_submits) ||
8361 atomic_read(&fs_info->async_delalloc_pages)) {
8362 wait_event(fs_info->async_submit_wait,
8363 (atomic_read(&fs_info->nr_async_submits) == 0 &&
8364 atomic_read(&fs_info->async_delalloc_pages) == 0));
8365 }
8366 atomic_dec(&fs_info->async_submit_draining);
8367 return 0;
8368out:
8424 if (!list_empty_careful(&splice)) { 8369 if (!list_empty_careful(&splice)) {
8425 spin_lock(&root->fs_info->delalloc_lock); 8370 spin_lock(&fs_info->delalloc_root_lock);
8426 list_splice_tail(&splice, &root->fs_info->delalloc_inodes); 8371 list_splice_tail(&splice, &fs_info->delalloc_roots);
8427 spin_unlock(&root->fs_info->delalloc_lock); 8372 spin_unlock(&fs_info->delalloc_root_lock);
8428 } 8373 }
8429 return ret; 8374 return ret;
8430} 8375}
@@ -8731,7 +8676,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
8731static const struct file_operations btrfs_dir_file_operations = { 8676static const struct file_operations btrfs_dir_file_operations = {
8732 .llseek = generic_file_llseek, 8677 .llseek = generic_file_llseek,
8733 .read = generic_read_dir, 8678 .read = generic_read_dir,
8734 .readdir = btrfs_real_readdir, 8679 .iterate = btrfs_real_readdir,
8735 .unlocked_ioctl = btrfs_ioctl, 8680 .unlocked_ioctl = btrfs_ioctl,
8736#ifdef CONFIG_COMPAT 8681#ifdef CONFIG_COMPAT
8737 .compat_ioctl = btrfs_ioctl, 8682 .compat_ioctl = btrfs_ioctl,