Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes. These target a
  wide range of issues, but do include some important fixes for qgroups,
  O_DIRECT, and fsync handling. Jeff Mahoney moved around a few
  definitions to make them easier for userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during
  btrfs_copy_from_user, but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-21 13:49:22 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-21 13:49:22 -0400
commit: 07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch)
tree: e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/inode.c
parent: 63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff)
parent: c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff)
1 files changed, 400 insertions, 66 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6b7fe291a174..91419ef79b00 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -824,6 +824,7 @@ retry:
                                                async_extent->ram_size - 1, 0);
                        goto out_free_reserve;
                }
+                btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
                /*
                 * clear dirty, set writeback and unlock the pages.
@@ -861,6 +862,7 @@ retry:
        }
        return;
 out_free_reserve:
+        btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
 out_free:
        extent_clear_unlock_delalloc(inode, async_extent->start,
@@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode,
                                goto out_drop_extent_cache;
                }
+                btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
                if (disk_num_bytes < cur_alloc_size)
                        break;
@@ -1066,6 +1070,7 @@ out:
 out_drop_extent_cache:
        btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
 out_reserve:
+        btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
 out_unlock:
        extent_clear_unlock_delalloc(inode, start, end, locked_page,
@@ -1377,6 +1382,9 @@ next_slot:
                         */
                        if (csum_exist_in_range(root, disk_bytenr, num_bytes))
                                goto out_check;
+                        if (!btrfs_inc_nocow_writers(root->fs_info,
+                                                     disk_bytenr))
+                                goto out_check;
                        nocow = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = found_key.offset +
@@ -1391,6 +1399,9 @@ out_check:
                        path->slots[0]++;
                        if (!nolock && nocow)
                                btrfs_end_write_no_snapshoting(root);
+                        if (nocow)
+                                btrfs_dec_nocow_writers(root->fs_info,
+                                                        disk_bytenr);
                        goto next_slot;
                }
                if (!nocow) {
@@ -1411,6 +1422,9 @@ out_check:
                        if (ret) {
                                if (!nolock && nocow)
                                        btrfs_end_write_no_snapshoting(root);
+                                if (nocow)
+                                        btrfs_dec_nocow_writers(root->fs_info,
+                                                                disk_bytenr);
                                goto error;
                        }
                        cow_start = (u64)-1;
@@ -1453,6 +1467,8 @@ out_check:
                ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
                                               num_bytes, num_bytes, type);
+                if (nocow)
+                        btrfs_dec_nocow_writers(root->fs_info, disk_bytenr);
                BUG_ON(ret); /* -ENOMEM */
                if (root->root_key.objectid ==
@@ -7129,6 +7145,43 @@ out:
        return em;
 }
+static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
+                                                  const u64 start,
+                                                  const u64 len,
+                                                  const u64 orig_start,
+                                                  const u64 block_start,
+                                                  const u64 block_len,
+                                                  const u64 orig_block_len,
+                                                  const u64 ram_bytes,
+                                                  const int type)
+{
+        struct extent_map *em = NULL;
+        int ret;
+        down_read(&BTRFS_I(inode)->dio_sem);
+        if (type != BTRFS_ORDERED_NOCOW) {
+                em = create_pinned_em(inode, start, len, orig_start,
+                                      block_start, block_len, orig_block_len,
+                                      ram_bytes, type);
+                if (IS_ERR(em))
+                        goto out;
+        }
+        ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
+                                           len, block_len, type);
+        if (ret) {
+                if (em) {
+                        free_extent_map(em);
+                        btrfs_drop_extent_cache(inode, start,
+                                                start + len - 1, 0);
+                }
+                em = ERR_PTR(ret);
+        }
+ out:
+        up_read(&BTRFS_I(inode)->dio_sem);
+        return em;
+}
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
@@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        if (ret)
                return ERR_PTR(ret);
-        /*
+        em = btrfs_create_dio_extent(inode, start, ins.offset, start,
-         * Create the ordered extent before the extent map. This is to avoid
+                                     ins.objectid, ins.offset, ins.offset,
-         * races with the fast fsync path that would lead to it logging file
+                                     ins.offset, 0);
-         * extent items that point to disk extents that were not yet written to.
+        btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
-         * The fast fsync path collects ordered extents into a local list and
+        if (IS_ERR(em))
-         * then collects all the new extent maps, so we must create the ordered
-         * extent first and make sure the fast fsync path collects any new
-         * ordered extents after collecting new extent maps as well.
-         * The fsync path simply can not rely on inode_dio_wait() because it
-         * causes deadlock with AIO.
-         */
-        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
-                                           ins.offset, ins.offset, 0);
-        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-                return ERR_PTR(ret);
-        }
-        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-                              ins.offset, ins.offset, ins.offset, 0);
-        if (IS_ERR(em)) {
-                struct btrfs_ordered_extent *oe;
-                btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-                oe = btrfs_lookup_ordered_extent(inode, start);
-                ASSERT(oe);
-                if (WARN_ON(!oe))
-                        return em;
-                set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
-                set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
-                btrfs_remove_ordered_extent(inode, oe);
-                /* Once for our lookup and once for the ordered extents tree. */
-                btrfs_put_ordered_extent(oe);
-                btrfs_put_ordered_extent(oe);
-        }
        return em;
 }
@@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                block_start = em->block_start + (start - em->start);
                if (can_nocow_extent(inode, start, &len, &orig_start,
-                                     &orig_block_len, &ram_bytes) == 1) {
+                                     &orig_block_len, &ram_bytes) == 1 &&
+                    btrfs_inc_nocow_writers(root->fs_info, block_start)) {
+                        struct extent_map *em2;
+                        em2 = btrfs_create_dio_extent(inode, start, len,
+                                                      orig_start, block_start,
+                                                      len, orig_block_len,
+                                                      ram_bytes, type);
+                        btrfs_dec_nocow_writers(root->fs_info, block_start);
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
-                                em = create_pinned_em(inode, start, len,
+                                em = em2;
-                                                       orig_start,
-                                                       block_start, len,
-                                                       orig_block_len,
-                                                       ram_bytes, type);
-                                if (IS_ERR(em)) {
-                                        ret = PTR_ERR(em);
-                                        goto unlock_err;
-                                }
                        }
+                        if (em2 && IS_ERR(em2)) {
-                        ret = btrfs_add_ordered_extent_dio(inode, start,
+                                ret = PTR_ERR(em2);
-                                           block_start, len, len, type);
-                        if (ret) {
-                                free_extent_map(em);
                                goto unlock_err;
                        }
                        goto unlock;
@@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&ei->delalloc_inodes);
        INIT_LIST_HEAD(&ei->delayed_iput);
        RB_CLEAR_NODE(&ei->rb_node);
+        init_rwsem(&ei->dio_sem);
        return inode;
 }
@@ -9387,10 +9410,281 @@ static int btrfs_getattr(struct vfsmount *mnt,
        return 0;
 }
+static int btrfs_rename_exchange(struct inode *old_dir,
+                              struct dentry *old_dentry,
+                              struct inode *new_dir,
+                              struct dentry *new_dentry)
+{
+        struct btrfs_trans_handle *trans;
+        struct btrfs_root *root = BTRFS_I(old_dir)->root;
+        struct btrfs_root *dest = BTRFS_I(new_dir)->root;
+        struct inode *new_inode = new_dentry->d_inode;
+        struct inode *old_inode = old_dentry->d_inode;
+        struct timespec ctime = CURRENT_TIME;
+        struct dentry *parent;
+        u64 old_ino = btrfs_ino(old_inode);
+        u64 new_ino = btrfs_ino(new_inode);
+        u64 old_idx = 0;
+        u64 new_idx = 0;
+        u64 root_objectid;
+        int ret;
+        bool root_log_pinned = false;
+        bool dest_log_pinned = false;
+        /* we only allow rename subvolume link between subvolumes */
+        if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+                return -EXDEV;
+        /* close the race window with snapshot create/destroy ioctl */
+        if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+                down_read(&root->fs_info->subvol_sem);
+        if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+                down_read(&dest->fs_info->subvol_sem);
+        /*
+         * We want to reserve the absolute worst case amount of items.  So if
+         * both inodes are subvols and we need to unlink them then that would
+         * require 4 item modifications, but if they are both normal inodes it
+         * would require 5 item modifications, so we'll assume they are normal
+         * inodes.  So 5 * 2 is 10, plus 2 for the new links, so 12 total items
+         * should cover the worst case number of items we'll modify.
+         */
+        trans = btrfs_start_transaction(root, 12);
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                goto out_notrans;
+        }
+        /*
+         * We need to find a free sequence number both in the source and
+         * in the destination directory for the exchange.
+         */
+        ret = btrfs_set_inode_index(new_dir, &old_idx);
+        if (ret)
+                goto out_fail;
+        ret = btrfs_set_inode_index(old_dir, &new_idx);
+        if (ret)
+                goto out_fail;
+        BTRFS_I(old_inode)->dir_index = 0ULL;
+        BTRFS_I(new_inode)->dir_index = 0ULL;
+        /* Reference for the source. */
+        if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
+                /* force full log commit if subvolume involved. */
+                btrfs_set_log_full_commit(root->fs_info, trans);
+        } else {
+                btrfs_pin_log_trans(root);
+                root_log_pinned = true;
+                ret = btrfs_insert_inode_ref(trans, dest,
+                                             new_dentry->d_name.name,
+                                             new_dentry->d_name.len,
+                                             old_ino,
+                                             btrfs_ino(new_dir), old_idx);
+                if (ret)
+                        goto out_fail;
+        }
+        /* And now for the dest. */
+        if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
+                /* force full log commit if subvolume involved. */
+                btrfs_set_log_full_commit(dest->fs_info, trans);
+        } else {
+                btrfs_pin_log_trans(dest);
+                dest_log_pinned = true;
+                ret = btrfs_insert_inode_ref(trans, root,
+                                             old_dentry->d_name.name,
+                                             old_dentry->d_name.len,
+                                             new_ino,
+                                             btrfs_ino(old_dir), new_idx);
+                if (ret)
+                        goto out_fail;
+        }
+        /* Update inode version and ctime/mtime. */
+        inode_inc_iversion(old_dir);
+        inode_inc_iversion(new_dir);
+        inode_inc_iversion(old_inode);
+        inode_inc_iversion(new_inode);
+        old_dir->i_ctime = old_dir->i_mtime = ctime;
+        new_dir->i_ctime = new_dir->i_mtime = ctime;
+        old_inode->i_ctime = ctime;
+        new_inode->i_ctime = ctime;
+        if (old_dentry->d_parent != new_dentry->d_parent) {
+                btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+                btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+        }
+        /* src is a subvolume */
+        if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
+                root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+                ret = btrfs_unlink_subvol(trans, root, old_dir,
+                                          root_objectid,
+                                          old_dentry->d_name.name,
+                                          old_dentry->d_name.len);
+        } else { /* src is an inode */
+                ret = __btrfs_unlink_inode(trans, root, old_dir,
+                                           old_dentry->d_inode,
+                                           old_dentry->d_name.name,
+                                           old_dentry->d_name.len);
+                if (!ret)
+                        ret = btrfs_update_inode(trans, root, old_inode);
+        }
+        if (ret) {
+                btrfs_abort_transaction(trans, root, ret);
+                goto out_fail;
+        }
+        /* dest is a subvolume */
+        if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
+                root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
+                ret = btrfs_unlink_subvol(trans, dest, new_dir,
+                                          root_objectid,
+                                          new_dentry->d_name.name,
+                                          new_dentry->d_name.len);
+        } else { /* dest is an inode */
+                ret = __btrfs_unlink_inode(trans, dest, new_dir,
+                                           new_dentry->d_inode,
+                                           new_dentry->d_name.name,
+                                           new_dentry->d_name.len);
+                if (!ret)
+                        ret = btrfs_update_inode(trans, dest, new_inode);
+        }
+        if (ret) {
+                btrfs_abort_transaction(trans, root, ret);
+                goto out_fail;
+        }
+        ret = btrfs_add_link(trans, new_dir, old_inode,
+                             new_dentry->d_name.name,
+                             new_dentry->d_name.len, 0, old_idx);
+        if (ret) {
+                btrfs_abort_transaction(trans, root, ret);
+                goto out_fail;
+        }
+        ret = btrfs_add_link(trans, old_dir, new_inode,
+                             old_dentry->d_name.name,
+                             old_dentry->d_name.len, 0, new_idx);
+        if (ret) {
+                btrfs_abort_transaction(trans, root, ret);
+                goto out_fail;
+        }
+        if (old_inode->i_nlink == 1)
+                BTRFS_I(old_inode)->dir_index = old_idx;
+        if (new_inode->i_nlink == 1)
+                BTRFS_I(new_inode)->dir_index = new_idx;
+        if (root_log_pinned) {
+                parent = new_dentry->d_parent;
+                btrfs_log_new_name(trans, old_inode, old_dir, parent);
+                btrfs_end_log_trans(root);
+                root_log_pinned = false;
+        }
+        if (dest_log_pinned) {
+                parent = old_dentry->d_parent;
+                btrfs_log_new_name(trans, new_inode, new_dir, parent);
+                btrfs_end_log_trans(dest);
+                dest_log_pinned = false;
+        }
+out_fail:
+        /*
+         * If we have pinned a log and an error happened, we unpin tasks
+         * trying to sync the log and force them to fallback to a transaction
+         * commit if the log currently contains any of the inodes involved in
+         * this rename operation (to ensure we do not persist a log with an
+         * inconsistent state for any of these inodes or leading to any
+         * inconsistencies when replayed). If the transaction was aborted, the
+         * abortion reason is propagated to userspace when attempting to commit
+         * the transaction. If the log does not contain any of these inodes, we
+         * allow the tasks to sync it.
+         */
+        if (ret && (root_log_pinned || dest_log_pinned)) {
+                if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
+                    btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
+                    btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
+                    (new_inode &&
+                     btrfs_inode_in_log(new_inode, root->fs_info->generation)))
+                    btrfs_set_log_full_commit(root->fs_info, trans);
+                if (root_log_pinned) {
+                        btrfs_end_log_trans(root);
+                        root_log_pinned = false;
+                }
+                if (dest_log_pinned) {
+                        btrfs_end_log_trans(dest);
+                        dest_log_pinned = false;
+                }
+        }
+        ret = btrfs_end_transaction(trans, root);
+out_notrans:
+        if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+                up_read(&dest->fs_info->subvol_sem);
+        if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+                up_read(&root->fs_info->subvol_sem);
+        return ret;
+}
+static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
+                                     struct btrfs_root *root,
+                                     struct inode *dir,
+                                     struct dentry *dentry)
+{
+        int ret;
+        struct inode *inode;
+        u64 objectid;
+        u64 index;
+        ret = btrfs_find_free_ino(root, &objectid);
+        if (ret)
+                return ret;
+        inode = btrfs_new_inode(trans, root, dir,
+                                dentry->d_name.name,
+                                dentry->d_name.len,
+                                btrfs_ino(dir),
+                                objectid,
+                                S_IFCHR | WHITEOUT_MODE,
+                                &index);
+        if (IS_ERR(inode)) {
+                ret = PTR_ERR(inode);
+                return ret;
+        }
+        inode->i_op = &btrfs_special_inode_operations;
+        init_special_inode(inode, inode->i_mode,
+                WHITEOUT_DEV);
+        ret = btrfs_init_inode_security(trans, inode, dir,
+                                &dentry->d_name);
+        if (ret)
+                goto out;
+        ret = btrfs_add_nondir(trans, dir, dentry,
+                                inode, 0, index);
+        if (ret)
+                goto out;
+        ret = btrfs_update_inode(trans, root, inode);
+out:
+        unlock_new_inode(inode);
+        if (ret)
+                inode_dec_link_count(inode);
+        iput(inode);
+        return ret;
+}
 static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                           struct inode *new_dir, struct dentry *new_dentry)
+                           struct inode *new_dir, struct dentry *new_dentry,
+                           unsigned int flags)
 {
        struct btrfs_trans_handle *trans;
+        unsigned int trans_num_items;
        struct btrfs_root *root = BTRFS_I(old_dir)->root;
        struct btrfs_root *dest = BTRFS_I(new_dir)->root;
        struct inode *new_inode = d_inode(new_dentry);
@@ -9399,6 +9693,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        u64 root_objectid;
        int ret;
        u64 old_ino = btrfs_ino(old_inode);
+        bool log_pinned = false;
        if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
                return -EPERM;
@@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         * We want to reserve the absolute worst case amount of items.  So if
         * both inodes are subvols and we need to unlink them then that would
         * require 4 item modifications, but if they are both normal inodes it
-         * would require 5 item modifications, so we'll assume their normal
+         * would require 5 item modifications, so we'll assume they are normal
         * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
         * should cover the worst case number of items we'll modify.
+         * If our rename has the whiteout flag, we need 5 more units for the
+         * new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item
+         * when selinux is enabled).
         */
-        trans = btrfs_start_transaction(root, 11);
+        trans_num_items = 11;
+        if (flags & RENAME_WHITEOUT)
+                trans_num_items += 5;
+        trans = btrfs_start_transaction(root, trans_num_items);
        if (IS_ERR(trans)) {
-                ret = PTR_ERR(trans);
+                ret = PTR_ERR(trans);
-                goto out_notrans;
+                goto out_notrans;
-        }
+        }
        if (dest != root)
                btrfs_record_root_in_trans(trans, dest);
@@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                /* force full log commit if subvolume involved. */
                btrfs_set_log_full_commit(root->fs_info, trans);
        } else {
+                btrfs_pin_log_trans(root);
+                log_pinned = true;
                ret = btrfs_insert_inode_ref(trans, dest,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
@@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                             btrfs_ino(new_dir), index);
                if (ret)
                        goto out_fail;
-                /*
-                 * this is an ugly little race, but the rename is required
-                 * to make sure that if we crash, the inode is either at the
-                 * old name or the new one.  pinning the log transaction lets
-                 * us make sure we don't allow a log commit to come in after
-                 * we unlink the name but before we add the new name back in.
-                 */
-                btrfs_pin_log_trans(root);
        }
        inode_inc_iversion(old_dir);
@@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (old_inode->i_nlink == 1)
                BTRFS_I(old_inode)->dir_index = index;
-        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+        if (log_pinned) {
                struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
                btrfs_end_log_trans(root);
+                log_pinned = false;
+        }
+        if (flags & RENAME_WHITEOUT) {
+                ret = btrfs_whiteout_for_rename(trans, root, old_dir,
+                                                old_dentry);
+                if (ret) {
+                        btrfs_abort_transaction(trans, root, ret);
+                        goto out_fail;
+                }
        }
 out_fail:
+        /*
+         * If we have pinned the log and an error happened, we unpin tasks
+         * trying to sync the log and force them to fallback to a transaction
+         * commit if the log currently contains any of the inodes involved in
+         * this rename operation (to ensure we do not persist a log with an
+         * inconsistent state for any of these inodes or leading to any
+         * inconsistencies when replayed). If the transaction was aborted, the
+         * abortion reason is propagated to userspace when attempting to commit
+         * the transaction. If the log does not contain any of these inodes, we
+         * allow the tasks to sync it.
+         */
+        if (ret && log_pinned) {
+                if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
+                    btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
+                    btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
+                    (new_inode &&
+                     btrfs_inode_in_log(new_inode, root->fs_info->generation)))
+                    btrfs_set_log_full_commit(root->fs_info, trans);
+                btrfs_end_log_trans(root);
+                log_pinned = false;
+        }
        btrfs_end_transaction(trans, root);
 out_notrans:
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
@@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
                         struct inode *new_dir, struct dentry *new_dentry,
                         unsigned int flags)
 {
-        if (flags & ~RENAME_NOREPLACE)
+        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
-        return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
+        if (flags & RENAME_EXCHANGE)
+                return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
+                                          new_dentry);
+        return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 static void btrfs_run_delalloc_work(struct btrfs_work *work)
@@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                btrfs_end_transaction(trans, root);
                        break;
                }
+                btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
                last_alloc = ins.offset;
                ret = insert_reserved_file_extent(trans, inode,
@@ -10184,7 +10518,7 @@ static const struct file_operations btrfs_dir_file_operations = {
        .iterate        = btrfs_real_readdir,
        .unlocked_ioctl = btrfs_ioctl,
 #ifdef CONFIG_COMPAT
-        .compat_ioctl   = btrfs_ioctl,
+        .compat_ioctl   = btrfs_compat_ioctl,
 #endif
        .release        = btrfs_release_file,
        .fsync          = btrfs_sync_file,
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-21 13:49:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-21 13:49:22 -0400
commit	07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch)
tree	e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/inode.c
parent	63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff)
parent	c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff)