1 files changed, 236 insertions, 7 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6c95159302dd..016c90fc85db 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
                if (btrfs_inode_generation(eb, src_item) == 0) {
                        struct extent_buffer *dst_eb = path->nodes[0];
+                        const u64 ino_size = btrfs_inode_size(eb, src_item);
+                        /*
+                         * For regular files an ino_size == 0 is used only when
+                         * logging that an inode exists, as part of a directory
+                         * fsync, and the inode wasn't fsynced before. In this
+                         * case don't set the size of the inode in the fs/subvol
+                         * tree, otherwise we would be throwing valid data away.
+                         */
                        if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
-                            S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+                            S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
+                            ino_size != 0) {
                                struct btrfs_map_token token;
-                                u64 ino_size = btrfs_inode_size(eb, src_item);
                                btrfs_init_map_token(&token);
                                btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -3124,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct inode *inode,
                          struct btrfs_path *path,
                          struct btrfs_path *dst_path, int key_type,
+                          struct btrfs_log_ctx *ctx,
                          u64 min_offset, u64 *last_offset_ret)
 {
        struct btrfs_key min_key;
@@ -3208,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                src = path->nodes[0];
                nritems = btrfs_header_nritems(src);
                for (i = path->slots[0]; i < nritems; i++) {
+                        struct btrfs_dir_item *di;
                        btrfs_item_key_to_cpu(src, &min_key, i);
                        if (min_key.objectid != ino || min_key.type != key_type)
@@ -3218,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                                err = ret;
                                goto done;
                        }
+                        /*
+                         * We must make sure that when we log a directory entry,
+                         * the corresponding inode, after log replay, has a
+                         * matching link count. For example:
+                         *
+                         * touch foo
+                         * mkdir mydir
+                         * sync
+                         * ln foo mydir/bar
+                         * xfs_io -c "fsync" mydir
+                         * <crash>
+                         * <mount fs and log replay>
+                         *
+                         * Would result in a fsync log that when replayed, our
+                         * file inode would have a link count of 1, but we get
+                         * two directory entries pointing to the same inode.
+                         * After removing one of the names, it would not be
+                         * possible to remove the other name, which resulted
+                         * always in stale file handle errors, and would not
+                         * be possible to rmdir the parent directory, since
+                         * its i_size could never decrement to the value
+                         * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
+                         */
+                        di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+                        btrfs_dir_item_key_to_cpu(src, di, &tmp);
+                        if (ctx &&
+                            (btrfs_dir_transid(src, di) == trans->transid ||
+                             btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+                            tmp.type != BTRFS_ROOT_ITEM_KEY)
+                                ctx->log_new_dentries = true;
                }
                path->slots[0] = nritems;
@@ -3279,7 +3321,8 @@ done:
 static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct inode *inode,
                          struct btrfs_path *path,
-                          struct btrfs_path *dst_path)
+                          struct btrfs_path *dst_path,
+                          struct btrfs_log_ctx *ctx)
 {
        u64 min_key;
        u64 max_key;
@@ -3291,7 +3334,7 @@ again:
        max_key = 0;
        while (1) {
                ret = log_dir_items(trans, root, inode, path,
-                                    dst_path, key_type, min_key,
+                                    dst_path, key_type, ctx, min_key,
                                    &max_key);
                if (ret)
                        return ret;
@@ -4067,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
        if (ret < 0) {
                return ret;
        } else if (ret > 0) {
-                *size_ret = i_size_read(inode);
+                *size_ret = 0;
        } else {
                struct btrfs_inode_item *item;
@@ -4374,15 +4417,18 @@ log_extents:
        }
        if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-                ret = log_directory_changes(trans, root, inode, path, dst_path);
+                ret = log_directory_changes(trans, root, inode, path, dst_path,
+                                            ctx);
                if (ret) {
                        err = ret;
                        goto out_unlock;
                }
        }
+        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->logged_trans = trans->transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
+        spin_unlock(&BTRFS_I(inode)->lock);
 out_unlock:
        if (unlikely(err))
                btrfs_put_logged_extents(&logged_list);
@@ -4469,6 +4515,181 @@ out:
        return ret;
 }
+struct btrfs_dir_list { /* work-list node used by log_new_dir_dentries() */
+        u64 ino; /* inode number of a directory still to be processed */
+        struct list_head list; /* link in that function's local dir_list */
+};
+/*
+ * Log the inodes of the new dentries of a directory. See log_dir_items() for
+ * details about why it is needed.
+ * This is a recursive operation - if an existing dentry corresponds to a
+ * directory, that directory's new entries are logged too (same behaviour as
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
+ * the dentries point to we do not lock their i_mutex, otherwise lockdep
+ * complains about the following circular lock dependency / possible deadlock:
+ *
+ *        CPU0                                        CPU1
+ *        ----                                        ----
+ * lock(&type->i_mutex_dir_key#3/2);
+ *                                            lock(sb_internal#2);
+ *                                            lock(&type->i_mutex_dir_key#3/2);
+ * lock(&sb->s_type->i_mutex_key#14);
+ *
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
+ * sb_start_intwrite() in btrfs_start_transaction().
+ * Not locking i_mutex of the inodes is still safe because:
+ *
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
+ *    that while logging the inode new references (names) are added or removed
+ *    from the inode, leaving the logged inode item with a link count that does
+ *    not match the number of logged inode reference items. This is fine because
+ *    at log replay time we compute the real number of links and correct the
+ *    link count in the inode item (see replay_one_buffer() and
+ *    link_to_fixup_dir());
+ *
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
+ *    while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
+ *    BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
+ *    has a size that doesn't match the sum of the lengths of all the logged
+ *    names. This does not result in a problem because if a dir_item key is
+ *    logged but its matching dir_index key is not logged, at log replay time we
+ *    don't use it to replay the respective name (see replay_one_name()). On the
+ *    other hand if only the dir_index key ends up being logged, the respective
+ *    name is added to the fs/subvol tree with both the dir_item and dir_index
+ *    keys created (see replay_one_name()).
+ *    The directory's inode item with a wrong i_size is not a problem as well,
+ *    since we don't use it at log replay time to set the i_size in the inode
+ *    item of the fs/subvol tree (see overwrite_item()).
+ *
+ * Implementation notes:
+ *
+ * The recursion is done iteratively with a FIFO list of directory inode
+ * numbers (struct btrfs_dir_list), seeded with @start_inode. For each queued
+ * directory the BTRFS_DIR_ITEM_KEY items of that inode are looked up in the
+ * log tree (root->log_root) and, for each one:
+ *
+ * - entries with a transid older than trans->transid are skipped unless they
+ *   are of type BTRFS_FT_DIR;
+ * - entries whose key, as read by btrfs_dir_item_key_to_cpu(), is of type
+ *   BTRFS_ROOT_ITEM_KEY are skipped;
+ * - inodes for which btrfs_inode_in_log() returns true are skipped;
+ * - every other inode is logged with btrfs_log_inode(), and if that call
+ *   left ctx->log_new_dentries set, its inode number is queued as well.
+ *
+ * The path is released before calling btrfs_log_inode(), so after each logged
+ * inode the search is restarted from the key following the last one processed.
+ *
+ * @trans:       transaction handle; its transid is used to filter entries.
+ * @root:        root whose log tree is searched and which is passed to
+ *               btrfs_iget() and btrfs_log_inode().
+ * @start_inode: directory whose logged dentries are the starting point.
+ * @ctx:         log context; dereferenced unconditionally, so it must not be
+ *               NULL. Its log_new_dentries field is overwritten here.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or the error returned
+ * by btrfs_search_forward(), btrfs_next_leaf(), btrfs_iget() or
+ * btrfs_log_inode(). After an error the remaining queued directories are not
+ * processed, but their list elements are still freed.
+ */
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct inode *start_inode,
+                                struct btrfs_log_ctx *ctx)
+{
+        struct btrfs_root *log = root->log_root;
+        struct btrfs_path *path;
+        LIST_HEAD(dir_list); /* FIFO of directories still to be processed */
+        struct btrfs_dir_list *dir_elem;
+        int ret = 0;
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+        dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
+        if (!dir_elem) {
+                btrfs_free_path(path);
+                return -ENOMEM;
+        }
+        dir_elem->ino = btrfs_ino(start_inode);
+        list_add_tail(&dir_elem->list, &dir_list); /* seed with the start dir */
+        while (!list_empty(&dir_list)) {
+                struct extent_buffer *leaf;
+                struct btrfs_key min_key;
+                int nritems;
+                int i;
+                dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
+                                            list);
+                if (ret) /* earlier failure: only unlink and free the element */
+                        goto next_dir_inode;
+                min_key.objectid = dir_elem->ino;
+                min_key.type = BTRFS_DIR_ITEM_KEY;
+                min_key.offset = 0;
+again:
+                btrfs_release_path(path);
+                ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+                if (ret < 0) {
+                        goto next_dir_inode;
+                } else if (ret > 0) { /* positive result is not an error */
+                        ret = 0;
+                        goto next_dir_inode;
+                }
+process_leaf:
+                leaf = path->nodes[0];
+                nritems = btrfs_header_nritems(leaf);
+                for (i = path->slots[0]; i < nritems; i++) {
+                        struct btrfs_dir_item *di;
+                        struct btrfs_key di_key;
+                        struct inode *di_inode;
+                        struct btrfs_dir_list *new_dir_elem;
+                        int log_mode = LOG_INODE_EXISTS;
+                        int type;
+                        btrfs_item_key_to_cpu(leaf, &min_key, i);
+                        if (min_key.objectid != dir_elem->ino ||
+                            min_key.type != BTRFS_DIR_ITEM_KEY)
+                                goto next_dir_inode; /* past this dir's items */
+                        di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
+                        type = btrfs_dir_type(leaf, di);
+                        if (btrfs_dir_transid(leaf, di) < trans->transid &&
+                            type != BTRFS_FT_DIR)
+                                continue; /* older entry that is not a dir */
+                        btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
+                        if (di_key.type == BTRFS_ROOT_ITEM_KEY)
+                                continue; /* entry key is a root item: skip */
+                        di_inode = btrfs_iget(root->fs_info->sb, &di_key,
+                                              root, NULL);
+                        if (IS_ERR(di_inode)) {
+                                ret = PTR_ERR(di_inode);
+                                goto next_dir_inode;
+                        }
+                        if (btrfs_inode_in_log(di_inode, trans->transid)) {
+                                iput(di_inode);
+                                continue;
+                        }
+                        ctx->log_new_dentries = false; /* reset before logging */
+                        if (type == BTRFS_FT_DIR)
+                                log_mode = LOG_INODE_ALL;
+                        btrfs_release_path(path); /* drop path before logging */
+                        ret = btrfs_log_inode(trans, root, di_inode,
+                                              log_mode, 0, LLONG_MAX, ctx);
+                        iput(di_inode);
+                        if (ret)
+                                goto next_dir_inode;
+                        if (ctx->log_new_dentries) { /* set while logging: queue it */
+                                new_dir_elem = kmalloc(sizeof(*new_dir_elem),
+                                                       GFP_NOFS);
+                                if (!new_dir_elem) {
+                                        ret = -ENOMEM;
+                                        goto next_dir_inode;
+                                }
+                                new_dir_elem->ino = di_key.objectid;
+                                list_add_tail(&new_dir_elem->list, &dir_list);
+                        }
+                        break; /* path was released: restart the search below */
+                }
+                if (i == nritems) { /* leaf exhausted without logging an inode */
+                        ret = btrfs_next_leaf(log, path);
+                        if (ret < 0) {
+                                goto next_dir_inode;
+                        } else if (ret > 0) { /* positive result is not an error */
+                                ret = 0;
+                                goto next_dir_inode;
+                        }
+                        goto process_leaf;
+                }
+                if (min_key.offset < (u64)-1) { /* resume after the last key seen */
+                        min_key.offset++;
+                        goto again;
+                }
+next_dir_inode:
+                list_del(&dir_elem->list);
+                kfree(dir_elem);
+        }
+        btrfs_free_path(path);
+        return ret;
+}
 /*
 * helper function around btrfs_log_inode to make sure newly created
 * parent directories also end up in the log.  A minimal inode and backref
@@ -4491,6 +4712,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        const struct dentry * const first_parent = parent;
        const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
                                 last_committed);
+        bool log_dentries = false;
+        struct inode *orig_inode = inode;
        sb = inode->i_sb;
@@ -4546,6 +4769,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_trans;
        }
+        if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
+                log_dentries = true;
        while (1) {
                if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
                        break;
@@ -4582,7 +4808,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                dput(old_parent);
                old_parent = parent;
        }
-        ret = 0;
+        if (log_dentries)
+                ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+        else
+                ret = 0;
 end_trans:
        dput(old_parent);
        if (ret < 0) {

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6c95159302dd..016c90fc85db 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
492		492
493	if (btrfs_inode_generation(eb, src_item) == 0) {	493	if (btrfs_inode_generation(eb, src_item) == 0) {
494	struct extent_buffer *dst_eb = path->nodes[0];	494	struct extent_buffer *dst_eb = path->nodes[0];
		495	const u64 ino_size = btrfs_inode_size(eb, src_item);
495		496
		497	/*
		498	* For regular files an ino_size == 0 is used only when
		499	* logging that an inode exists, as part of a directory
		500	* fsync, and the inode wasn't fsynced before. In this
		501	* case don't set the size of the inode in the fs/subvol
		502	* tree, otherwise we would be throwing valid data away.
		503	*/
496	if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&	504	if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497	S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {	505	S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
		506	ino_size != 0) {
498	struct btrfs_map_token token;	507	struct btrfs_map_token token;
499	u64 ino_size = btrfs_inode_size(eb, src_item);
500		508
501	btrfs_init_map_token(&token);	509	btrfs_init_map_token(&token);
502	btrfs_set_token_inode_size(dst_eb, dst_item,	510	btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -3124,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3124	struct btrfs_root *root, struct inode *inode,	3132	struct btrfs_root *root, struct inode *inode,
3125	struct btrfs_path *path,	3133	struct btrfs_path *path,
3126	struct btrfs_path *dst_path, int key_type,	3134	struct btrfs_path *dst_path, int key_type,
		3135	struct btrfs_log_ctx *ctx,
3127	u64 min_offset, u64 *last_offset_ret)	3136	u64 min_offset, u64 *last_offset_ret)
3128	{	3137	{
3129	struct btrfs_key min_key;	3138	struct btrfs_key min_key;
@@ -3208,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3208	src = path->nodes[0];	3217	src = path->nodes[0];
3209	nritems = btrfs_header_nritems(src);	3218	nritems = btrfs_header_nritems(src);
3210	for (i = path->slots[0]; i < nritems; i++) {	3219	for (i = path->slots[0]; i < nritems; i++) {
		3220	struct btrfs_dir_item *di;
		3221
3211	btrfs_item_key_to_cpu(src, &min_key, i);	3222	btrfs_item_key_to_cpu(src, &min_key, i);
3212		3223
3213	if (min_key.objectid != ino || min_key.type != key_type)	3224	if (min_key.objectid != ino || min_key.type != key_type)
@@ -3218,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3218	err = ret;	3229	err = ret;
3219	goto done;	3230	goto done;
3220	}	3231	}
		3232
		3233	/*
		3234	* We must make sure that when we log a directory entry,
		3235	* the corresponding inode, after log replay, has a
		3236	* matching link count. For example:
		3237	*
		3238	* touch foo
		3239	* mkdir mydir
		3240	* sync
		3241	* ln foo mydir/bar
		3242	* xfs_io -c "fsync" mydir
		3243	* <crash>
		3244	* <mount fs and log replay>
		3245	*
		3246	* Would result in a fsync log that when replayed, our
		3247	* file inode would have a link count of 1, but we get
		3248	* two directory entries pointing to the same inode.
		3249	* After removing one of the names, it would not be
		3250	* possible to remove the other name, which resulted
		3251	* always in stale file handle errors, and would not
		3252	* be possible to rmdir the parent directory, since
		3253	* its i_size could never decrement to the value
		3254	* BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
		3255	*/
		3256	di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
		3257	btrfs_dir_item_key_to_cpu(src, di, &tmp);
		3258	if (ctx &&
		3259	(btrfs_dir_transid(src, di) == trans->transid ||
		3260	btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
		3261	tmp.type != BTRFS_ROOT_ITEM_KEY)
		3262	ctx->log_new_dentries = true;
3221	}	3263	}
3222	path->slots[0] = nritems;	3264	path->slots[0] = nritems;
3223		3265
@@ -3279,7 +3321,8 @@ done:
3279	static noinline int log_directory_changes(struct btrfs_trans_handle *trans,	3321	static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
3280	struct btrfs_root *root, struct inode *inode,	3322	struct btrfs_root *root, struct inode *inode,
3281	struct btrfs_path *path,	3323	struct btrfs_path *path,
3282	struct btrfs_path *dst_path)	3324	struct btrfs_path *dst_path,
		3325	struct btrfs_log_ctx *ctx)
3283	{	3326	{
3284	u64 min_key;	3327	u64 min_key;
3285	u64 max_key;	3328	u64 max_key;
@@ -3291,7 +3334,7 @@ again:
3291	max_key = 0;	3334	max_key = 0;
3292	while (1) {	3335	while (1) {
3293	ret = log_dir_items(trans, root, inode, path,	3336	ret = log_dir_items(trans, root, inode, path,
3294	dst_path, key_type, min_key,	3337	dst_path, key_type, ctx, min_key,
3295	&max_key);	3338	&max_key);
3296	if (ret)	3339	if (ret)
3297	return ret;	3340	return ret;
@@ -4067,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
4067	if (ret < 0) {	4110	if (ret < 0) {
4068	return ret;	4111	return ret;
4069	} else if (ret > 0) {	4112	} else if (ret > 0) {
4070	*size_ret = i_size_read(inode);	4113	*size_ret = 0;
4071	} else {	4114	} else {
4072	struct btrfs_inode_item *item;	4115	struct btrfs_inode_item *item;
4073		4116
@@ -4374,15 +4417,18 @@ log_extents:
4374	}	4417	}
4375		4418
4376	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {	4419	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
4377	ret = log_directory_changes(trans, root, inode, path, dst_path);	4420	ret = log_directory_changes(trans, root, inode, path, dst_path,
		4421	ctx);
4378	if (ret) {	4422	if (ret) {
4379	err = ret;	4423	err = ret;
4380	goto out_unlock;	4424	goto out_unlock;
4381	}	4425	}
4382	}	4426	}
4383		4427
		4428	spin_lock(&BTRFS_I(inode)->lock);
4384	BTRFS_I(inode)->logged_trans = trans->transid;	4429	BTRFS_I(inode)->logged_trans = trans->transid;
4385	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;	4430	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
		4431	spin_unlock(&BTRFS_I(inode)->lock);
4386	out_unlock:	4432	out_unlock:
4387	if (unlikely(err))	4433	if (unlikely(err))
4388	btrfs_put_logged_extents(&logged_list);	4434	btrfs_put_logged_extents(&logged_list);
@@ -4469,6 +4515,181 @@ out:
4469	return ret;	4515	return ret;
4470	}	4516	}
4471		4517
		4518	struct btrfs_dir_list {
		4519	u64 ino;
		4520	struct list_head list;
		4521	};
		4522
		4523	/*
		4524	* Log the inodes of the new dentries of a directory. See log_dir_items() for
		4525	* details about the why it is needed.
		4526	* This is a recursive operation - if an existing dentry corresponds to a
		4527	* directory, that directory's new entries are logged too (same behaviour as
		4528	* ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
		4529	* the dentries point to we do not lock their i_mutex, otherwise lockdep
		4530	* complains about the following circular lock dependency / possible deadlock:
		4531	*
		4532	*        CPU0                                        CPU1
		4533	*        ----                                        ----
		4534	* lock(&type->i_mutex_dir_key#3/2);
		4535	*                                            lock(sb_internal#2);
		4536	*                                            lock(&type->i_mutex_dir_key#3/2);
		4537	* lock(&sb->s_type->i_mutex_key#14);
		4538	*
		4539	* Where sb_internal is the lock (a counter that works as a lock) acquired by
		4540	* sb_start_intwrite() in btrfs_start_transaction().
		4541	* Not locking i_mutex of the inodes is still safe because:
		4542	*
		4543	* 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
		4544	* that while logging the inode new references (names) are added or removed
		4545	* from the inode, leaving the logged inode item with a link count that does
		4546	* not match the number of logged inode reference items. This is fine because
		4547	* at log replay time we compute the real number of links and correct the
		4548	* link count in the inode item (see replay_one_buffer() and
		4549	* link_to_fixup_dir());
		4550	*
		4551	* 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
		4552	* while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
		4553	* BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
		4554	* has a size that doesn't match the sum of the lengths of all the logged
		4555	* names. This does not result in a problem because if a dir_item key is
		4556	* logged but its matching dir_index key is not logged, at log replay time we
		4557	* don't use it to replay the respective name (see replay_one_name()). On the
		4558	* other hand if only the dir_index key ends up being logged, the respective
		4559	* name is added to the fs/subvol tree with both the dir_item and dir_index
		4560	* keys created (see replay_one_name()).
		4561	* The directory's inode item with a wrong i_size is not a problem as well,
		4562	* since we don't use it at log replay time to set the i_size in the inode
		4563	* item of the fs/subvol tree (see overwrite_item()).
		4564	*/
		4565	static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
		4566	struct btrfs_root *root,
		4567	struct inode *start_inode,
		4568	struct btrfs_log_ctx *ctx)
		4569	{
		4570	struct btrfs_root *log = root->log_root;
		4571	struct btrfs_path *path;
		4572	LIST_HEAD(dir_list);
		4573	struct btrfs_dir_list *dir_elem;
		4574	int ret = 0;
		4575
		4576	path = btrfs_alloc_path();
		4577	if (!path)
		4578	return -ENOMEM;
		4579
		4580	dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
		4581	if (!dir_elem) {
		4582	btrfs_free_path(path);
		4583	return -ENOMEM;
		4584	}
		4585	dir_elem->ino = btrfs_ino(start_inode);
		4586	list_add_tail(&dir_elem->list, &dir_list);
		4587
		4588	while (!list_empty(&dir_list)) {
		4589	struct extent_buffer *leaf;
		4590	struct btrfs_key min_key;
		4591	int nritems;
		4592	int i;
		4593
		4594	dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
		4595	list);
		4596	if (ret)
		4597	goto next_dir_inode;
		4598
		4599	min_key.objectid = dir_elem->ino;
		4600	min_key.type = BTRFS_DIR_ITEM_KEY;
		4601	min_key.offset = 0;
		4602	again:
		4603	btrfs_release_path(path);
		4604	ret = btrfs_search_forward(log, &min_key, path, trans->transid);
		4605	if (ret < 0) {
		4606	goto next_dir_inode;
		4607	} else if (ret > 0) {
		4608	ret = 0;
		4609	goto next_dir_inode;
		4610	}
		4611
		4612	process_leaf:
		4613	leaf = path->nodes[0];
		4614	nritems = btrfs_header_nritems(leaf);
		4615	for (i = path->slots[0]; i < nritems; i++) {
		4616	struct btrfs_dir_item *di;
		4617	struct btrfs_key di_key;
		4618	struct inode *di_inode;
		4619	struct btrfs_dir_list *new_dir_elem;
		4620	int log_mode = LOG_INODE_EXISTS;
		4621	int type;
		4622
		4623	btrfs_item_key_to_cpu(leaf, &min_key, i);
		4624	if (min_key.objectid != dir_elem->ino ||
		4625	min_key.type != BTRFS_DIR_ITEM_KEY)
		4626	goto next_dir_inode;
		4627
		4628	di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
		4629	type = btrfs_dir_type(leaf, di);
		4630	if (btrfs_dir_transid(leaf, di) < trans->transid &&
		4631	type != BTRFS_FT_DIR)
		4632	continue;
		4633	btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
		4634	if (di_key.type == BTRFS_ROOT_ITEM_KEY)
		4635	continue;
		4636
		4637	di_inode = btrfs_iget(root->fs_info->sb, &di_key,
		4638	root, NULL);
		4639	if (IS_ERR(di_inode)) {
		4640	ret = PTR_ERR(di_inode);
		4641	goto next_dir_inode;
		4642	}
		4643
		4644	if (btrfs_inode_in_log(di_inode, trans->transid)) {
		4645	iput(di_inode);
		4646	continue;
		4647	}
		4648
		4649	ctx->log_new_dentries = false;
		4650	if (type == BTRFS_FT_DIR)
		4651	log_mode = LOG_INODE_ALL;
		4652	btrfs_release_path(path);
		4653	ret = btrfs_log_inode(trans, root, di_inode,
		4654	log_mode, 0, LLONG_MAX, ctx);
		4655	iput(di_inode);
		4656	if (ret)
		4657	goto next_dir_inode;
		4658	if (ctx->log_new_dentries) {
		4659	new_dir_elem = kmalloc(sizeof(*new_dir_elem),
		4660	GFP_NOFS);
		4661	if (!new_dir_elem) {
		4662	ret = -ENOMEM;
		4663	goto next_dir_inode;
		4664	}
		4665	new_dir_elem->ino = di_key.objectid;
		4666	list_add_tail(&new_dir_elem->list, &dir_list);
		4667	}
		4668	break;
		4669	}
		4670	if (i == nritems) {
		4671	ret = btrfs_next_leaf(log, path);
		4672	if (ret < 0) {
		4673	goto next_dir_inode;
		4674	} else if (ret > 0) {
		4675	ret = 0;
		4676	goto next_dir_inode;
		4677	}
		4678	goto process_leaf;
		4679	}
		4680	if (min_key.offset < (u64)-1) {
		4681	min_key.offset++;
		4682	goto again;
		4683	}
		4684	next_dir_inode:
		4685	list_del(&dir_elem->list);
		4686	kfree(dir_elem);
		4687	}
		4688
		4689	btrfs_free_path(path);
		4690	return ret;
		4691	}
		4692
4472	/*	4693	/*
4473	* helper function around btrfs_log_inode to make sure newly created	4694	* helper function around btrfs_log_inode to make sure newly created
4474	* parent directories also end up in the log. A minimal inode and backref	4695	* parent directories also end up in the log. A minimal inode and backref
@@ -4491,6 +4712,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4491	const struct dentry * const first_parent = parent;	4712	const struct dentry * const first_parent = parent;
4492	const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >	4713	const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4493	last_committed);	4714	last_committed);
		4715	bool log_dentries = false;
		4716	struct inode *orig_inode = inode;
4494		4717
4495	sb = inode->i_sb;	4718	sb = inode->i_sb;
4496		4719
@@ -4546,6 +4769,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4546	goto end_trans;	4769	goto end_trans;
4547	}	4770	}
4548		4771
		4772	if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
		4773	log_dentries = true;
		4774
4549	while (1) {	4775	while (1) {
4550	if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)	4776	if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4551	break;	4777	break;
@@ -4582,7 +4808,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4582	dput(old_parent);	4808	dput(old_parent);
4583	old_parent = parent;	4809	old_parent = parent;
4584	}	4810	}
4585	ret = 0;	4811	if (log_dentries)
		4812	ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
		4813	else
		4814	ret = 0;
4586	end_trans:	4815	end_trans:
4587	dput(old_parent);	4816	dput(old_parent);
4588	if (ret < 0) {	4817	if (ret < 0) {