aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2015-06-19 19:44:51 -0400
committerChris Mason <clm@fb.com>2015-06-30 17:36:47 -0400
commit36283bf777d963fac099213297e155d071096994 (patch)
tree8dda56ac7201eff8337f6a761e60bc59acacff1a /fs/btrfs/tree-log.c
parente4545de5b035c7debb73d260c78377dbb69cbfb5 (diff)
Btrfs: fix fsync xattr loss in the fast fsync path
After commit 4f764e515361 ("Btrfs: remove deleted xattrs on fsync log replay"), we can end up in a situation where during log replay we end up deleting xattrs that were never deleted when their file was last fsynced. This happens in the fast fsync path (flag BTRFS_INODE_NEEDS_FULL_SYNC is not set in the inode) if the inode has the flag BTRFS_INODE_COPY_EVERYTHING set, the xattr was added in a past transaction and the leaf where the xattr is located was not updated (COWed or created) in the current transaction. In this scenario the xattr item never ends up in the log tree and therefore at log replay time, which makes the replay code delete the xattr from the fs/subvol tree as it thinks that xattr was deleted prior to the last fsync. Fix this by always logging all xattrs, which is the simplest and most reliable way to detect deleted xattrs and replay the deletes at log replay time. This issue is reproducible with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey . ./common/attr # real QA test starts here # We create a lot of xattrs for a single file. Only btrfs and xfs are currently # able to store such a large mount of xattrs per file, other filesystems such # as ext3/4 and f2fs for example, fail with ENOSPC even if we attempt to add # less than 1000 xattrs with very small values. _supported_fs btrfs xfs _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_attrs _require_metadata_journaling $SCRATCH_DEV rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and make sure everything is # durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" $SCRATCH_MNT/foo | _filter_xfs_io sync # Add many small xattrs to our file. # We create such a large amount because it's needed to trigger the issue found # in btrfs - we need to have an amount that causes the fs to have at least 3 # btree leafs with xattrs stored in them, and it must work on any leaf size # (maximum leaf/node size is 64Kb). num_xattrs=2000 for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" $SETFATTR_PROG -n $name -v "val_$(printf "%04d" $i)" $SCRATCH_MNT/foo done # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now update our file's data and fsync the file. # After a successful fsync, if the fsync log/journal is replayed we expect to # see all the xattrs we added before with the same values (and the updated file # data of course). Btrfs used to delete some of these xattrs when it replayed # its fsync log/journal. $XFS_IO_PROG -c "pwrite -S 0xbb 8K 16K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo echo "File xattrs after crash and log replay:" for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" echo -n "$name=" $GETFATTR_PROG --absolute-names -n $name --only-values $SCRATCH_MNT/foo echo done status=0 exit The golden output expects all xattrs to be available, and with the correct values, after the fsync log is replayed. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 76c4f9d1b80a..66f87156882f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,86 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
4117 return 0; 4117 return 0;
4118} 4118}
4119 4119
4120/*
4121 * At the moment we always log all xattrs. This is to figure out at log replay
4122 * time which xattrs must have their deletion replayed. If a xattr is missing
4123 * in the log tree and exists in the fs/subvol tree, we delete it. This is
4124 * because if a xattr is deleted, the inode is fsynced and a power failure
4125 * happens, causing the log to be replayed the next time the fs is mounted,
4126 * we want the xattr to not exist anymore (same behaviour as other filesystems
4127 * with a journal, ext3/4, xfs, f2fs, etc).
4128 */
4129static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
4130 struct btrfs_root *root,
4131 struct inode *inode,
4132 struct btrfs_path *path,
4133 struct btrfs_path *dst_path)
4134{
4135 int ret;
4136 struct btrfs_key key;
4137 const u64 ino = btrfs_ino(inode);
4138 int ins_nr = 0;
4139 int start_slot = 0;
4140
4141 key.objectid = ino;
4142 key.type = BTRFS_XATTR_ITEM_KEY;
4143 key.offset = 0;
4144
4145 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4146 if (ret < 0)
4147 return ret;
4148
4149 while (true) {
4150 int slot = path->slots[0];
4151 struct extent_buffer *leaf = path->nodes[0];
4152 int nritems = btrfs_header_nritems(leaf);
4153
4154 if (slot >= nritems) {
4155 if (ins_nr > 0) {
4156 u64 last_extent = 0;
4157
4158 ret = copy_items(trans, inode, dst_path, path,
4159 &last_extent, start_slot,
4160 ins_nr, 1, 0);
4161 /* can't be 1, extent items aren't processed */
4162 ASSERT(ret <= 0);
4163 if (ret < 0)
4164 return ret;
4165 ins_nr = 0;
4166 }
4167 ret = btrfs_next_leaf(root, path);
4168 if (ret < 0)
4169 return ret;
4170 else if (ret > 0)
4171 break;
4172 continue;
4173 }
4174
4175 btrfs_item_key_to_cpu(leaf, &key, slot);
4176 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
4177 break;
4178
4179 if (ins_nr == 0)
4180 start_slot = slot;
4181 ins_nr++;
4182 path->slots[0]++;
4183 cond_resched();
4184 }
4185 if (ins_nr > 0) {
4186 u64 last_extent = 0;
4187
4188 ret = copy_items(trans, inode, dst_path, path,
4189 &last_extent, start_slot,
4190 ins_nr, 1, 0);
4191 /* can't be 1, extent items aren't processed */
4192 ASSERT(ret <= 0);
4193 if (ret < 0)
4194 return ret;
4195 }
4196
4197 return 0;
4198}
4199
4120/* log a single inode in the tree log. 4200/* log a single inode in the tree log.
4121 * At least one parent directory for this inode must exist in the tree 4201 * At least one parent directory for this inode must exist in the tree
4122 * or be logged already. 4202 * or be logged already.
@@ -4289,6 +4369,25 @@ again:
4289 if (min_key.type == BTRFS_INODE_ITEM_KEY) 4369 if (min_key.type == BTRFS_INODE_ITEM_KEY)
4290 need_log_inode_item = false; 4370 need_log_inode_item = false;
4291 4371
4372 /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
4373 if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
4374 if (ins_nr == 0)
4375 goto next_slot;
4376 ret = copy_items(trans, inode, dst_path, path,
4377 &last_extent, ins_start_slot,
4378 ins_nr, inode_only, logged_isize);
4379 if (ret < 0) {
4380 err = ret;
4381 goto out_unlock;
4382 }
4383 ins_nr = 0;
4384 if (ret) {
4385 btrfs_release_path(path);
4386 continue;
4387 }
4388 goto next_slot;
4389 }
4390
4292 src = path->nodes[0]; 4391 src = path->nodes[0];
4293 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 4392 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
4294 ins_nr++; 4393 ins_nr++;
@@ -4356,6 +4455,11 @@ next_slot:
4356 ins_nr = 0; 4455 ins_nr = 0;
4357 } 4456 }
4358 4457
4458 btrfs_release_path(path);
4459 btrfs_release_path(dst_path);
4460 err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
4461 if (err)
4462 goto out_unlock;
4359log_extents: 4463log_extents:
4360 btrfs_release_path(path); 4464 btrfs_release_path(path);
4361 btrfs_release_path(dst_path); 4465 btrfs_release_path(dst_path);