aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@suse.com> 2015-01-10 05:56:48 -0500
committer: Chris Mason <clm@fb.com> 2015-01-21 21:02:04 -0500
commit: d36808e0d4fc4802892dbd1dba964912cddf1323 (patch)
tree: 11173d20f8a65c9ac59040292e417302bbcd7cb9 /fs
parent: 6219872dc6e56529159f04e73587ed0fcd63eb20 (diff)
Btrfs: fix directory inconsistency after fsync log replay
If we have an inode (file) with a link count greater than 1, remove one of its hard links, fsync the inode, power fail/crash and then replay the fsync log on the next mount, we end up with the parent directory's metadata inconsistent - its i_size still reflects the deleted hard link and it has dangling index entries (with no matching inode reference entries). This prevents the directory from ever being deletable, as its i_size can never decrease to BTRFS_EMPTY_DIR_SIZE even if all of its child inodes are deleted, and the dangling index entries can never be removed (as they point to an inode that does not exist anymore). This is easy to reproduce with the following excerpt from the test case for xfstests that I just made: _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create a test file with 2 hard links in the same directory. mkdir -p $SCRATCH_MNT/a/b echo "hello world" > $SCRATCH_MNT/a/b/foo ln $SCRATCH_MNT/a/b/foo $SCRATCH_MNT/a/b/bar # Make sure all metadata and data are durably persisted. sync # Now remove one of the hard links and fsync the inode. rm -f $SCRATCH_MNT/a/b/bar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/a/b/foo # Simulate a crash/power loss. This makes sure the next mount # will see an fsync log and will replay that log. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey # Remove the last hard link of the file and attempt to remove its parent # directory - this failed in btrfs because the fsync log and replay code # didn't decrement the parent directory's i_size and left dangling directory # index entries - this made the btrfs rmdir implementation always fail with # the error -ENOTEMPTY. # # The dangling directory index entries were visible to user space, but it was # impossible to do anything on them (unlink, open, read, write, stat, etc) # because the inode they pointed to did not exist anymore. 
# # The parent directory's metadata inconsistency (stale index entries) was # also detected by btrfs' fsck tool, which is run automatically by the fstests # framework when the test finishes. The error message reported by fsck was: # # root 5 inode 259 errors 2001, no inode item, link count wrong # unresolved ref dir 258 index 3 namelen 3 name bar filetype 1 errors 4, no inode ref # rm -f $SCRATCH_MNT/a/b/* rmdir $SCRATCH_MNT/a/b rmdir $SCRATCH_MNT/a To fix this, just make sure that after an unlink, if the inode is fsync'ed, the parent inode is fully logged in the fsync log. A test case for xfstests follows soon. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/tree-log.c20
1 files changed, 18 insertions, 2 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 67e5bf709dca..60e1d0083faa 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4273,6 +4273,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4273 struct dentry *old_parent = NULL; 4273 struct dentry *old_parent = NULL;
4274 int ret = 0; 4274 int ret = 0;
4275 u64 last_committed = root->fs_info->last_trans_committed; 4275 u64 last_committed = root->fs_info->last_trans_committed;
4276 const struct dentry * const first_parent = parent;
4277 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4278 last_committed);
4276 4279
4277 sb = inode->i_sb; 4280 sb = inode->i_sb;
4278 4281
@@ -4328,7 +4331,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4328 goto end_trans; 4331 goto end_trans;
4329 } 4332 }
4330 4333
4331 inode_only = LOG_INODE_EXISTS;
4332 while (1) { 4334 while (1) {
4333 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4335 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4334 break; 4336 break;
@@ -4337,8 +4339,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4337 if (root != BTRFS_I(inode)->root) 4339 if (root != BTRFS_I(inode)->root)
4338 break; 4340 break;
4339 4341
4342 /*
4343 * On unlink we must make sure our immediate parent directory
4344 * inode is fully logged. This is to prevent leaving dangling
4345 * directory index entries and a wrong directory inode's i_size.
4346 * Not doing so can result in a directory being impossible to
4347 * delete after log replay (rmdir will always fail with error
4348 * -ENOTEMPTY).
4349 */
4350 if (did_unlink && parent == first_parent)
4351 inode_only = LOG_INODE_ALL;
4352 else
4353 inode_only = LOG_INODE_EXISTS;
4354
4340 if (BTRFS_I(inode)->generation > 4355 if (BTRFS_I(inode)->generation >
4341 root->fs_info->last_trans_committed) { 4356 root->fs_info->last_trans_committed ||
4357 inode_only == LOG_INODE_ALL) {
4342 ret = btrfs_log_inode(trans, root, inode, inode_only, 4358 ret = btrfs_log_inode(trans, root, inode, inode_only,
4343 0, LLONG_MAX, ctx); 4359 0, LLONG_MAX, ctx);
4344 if (ret) 4360 if (ret)