diff options
author | Filipe Manana <fdmanana@suse.com> | 2015-02-23 14:53:35 -0500 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-03-26 20:55:51 -0400 |
commit | 4f764e5153616fccaed38e7524d6f5f89a49a060 (patch) | |
tree | ad2984601de69eb593c69031ff97d1c9baa2bd45 | |
parent | 9b305632cb220ffa495cbbefb313f3c34d4decc4 (diff) |
Btrfs: remove deleted xattrs on fsync log replay
If we deleted xattrs from a file and fsynced the file, after a log replay
the xattrs would remain associated to the file. This was an unexpected
behaviour and differs from what other filesystems do, such as for example
xfs and ext3/4.
Fix this by, on fsync log replay, check if every xattr in the fs/subvol
tree (that belongs to a logged inode) has a matching xattr in the log,
and if it does not, delete it from the fs/subvol tree. This is a similar
approach to what we do for dentries when we replay a directory from the
fsync log.
This issue is trivial to reproduce, and the following excerpt from my
test for xfstests triggers the issue:
_crash_and_mount()
{
# Simulate a crash/power loss.
_load_flakey_table $FLAKEY_DROP_WRITES
_unmount_flakey
_load_flakey_table $FLAKEY_ALLOW_WRITES
_mount_flakey
}
rm -f $seqres.full
_scratch_mkfs >> $seqres.full 2>&1
_init_flakey
_mount_flakey
# Create out test file and add 3 xattrs to it.
touch $SCRATCH_MNT/foobar
$SETFATTR_PROG -n user.attr1 -v val1 $SCRATCH_MNT/foobar
$SETFATTR_PROG -n user.attr2 -v val2 $SCRATCH_MNT/foobar
$SETFATTR_PROG -n user.attr3 -v val3 $SCRATCH_MNT/foobar
# Make sure everything is durably persisted.
sync
# Now delete the second xattr and fsync the inode.
$SETFATTR_PROG -x user.attr2 $SCRATCH_MNT/foobar
$XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
_crash_and_mount
# After the fsync log is replayed, the file should have only 2 xattrs, the ones
# named user.attr1 and user.attr3. The btrfs fsync log replay bug left the file
# with the 3 xattrs that we had before deleting the second one and fsyncing the
# file.
echo "xattr names and values after first fsync log replay:"
$GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch
# Now write some data to our file, fsync it, remove the first xattr, add a new
# hard link to our file and commit the fsync log by fsyncing some other new
# file. This is to verify that after log replay our first xattr does not exist
# anymore.
echo "hello world!" >> $SCRATCH_MNT/foobar
$XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
$SETFATTR_PROG -x user.attr1 $SCRATCH_MNT/foobar
ln $SCRATCH_MNT/foobar $SCRATCH_MNT/foobar_link
touch $SCRATCH_MNT/qwerty
$XFS_IO_PROG -c "fsync" $SCRATCH_MNT/qwerty
_crash_and_mount
# Now only the xattr with name user.attr3 should be set in our file.
echo "xattr names and values after second fsync log replay:"
$GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch
status=0
exit
The expected golden output, which is produced with this patch applied or
when testing against xfs or ext3/4, is:
xattr names and values after first fsync log replay:
# file: SCRATCH_MNT/foobar
user.attr1="val1"
user.attr3="val3"
xattr names and values after second fsync log replay:
# file: SCRATCH_MNT/foobar
user.attr3="val3"
Without this patch applied, the output is:
xattr names and values after first fsync log replay:
# file: SCRATCH_MNT/foobar
user.attr1="val1"
user.attr2="val2"
user.attr3="val3"
xattr names and values after second fsync log replay:
# file: SCRATCH_MNT/foobar
user.attr1="val1"
user.attr2="val2"
user.attr3="val3"
A patch with a test case for xfstests follows soon.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/tree-log.c | 123 |
1 files changed, 109 insertions, 14 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 066e754b1294..6c95159302dd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1951,6 +1951,104 @@ out: | |||
1951 | return ret; | 1951 | return ret; |
1952 | } | 1952 | } |
1953 | 1953 | ||
1954 | static int replay_xattr_deletes(struct btrfs_trans_handle *trans, | ||
1955 | struct btrfs_root *root, | ||
1956 | struct btrfs_root *log, | ||
1957 | struct btrfs_path *path, | ||
1958 | const u64 ino) | ||
1959 | { | ||
1960 | struct btrfs_key search_key; | ||
1961 | struct btrfs_path *log_path; | ||
1962 | int i; | ||
1963 | int nritems; | ||
1964 | int ret; | ||
1965 | |||
1966 | log_path = btrfs_alloc_path(); | ||
1967 | if (!log_path) | ||
1968 | return -ENOMEM; | ||
1969 | |||
1970 | search_key.objectid = ino; | ||
1971 | search_key.type = BTRFS_XATTR_ITEM_KEY; | ||
1972 | search_key.offset = 0; | ||
1973 | again: | ||
1974 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
1975 | if (ret < 0) | ||
1976 | goto out; | ||
1977 | process_leaf: | ||
1978 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
1979 | for (i = path->slots[0]; i < nritems; i++) { | ||
1980 | struct btrfs_key key; | ||
1981 | struct btrfs_dir_item *di; | ||
1982 | struct btrfs_dir_item *log_di; | ||
1983 | u32 total_size; | ||
1984 | u32 cur; | ||
1985 | |||
1986 | btrfs_item_key_to_cpu(path->nodes[0], &key, i); | ||
1987 | if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) { | ||
1988 | ret = 0; | ||
1989 | goto out; | ||
1990 | } | ||
1991 | |||
1992 | di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item); | ||
1993 | total_size = btrfs_item_size_nr(path->nodes[0], i); | ||
1994 | cur = 0; | ||
1995 | while (cur < total_size) { | ||
1996 | u16 name_len = btrfs_dir_name_len(path->nodes[0], di); | ||
1997 | u16 data_len = btrfs_dir_data_len(path->nodes[0], di); | ||
1998 | u32 this_len = sizeof(*di) + name_len + data_len; | ||
1999 | char *name; | ||
2000 | |||
2001 | name = kmalloc(name_len, GFP_NOFS); | ||
2002 | if (!name) { | ||
2003 | ret = -ENOMEM; | ||
2004 | goto out; | ||
2005 | } | ||
2006 | read_extent_buffer(path->nodes[0], name, | ||
2007 | (unsigned long)(di + 1), name_len); | ||
2008 | |||
2009 | log_di = btrfs_lookup_xattr(NULL, log, log_path, ino, | ||
2010 | name, name_len, 0); | ||
2011 | btrfs_release_path(log_path); | ||
2012 | if (!log_di) { | ||
2013 | /* Doesn't exist in log tree, so delete it. */ | ||
2014 | btrfs_release_path(path); | ||
2015 | di = btrfs_lookup_xattr(trans, root, path, ino, | ||
2016 | name, name_len, -1); | ||
2017 | kfree(name); | ||
2018 | if (IS_ERR(di)) { | ||
2019 | ret = PTR_ERR(di); | ||
2020 | goto out; | ||
2021 | } | ||
2022 | ASSERT(di); | ||
2023 | ret = btrfs_delete_one_dir_name(trans, root, | ||
2024 | path, di); | ||
2025 | if (ret) | ||
2026 | goto out; | ||
2027 | btrfs_release_path(path); | ||
2028 | search_key = key; | ||
2029 | goto again; | ||
2030 | } | ||
2031 | kfree(name); | ||
2032 | if (IS_ERR(log_di)) { | ||
2033 | ret = PTR_ERR(log_di); | ||
2034 | goto out; | ||
2035 | } | ||
2036 | cur += this_len; | ||
2037 | di = (struct btrfs_dir_item *)((char *)di + this_len); | ||
2038 | } | ||
2039 | } | ||
2040 | ret = btrfs_next_leaf(root, path); | ||
2041 | if (ret > 0) | ||
2042 | ret = 0; | ||
2043 | else if (ret == 0) | ||
2044 | goto process_leaf; | ||
2045 | out: | ||
2046 | btrfs_free_path(log_path); | ||
2047 | btrfs_release_path(path); | ||
2048 | return ret; | ||
2049 | } | ||
2050 | |||
2051 | |||
1954 | /* | 2052 | /* |
1955 | * deletion replay happens before we copy any new directory items | 2053 | * deletion replay happens before we copy any new directory items |
1956 | * out of the log or out of backreferences from inodes. It | 2054 | * out of the log or out of backreferences from inodes. It |
@@ -2104,6 +2202,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2104 | 2202 | ||
2105 | inode_item = btrfs_item_ptr(eb, i, | 2203 | inode_item = btrfs_item_ptr(eb, i, |
2106 | struct btrfs_inode_item); | 2204 | struct btrfs_inode_item); |
2205 | ret = replay_xattr_deletes(wc->trans, root, log, | ||
2206 | path, key.objectid); | ||
2207 | if (ret) | ||
2208 | break; | ||
2107 | mode = btrfs_inode_mode(eb, inode_item); | 2209 | mode = btrfs_inode_mode(eb, inode_item); |
2108 | if (S_ISDIR(mode)) { | 2210 | if (S_ISDIR(mode)) { |
2109 | ret = replay_dir_deletes(wc->trans, | 2211 | ret = replay_dir_deletes(wc->trans, |
@@ -4072,10 +4174,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4072 | if (S_ISDIR(inode->i_mode)) { | 4174 | if (S_ISDIR(inode->i_mode)) { |
4073 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; | 4175 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; |
4074 | 4176 | ||
4075 | if (inode_only == LOG_INODE_EXISTS) { | 4177 | if (inode_only == LOG_INODE_EXISTS) |
4076 | max_key_type = BTRFS_INODE_EXTREF_KEY; | 4178 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
4077 | max_key.type = max_key_type; | ||
4078 | } | ||
4079 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 4179 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
4080 | } else { | 4180 | } else { |
4081 | if (inode_only == LOG_INODE_EXISTS) { | 4181 | if (inode_only == LOG_INODE_EXISTS) { |
@@ -4100,7 +4200,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4100 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 4200 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
4101 | &BTRFS_I(inode)->runtime_flags)) { | 4201 | &BTRFS_I(inode)->runtime_flags)) { |
4102 | if (inode_only == LOG_INODE_EXISTS) { | 4202 | if (inode_only == LOG_INODE_EXISTS) { |
4103 | max_key.type = BTRFS_INODE_EXTREF_KEY; | 4203 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4104 | ret = drop_objectid_items(trans, log, path, ino, | 4204 | ret = drop_objectid_items(trans, log, path, ino, |
4105 | max_key.type); | 4205 | max_key.type); |
4106 | } else { | 4206 | } else { |
@@ -4111,17 +4211,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4111 | ret = btrfs_truncate_inode_items(trans, log, | 4211 | ret = btrfs_truncate_inode_items(trans, log, |
4112 | inode, 0, 0); | 4212 | inode, 0, 0); |
4113 | } | 4213 | } |
4114 | } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, | 4214 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
4115 | &BTRFS_I(inode)->runtime_flags) || | 4215 | &BTRFS_I(inode)->runtime_flags) || |
4116 | inode_only == LOG_INODE_EXISTS) { | 4216 | inode_only == LOG_INODE_EXISTS) { |
4117 | if (inode_only == LOG_INODE_ALL) { | 4217 | if (inode_only == LOG_INODE_ALL) |
4118 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4119 | &BTRFS_I(inode)->runtime_flags); | ||
4120 | fast_search = true; | 4218 | fast_search = true; |
4121 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 4219 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4122 | } else { | ||
4123 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4124 | } | ||
4125 | ret = drop_objectid_items(trans, log, path, ino, | 4220 | ret = drop_objectid_items(trans, log, path, ino, |
4126 | max_key.type); | 4221 | max_key.type); |
4127 | } else { | 4222 | } else { |