aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2015-02-23 14:53:35 -0500
committerChris Mason <clm@fb.com>2015-03-26 20:55:51 -0400
commit4f764e5153616fccaed38e7524d6f5f89a49a060 (patch)
treead2984601de69eb593c69031ff97d1c9baa2bd45
parent9b305632cb220ffa495cbbefb313f3c34d4decc4 (diff)
Btrfs: remove deleted xattrs on fsync log replay
If we deleted xattrs from a file and fsynced the file, after a log replay the xattrs would remain associated to the file. This was an unexpected behaviour and differs from what other filesystems do, such as for example xfs and ext3/4. Fix this by, on fsync log replay, check if every xattr in the fs/subvol tree (that belongs to a logged inode) has a matching xattr in the log, and if it does not, delete it from the fs/subvol tree. This is a similar approach to what we do for dentries when we replay a directory from the fsync log. This issue is trivial to reproduce, and the following excerpt from my test for xfstests triggers the issue: _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create out test file and add 3 xattrs to it. touch $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr1 -v val1 $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr2 -v val2 $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr3 -v val3 $SCRATCH_MNT/foobar # Make sure everything is durably persisted. sync # Now delete the second xattr and fsync the inode. $SETFATTR_PROG -x user.attr2 $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar _crash_and_mount # After the fsync log is replayed, the file should have only 2 xattrs, the ones # named user.attr1 and user.attr3. The btrfs fsync log replay bug left the file # with the 3 xattrs that we had before deleting the second one and fsyncing the # file. echo "xattr names and values after first fsync log replay:" $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch # Now write some data to our file, fsync it, remove the first xattr, add a new # hard link to our file and commit the fsync log by fsyncing some other new # file. This is to verify that after log replay our first xattr does not exist # anymore. echo "hello world!" >> $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar $SETFATTR_PROG -x user.attr1 $SCRATCH_MNT/foobar ln $SCRATCH_MNT/foobar $SCRATCH_MNT/foobar_link touch $SCRATCH_MNT/qwerty $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/qwerty _crash_and_mount # Now only the xattr with name user.attr3 should be set in our file. echo "xattr names and values after second fsync log replay:" $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch status=0 exit The expected golden output, which is produced with this patch applied or when testing against xfs or ext3/4, is: xattr names and values after first fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr3="val3" xattr names and values after second fsync log replay: # file: SCRATCH_MNT/foobar user.attr3="val3" Without this patch applied, the output is: xattr names and values after first fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr2="val2" user.attr3="val3" xattr names and values after second fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr2="val2" user.attr3="val3" A patch with a test case for xfstests follows soon. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/tree-log.c123
1 files changed, 109 insertions, 14 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 066e754b1294..6c95159302dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1951,6 +1951,104 @@ out:
1951 return ret; 1951 return ret;
1952} 1952}
1953 1953
1954static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
1955 struct btrfs_root *root,
1956 struct btrfs_root *log,
1957 struct btrfs_path *path,
1958 const u64 ino)
1959{
1960 struct btrfs_key search_key;
1961 struct btrfs_path *log_path;
1962 int i;
1963 int nritems;
1964 int ret;
1965
1966 log_path = btrfs_alloc_path();
1967 if (!log_path)
1968 return -ENOMEM;
1969
1970 search_key.objectid = ino;
1971 search_key.type = BTRFS_XATTR_ITEM_KEY;
1972 search_key.offset = 0;
1973again:
1974 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
1975 if (ret < 0)
1976 goto out;
1977process_leaf:
1978 nritems = btrfs_header_nritems(path->nodes[0]);
1979 for (i = path->slots[0]; i < nritems; i++) {
1980 struct btrfs_key key;
1981 struct btrfs_dir_item *di;
1982 struct btrfs_dir_item *log_di;
1983 u32 total_size;
1984 u32 cur;
1985
1986 btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1987 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
1988 ret = 0;
1989 goto out;
1990 }
1991
1992 di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
1993 total_size = btrfs_item_size_nr(path->nodes[0], i);
1994 cur = 0;
1995 while (cur < total_size) {
1996 u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
1997 u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
1998 u32 this_len = sizeof(*di) + name_len + data_len;
1999 char *name;
2000
2001 name = kmalloc(name_len, GFP_NOFS);
2002 if (!name) {
2003 ret = -ENOMEM;
2004 goto out;
2005 }
2006 read_extent_buffer(path->nodes[0], name,
2007 (unsigned long)(di + 1), name_len);
2008
2009 log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
2010 name, name_len, 0);
2011 btrfs_release_path(log_path);
2012 if (!log_di) {
2013 /* Doesn't exist in log tree, so delete it. */
2014 btrfs_release_path(path);
2015 di = btrfs_lookup_xattr(trans, root, path, ino,
2016 name, name_len, -1);
2017 kfree(name);
2018 if (IS_ERR(di)) {
2019 ret = PTR_ERR(di);
2020 goto out;
2021 }
2022 ASSERT(di);
2023 ret = btrfs_delete_one_dir_name(trans, root,
2024 path, di);
2025 if (ret)
2026 goto out;
2027 btrfs_release_path(path);
2028 search_key = key;
2029 goto again;
2030 }
2031 kfree(name);
2032 if (IS_ERR(log_di)) {
2033 ret = PTR_ERR(log_di);
2034 goto out;
2035 }
2036 cur += this_len;
2037 di = (struct btrfs_dir_item *)((char *)di + this_len);
2038 }
2039 }
2040 ret = btrfs_next_leaf(root, path);
2041 if (ret > 0)
2042 ret = 0;
2043 else if (ret == 0)
2044 goto process_leaf;
2045out:
2046 btrfs_free_path(log_path);
2047 btrfs_release_path(path);
2048 return ret;
2049}
2050
2051
1954/* 2052/*
1955 * deletion replay happens before we copy any new directory items 2053 * deletion replay happens before we copy any new directory items
1956 * out of the log or out of backreferences from inodes. It 2054 * out of the log or out of backreferences from inodes. It
@@ -2104,6 +2202,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2104 2202
2105 inode_item = btrfs_item_ptr(eb, i, 2203 inode_item = btrfs_item_ptr(eb, i,
2106 struct btrfs_inode_item); 2204 struct btrfs_inode_item);
2205 ret = replay_xattr_deletes(wc->trans, root, log,
2206 path, key.objectid);
2207 if (ret)
2208 break;
2107 mode = btrfs_inode_mode(eb, inode_item); 2209 mode = btrfs_inode_mode(eb, inode_item);
2108 if (S_ISDIR(mode)) { 2210 if (S_ISDIR(mode)) {
2109 ret = replay_dir_deletes(wc->trans, 2211 ret = replay_dir_deletes(wc->trans,
@@ -4072,10 +4174,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4072 if (S_ISDIR(inode->i_mode)) { 4174 if (S_ISDIR(inode->i_mode)) {
4073 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4175 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
4074 4176
4075 if (inode_only == LOG_INODE_EXISTS) { 4177 if (inode_only == LOG_INODE_EXISTS)
4076 max_key_type = BTRFS_INODE_EXTREF_KEY; 4178 max_key_type = BTRFS_XATTR_ITEM_KEY;
4077 max_key.type = max_key_type;
4078 }
4079 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4179 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
4080 } else { 4180 } else {
4081 if (inode_only == LOG_INODE_EXISTS) { 4181 if (inode_only == LOG_INODE_EXISTS) {
@@ -4100,7 +4200,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4100 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4200 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4101 &BTRFS_I(inode)->runtime_flags)) { 4201 &BTRFS_I(inode)->runtime_flags)) {
4102 if (inode_only == LOG_INODE_EXISTS) { 4202 if (inode_only == LOG_INODE_EXISTS) {
4103 max_key.type = BTRFS_INODE_EXTREF_KEY; 4203 max_key.type = BTRFS_XATTR_ITEM_KEY;
4104 ret = drop_objectid_items(trans, log, path, ino, 4204 ret = drop_objectid_items(trans, log, path, ino,
4105 max_key.type); 4205 max_key.type);
4106 } else { 4206 } else {
@@ -4111,17 +4211,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4111 ret = btrfs_truncate_inode_items(trans, log, 4211 ret = btrfs_truncate_inode_items(trans, log,
4112 inode, 0, 0); 4212 inode, 0, 0);
4113 } 4213 }
4114 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, 4214 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4115 &BTRFS_I(inode)->runtime_flags) || 4215 &BTRFS_I(inode)->runtime_flags) ||
4116 inode_only == LOG_INODE_EXISTS) { 4216 inode_only == LOG_INODE_EXISTS) {
4117 if (inode_only == LOG_INODE_ALL) { 4217 if (inode_only == LOG_INODE_ALL)
4118 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4119 &BTRFS_I(inode)->runtime_flags);
4120 fast_search = true; 4218 fast_search = true;
4121 max_key.type = BTRFS_XATTR_ITEM_KEY; 4219 max_key.type = BTRFS_XATTR_ITEM_KEY;
4122 } else {
4123 max_key.type = BTRFS_INODE_EXTREF_KEY;
4124 }
4125 ret = drop_objectid_items(trans, log, path, ino, 4220 ret = drop_objectid_items(trans, log, path, ino,
4126 max_key.type); 4221 max_key.type);
4127 } else { 4222 } else {