aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Filipe Manana <fdmanana@suse.com> 2018-02-28 10:56:10 -0500
committer David Sterba <dsterba@suse.com> 2018-03-01 10:18:40 -0500
commit 1f250e929a9c9332fd6ea34da684afee73837cfe (patch)
tree 60f5429edbab38e9c5b3dcc7be30f06ee104fe08
parent 9a6509c4daa91400b52a5fd541a5521c649a8fea (diff)
Btrfs: fix log replay failure after unlink and link combination
If we have a file with 2 (or more) hard links in the same directory, remove one of the hard links, create a new file (or link an existing file) in the same directory with the name of the removed hard link, and then finally fsync the new file, we end up with a log that fails to replay, causing a mount failure. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/testdir $ touch /mnt/testdir/foo $ ln /mnt/testdir/foo /mnt/testdir/bar $ sync $ unlink /mnt/testdir/bar $ touch /mnt/testdir/bar $ xfs_io -c "fsync" /mnt/testdir/bar <power failure> $ mount /dev/sdb /mnt mount: mount(2) failed: /mnt: No such file or directory When replaying the log, for that example, we also see the following in dmesg/syslog: [71813.671307] BTRFS info (device dm-0): failed to delete reference to bar, inode 258 parent 257 [71813.674204] ------------[ cut here ]------------ [71813.675694] BTRFS: Transaction aborted (error -2) [71813.677236] WARNING: CPU: 1 PID: 13231 at fs/btrfs/inode.c:4128 __btrfs_unlink_inode+0x17b/0x355 [btrfs] [71813.679669] Modules linked in: btrfs xfs f2fs dm_flakey dm_mod dax ghash_clmulni_intel ppdev pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper evdev psmouse i2c_piix4 parport_pc i2c_core pcspkr sg serio_raw parport button sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod ata_generic sd_mod virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel floppy virtio e1000 scsi_mod [last unloaded: btrfs] [71813.679669] CPU: 1 PID: 13231 Comm: mount Tainted: G W 4.15.0-rc9-btrfs-next-56+ #1 [71813.679669] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [71813.679669] RIP: 0010:__btrfs_unlink_inode+0x17b/0x355 [btrfs] [71813.679669] RSP: 0018:ffffc90001cef738 EFLAGS: 00010286 [71813.679669] RAX: 
0000000000000025 RBX: ffff880217ce4708 RCX: 0000000000000001 [71813.679669] RDX: 0000000000000000 RSI: ffffffff81c14bae RDI: 00000000ffffffff [71813.679669] RBP: ffffc90001cef7c0 R08: 0000000000000001 R09: 0000000000000001 [71813.679669] R10: ffffc90001cef5e0 R11: ffffffff8343f007 R12: ffff880217d474c8 [71813.679669] R13: 00000000fffffffe R14: ffff88021ccf1548 R15: 0000000000000101 [71813.679669] FS: 00007f7cee84c480(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000 [71813.679669] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [71813.679669] CR2: 00007f7cedc1abf9 CR3: 00000002354b4003 CR4: 00000000001606e0 [71813.679669] Call Trace: [71813.679669] btrfs_unlink_inode+0x17/0x41 [btrfs] [71813.679669] drop_one_dir_item+0xfa/0x131 [btrfs] [71813.679669] add_inode_ref+0x71e/0x851 [btrfs] [71813.679669] ? __lock_is_held+0x39/0x71 [71813.679669] ? replay_one_buffer+0x53/0x53a [btrfs] [71813.679669] replay_one_buffer+0x4a4/0x53a [btrfs] [71813.679669] ? rcu_read_unlock+0x3a/0x57 [71813.679669] ? __lock_is_held+0x39/0x71 [71813.679669] walk_up_log_tree+0x101/0x1d2 [btrfs] [71813.679669] walk_log_tree+0xad/0x188 [btrfs] [71813.679669] btrfs_recover_log_trees+0x1fa/0x31e [btrfs] [71813.679669] ? replay_one_extent+0x544/0x544 [btrfs] [71813.679669] open_ctree+0x1cf6/0x2209 [btrfs] [71813.679669] btrfs_mount_root+0x368/0x482 [btrfs] [71813.679669] ? trace_hardirqs_on_caller+0x14c/0x1a6 [71813.679669] ? __lockdep_init_map+0x176/0x1c2 [71813.679669] ? mount_fs+0x64/0x10b [71813.679669] mount_fs+0x64/0x10b [71813.679669] vfs_kern_mount+0x68/0xce [71813.679669] btrfs_mount+0x13e/0x772 [btrfs] [71813.679669] ? trace_hardirqs_on_caller+0x14c/0x1a6 [71813.679669] ? __lockdep_init_map+0x176/0x1c2 [71813.679669] ? mount_fs+0x64/0x10b [71813.679669] mount_fs+0x64/0x10b [71813.679669] vfs_kern_mount+0x68/0xce [71813.679669] do_mount+0x6e5/0x973 [71813.679669] ? 
memdup_user+0x3e/0x5c [71813.679669] SyS_mount+0x72/0x98 [71813.679669] entry_SYSCALL_64_fastpath+0x1e/0x8b [71813.679669] RIP: 0033:0x7f7cedf150ba [71813.679669] RSP: 002b:00007ffca71da688 EFLAGS: 00000206 [71813.679669] Code: 7f a0 e8 51 0c fd ff 48 8b 43 50 f0 0f ba a8 30 2c 00 00 02 72 17 41 83 fd fb 74 11 44 89 ee 48 c7 c7 7d 11 7f a0 e8 38 f5 8d e0 <0f> ff 44 89 e9 ba 20 10 00 00 eb 4d 48 8b 4d b0 48 8b 75 88 4c [71813.679669] ---[ end trace 83bd473fc5b4663b ]--- [71813.854764] BTRFS: error (device dm-0) in __btrfs_unlink_inode:4128: errno=-2 No such entry [71813.886994] BTRFS: error (device dm-0) in btrfs_replay_log:2307: errno=-2 No such entry (Failed to recover log tree) [71813.903357] BTRFS error (device dm-0): cleaner transaction attach returned -30 [71814.128078] BTRFS error (device dm-0): open_ctree failed This happens because the log has inode reference items for both inode 258 (the first file we created) and inode 259 (the second file created), and when processing the reference item for inode 258, we replace the corresponding item in the subvolume tree (which has two names, "foo" and "bar") with the one in the log (which only has one name, "foo") without removing the corresponding dir index keys from the parent directory. Later, when processing the inode reference item for inode 259, which has a name of "bar" associated to it, we notice that dir index entries exist for that name and for a different inode, so we attempt to unlink that name, which fails because the inode reference item for inode 258 no longer has the name "bar" associated to it, making a call to btrfs_unlink_inode() fail with a -ENOENT error. Fix this by unlinking all the names in an inode reference item from a subvolume tree that are not present in the inode reference item found in the log tree, before overwriting it with the item from the log tree. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/ctree.h 5
-rw-r--r-- fs/btrfs/inode-item.c 44
-rw-r--r-- fs/btrfs/tree-log.c 112
3 files changed, 139 insertions, 22 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0f521ba5f2f9..da308774b8a4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3095,7 +3095,10 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
3095 u64 inode_objectid, u64 ref_objectid, int ins_len, 3095 u64 inode_objectid, u64 ref_objectid, int ins_len,
3096 int cow); 3096 int cow);
3097 3097
3098int btrfs_find_name_in_ext_backref(struct btrfs_path *path, 3098int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
3099 const char *name,
3100 int name_len, struct btrfs_inode_ref **ref_ret);
3101int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
3099 u64 ref_objectid, const char *name, 3102 u64 ref_objectid, const char *name,
3100 int name_len, 3103 int name_len,
3101 struct btrfs_inode_extref **extref_ret); 3104 struct btrfs_inode_extref **extref_ret);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 39c968f80157..65e1a76bf755 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -22,10 +22,10 @@
22#include "transaction.h" 22#include "transaction.h"
23#include "print-tree.h" 23#include "print-tree.h"
24 24
25static int find_name_in_backref(struct btrfs_path *path, const char *name, 25int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot,
26 int name_len, struct btrfs_inode_ref **ref_ret) 26 const char *name,
27 int name_len, struct btrfs_inode_ref **ref_ret)
27{ 28{
28 struct extent_buffer *leaf;
29 struct btrfs_inode_ref *ref; 29 struct btrfs_inode_ref *ref;
30 unsigned long ptr; 30 unsigned long ptr;
31 unsigned long name_ptr; 31 unsigned long name_ptr;
@@ -33,9 +33,8 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
33 u32 cur_offset = 0; 33 u32 cur_offset = 0;
34 int len; 34 int len;
35 35
36 leaf = path->nodes[0]; 36 item_size = btrfs_item_size_nr(leaf, slot);
37 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 37 ptr = btrfs_item_ptr_offset(leaf, slot);
38 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
39 while (cur_offset < item_size) { 38 while (cur_offset < item_size) {
40 ref = (struct btrfs_inode_ref *)(ptr + cur_offset); 39 ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
41 len = btrfs_inode_ref_name_len(leaf, ref); 40 len = btrfs_inode_ref_name_len(leaf, ref);
@@ -44,18 +43,19 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
44 if (len != name_len) 43 if (len != name_len)
45 continue; 44 continue;
46 if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) { 45 if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
47 *ref_ret = ref; 46 if (ref_ret)
47 *ref_ret = ref;
48 return 1; 48 return 1;
49 } 49 }
50 } 50 }
51 return 0; 51 return 0;
52} 52}
53 53
54int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, 54int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
55 u64 ref_objectid,
55 const char *name, int name_len, 56 const char *name, int name_len,
56 struct btrfs_inode_extref **extref_ret) 57 struct btrfs_inode_extref **extref_ret)
57{ 58{
58 struct extent_buffer *leaf;
59 struct btrfs_inode_extref *extref; 59 struct btrfs_inode_extref *extref;
60 unsigned long ptr; 60 unsigned long ptr;
61 unsigned long name_ptr; 61 unsigned long name_ptr;
@@ -63,9 +63,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
63 u32 cur_offset = 0; 63 u32 cur_offset = 0;
64 int ref_name_len; 64 int ref_name_len;
65 65
66 leaf = path->nodes[0]; 66 item_size = btrfs_item_size_nr(leaf, slot);
67 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 67 ptr = btrfs_item_ptr_offset(leaf, slot);
68 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
69 68
70 /* 69 /*
71 * Search all extended backrefs in this item. We're only 70 * Search all extended backrefs in this item. We're only
@@ -113,7 +112,9 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
113 return ERR_PTR(ret); 112 return ERR_PTR(ret);
114 if (ret > 0) 113 if (ret > 0)
115 return NULL; 114 return NULL;
116 if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref)) 115 if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
116 ref_objectid, name, name_len,
117 &extref))
117 return NULL; 118 return NULL;
118 return extref; 119 return extref;
119} 120}
@@ -155,7 +156,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
155 * This should always succeed so error here will make the FS 156 * This should always succeed so error here will make the FS
156 * readonly. 157 * readonly.
157 */ 158 */
158 if (!btrfs_find_name_in_ext_backref(path, ref_objectid, 159 if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
160 ref_objectid,
159 name, name_len, &extref)) { 161 name, name_len, &extref)) {
160 btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); 162 btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
161 ret = -EROFS; 163 ret = -EROFS;
@@ -225,7 +227,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
225 } else if (ret < 0) { 227 } else if (ret < 0) {
226 goto out; 228 goto out;
227 } 229 }
228 if (!find_name_in_backref(path, name, name_len, &ref)) { 230 if (!btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
231 name, name_len, &ref)) {
229 ret = -ENOENT; 232 ret = -ENOENT;
230 search_ext_refs = 1; 233 search_ext_refs = 1;
231 goto out; 234 goto out;
@@ -293,7 +296,9 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
293 ret = btrfs_insert_empty_item(trans, root, path, &key, 296 ret = btrfs_insert_empty_item(trans, root, path, &key,
294 ins_len); 297 ins_len);
295 if (ret == -EEXIST) { 298 if (ret == -EEXIST) {
296 if (btrfs_find_name_in_ext_backref(path, ref_objectid, 299 if (btrfs_find_name_in_ext_backref(path->nodes[0],
300 path->slots[0],
301 ref_objectid,
297 name, name_len, NULL)) 302 name, name_len, NULL))
298 goto out; 303 goto out;
299 304
@@ -351,7 +356,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
351 if (ret == -EEXIST) { 356 if (ret == -EEXIST) {
352 u32 old_size; 357 u32 old_size;
353 358
354 if (find_name_in_backref(path, name, name_len, &ref)) 359 if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
360 name, name_len, &ref))
355 goto out; 361 goto out;
356 362
357 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 363 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
@@ -365,7 +371,9 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
365 ret = 0; 371 ret = 0;
366 } else if (ret < 0) { 372 } else if (ret < 0) {
367 if (ret == -EOVERFLOW) { 373 if (ret == -EOVERFLOW) {
368 if (find_name_in_backref(path, name, name_len, &ref)) 374 if (btrfs_find_name_in_backref(path->nodes[0],
375 path->slots[0],
376 name, name_len, &ref))
369 ret = -EEXIST; 377 ret = -EEXIST;
370 else 378 else
371 ret = -EMLINK; 379 ret = -EMLINK;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f7a18751314a..4c50f823949c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -966,7 +966,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
966 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 966 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
967 967
968 if (key->type == BTRFS_INODE_EXTREF_KEY) { 968 if (key->type == BTRFS_INODE_EXTREF_KEY) {
969 if (btrfs_find_name_in_ext_backref(path, ref_objectid, 969 if (btrfs_find_name_in_ext_backref(path->nodes[0],
970 path->slots[0],
971 ref_objectid,
970 name, namelen, NULL)) 972 name, namelen, NULL))
971 match = 1; 973 match = 1;
972 974
@@ -1190,7 +1192,8 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1190 read_extent_buffer(eb, *name, (unsigned long)&extref->name, 1192 read_extent_buffer(eb, *name, (unsigned long)&extref->name,
1191 *namelen); 1193 *namelen);
1192 1194
1193 *index = btrfs_inode_extref_index(eb, extref); 1195 if (index)
1196 *index = btrfs_inode_extref_index(eb, extref);
1194 if (parent_objectid) 1197 if (parent_objectid)
1195 *parent_objectid = btrfs_inode_extref_parent(eb, extref); 1198 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
1196 1199
@@ -1211,12 +1214,102 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
1211 1214
1212 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); 1215 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
1213 1216
1214 *index = btrfs_inode_ref_index(eb, ref); 1217 if (index)
1218 *index = btrfs_inode_ref_index(eb, ref);
1215 1219
1216 return 0; 1220 return 0;
1217} 1221}
1218 1222
1219/* 1223/*
1224 * Take an inode reference item from the log tree and iterate all names from the
1225 * inode reference item in the subvolume tree with the same key (if it exists).
1226 * For any name that is not in the inode reference item from the log tree, do a
1227 * proper unlink of that name (that is, remove its entry from the inode
1228 * reference item and both dir index keys).
1229 */
1230static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
1231 struct btrfs_root *root,
1232 struct btrfs_path *path,
1233 struct btrfs_inode *inode,
1234 struct extent_buffer *log_eb,
1235 int log_slot,
1236 struct btrfs_key *key)
1237{
1238 int ret;
1239 unsigned long ref_ptr;
1240 unsigned long ref_end;
1241 struct extent_buffer *eb;
1242
1243again:
1244 btrfs_release_path(path);
1245 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
1246 if (ret > 0) {
1247 ret = 0;
1248 goto out;
1249 }
1250 if (ret < 0)
1251 goto out;
1252
1253 eb = path->nodes[0];
1254 ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
1255 ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]);
1256 while (ref_ptr < ref_end) {
1257 char *name = NULL;
1258 int namelen;
1259 u64 parent_id;
1260
1261 if (key->type == BTRFS_INODE_EXTREF_KEY) {
1262 ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
1263 NULL, &parent_id);
1264 } else {
1265 parent_id = key->offset;
1266 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1267 NULL);
1268 }
1269 if (ret)
1270 goto out;
1271
1272 if (key->type == BTRFS_INODE_EXTREF_KEY)
1273 ret = btrfs_find_name_in_ext_backref(log_eb, log_slot,
1274 parent_id, name,
1275 namelen, NULL);
1276 else
1277 ret = btrfs_find_name_in_backref(log_eb, log_slot, name,
1278 namelen, NULL);
1279
1280 if (!ret) {
1281 struct inode *dir;
1282
1283 btrfs_release_path(path);
1284 dir = read_one_inode(root, parent_id);
1285 if (!dir) {
1286 ret = -ENOENT;
1287 kfree(name);
1288 goto out;
1289 }
1290 ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
1291 inode, name, namelen);
1292 kfree(name);
1293 iput(dir);
1294 if (ret)
1295 goto out;
1296 goto again;
1297 }
1298
1299 kfree(name);
1300 ref_ptr += namelen;
1301 if (key->type == BTRFS_INODE_EXTREF_KEY)
1302 ref_ptr += sizeof(struct btrfs_inode_extref);
1303 else
1304 ref_ptr += sizeof(struct btrfs_inode_ref);
1305 }
1306 ret = 0;
1307 out:
1308 btrfs_release_path(path);
1309 return ret;
1310}
1311
1312/*
1220 * replay one inode back reference item found in the log tree. 1313 * replay one inode back reference item found in the log tree.
1221 * eb, slot and key refer to the buffer and key found in the log tree. 1314 * eb, slot and key refer to the buffer and key found in the log tree.
1222 * root is the destination we are replaying into, and path is for temp 1315 * root is the destination we are replaying into, and path is for temp
@@ -1344,6 +1437,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1344 } 1437 }
1345 } 1438 }
1346 1439
1440 /*
1441 * Before we overwrite the inode reference item in the subvolume tree
1442 * with the item from the log tree, we must unlink all names from the
1443 * parent directory that are in the subvolume's tree inode reference
1444 * item, otherwise we end up with an inconsistent subvolume tree where
1445 * dir index entries exist for a name but there is no inode reference
1446 * item with the same name.
1447 */
1448 ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot,
1449 key);
1450 if (ret)
1451 goto out;
1452
1347 /* finally write the back reference in the inode */ 1453 /* finally write the back reference in the inode */
1348 ret = overwrite_item(trans, root, path, eb, slot, key); 1454 ret = overwrite_item(trans, root, path, eb, slot, key);
1349out: 1455out: