diff options
author | Filipe Manana <fdmanana@suse.com> | 2018-10-08 06:12:55 -0400 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2018-10-15 11:23:39 -0400 |
commit | f2d72f42d5fa3bf33761d9e47201745f624fcff5 (patch) | |
tree | cf7e20708d27e4399c421520da94b43756e23cad /fs/btrfs/tree-log.c | |
parent | ad80cf50c3f09a88eed918feeb95edaaf8d72b0a (diff) |
Btrfs: fix warning when replaying log after fsync of a tmpfile
When replaying a log which contains a tmpfile that was fsync'ed (and which
necessarily has a link count of 0), we end up calling inc_nlink() at
fs/btrfs/tree-log.c:replay_one_buffer(), which produces a warning like
the following:
[195191.943673] WARNING: CPU: 0 PID: 6924 at fs/inode.c:342 inc_nlink+0x33/0x40
[195191.943723] CPU: 0 PID: 6924 Comm: mount Not tainted 4.19.0-rc6-btrfs-next-38 #1
[195191.943724] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
[195191.943726] RIP: 0010:inc_nlink+0x33/0x40
[195191.943728] RSP: 0018:ffffb96e425e3870 EFLAGS: 00010246
[195191.943730] RAX: 0000000000000000 RBX: ffff8c0d1e6af4f0 RCX: 0000000000000006
[195191.943731] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8c0d1e6af4f0
[195191.943731] RBP: 0000000000000097 R08: 0000000000000001 R09: 0000000000000000
[195191.943732] R10: 0000000000000000 R11: 0000000000000000 R12: ffffb96e425e3a60
[195191.943733] R13: ffff8c0d10cff0c8 R14: ffff8c0d0d515348 R15: ffff8c0d78a1b3f8
[195191.943735] FS: 00007f570ee24480(0000) GS:ffff8c0dfb200000(0000) knlGS:0000000000000000
[195191.943736] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[195191.943737] CR2: 00005593286277c8 CR3: 00000000bb8f2006 CR4: 00000000003606f0
[195191.943739] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[195191.943740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[195191.943741] Call Trace:
[195191.943778] replay_one_buffer+0x797/0x7d0 [btrfs]
[195191.943802] walk_up_log_tree+0x1c1/0x250 [btrfs]
[195191.943809] ? rcu_read_lock_sched_held+0x3f/0x70
[195191.943825] walk_log_tree+0xae/0x1d0 [btrfs]
[195191.943840] btrfs_recover_log_trees+0x1d7/0x4d0 [btrfs]
[195191.943856] ? replay_dir_deletes+0x280/0x280 [btrfs]
[195191.943870] open_ctree+0x1c3b/0x22a0 [btrfs]
[195191.943887] btrfs_mount_root+0x6b4/0x800 [btrfs]
[195191.943894] ? rcu_read_lock_sched_held+0x3f/0x70
[195191.943899] ? pcpu_alloc+0x55b/0x7c0
[195191.943906] ? mount_fs+0x3b/0x140
[195191.943908] mount_fs+0x3b/0x140
[195191.943912] ? __init_waitqueue_head+0x36/0x50
[195191.943916] vfs_kern_mount+0x62/0x160
[195191.943927] btrfs_mount+0x134/0x890 [btrfs]
[195191.943936] ? rcu_read_lock_sched_held+0x3f/0x70
[195191.943938] ? pcpu_alloc+0x55b/0x7c0
[195191.943943] ? mount_fs+0x3b/0x140
[195191.943952] ? btrfs_remount+0x570/0x570 [btrfs]
[195191.943954] mount_fs+0x3b/0x140
[195191.943956] ? __init_waitqueue_head+0x36/0x50
[195191.943960] vfs_kern_mount+0x62/0x160
[195191.943963] do_mount+0x1f9/0xd40
[195191.943967] ? memdup_user+0x4b/0x70
[195191.943971] ksys_mount+0x7e/0xd0
[195191.943974] __x64_sys_mount+0x21/0x30
[195191.943977] do_syscall_64+0x60/0x1b0
[195191.943980] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[195191.943983] RIP: 0033:0x7f570e4e524a
[195191.943986] RSP: 002b:00007ffd83589478 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[195191.943989] RAX: ffffffffffffffda RBX: 0000563f335b2060 RCX: 00007f570e4e524a
[195191.943990] RDX: 0000563f335b2240 RSI: 0000563f335b2280 RDI: 0000563f335b2260
[195191.943992] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000020
[195191.943993] R10: 00000000c0ed0000 R11: 0000000000000206 R12: 0000563f335b2260
[195191.943994] R13: 0000563f335b2240 R14: 0000000000000000 R15: 00000000ffffffff
[195191.944002] irq event stamp: 8688
[195191.944010] hardirqs last enabled at (8687): [<ffffffff9cb004c3>] console_unlock+0x503/0x640
[195191.944012] hardirqs last disabled at (8688): [<ffffffff9ca037dd>] trace_hardirqs_off_thunk+0x1a/0x1c
[195191.944018] softirqs last enabled at (8638): [<ffffffff9cc0a5d1>] __set_page_dirty_nobuffers+0x101/0x150
[195191.944020] softirqs last disabled at (8634): [<ffffffff9cc26bbe>] wb_wakeup_delayed+0x2e/0x60
[195191.944022] ---[ end trace 5d6e873a9a0b811a ]---
This happens because the inode does not have the flag I_LINKABLE set,
which is a runtime-only flag, not meant to be persisted, set when the
inode is created through open(2) with O_TMPFILE if the flag O_EXCL is not
also passed to it.
Except for the warning, there are no other consequences (like corruptions
or metadata inconsistencies).
Since it's pointless to replay a tmpfile, as it would be deleted in a
later phase of the log replay procedure (it has a link count of 0), fix
this by not logging tmpfiles and, if a tmpfile is found in a log (created
by a kernel without this change), by skipping the replay of the inode.
A test case for fstests follows soon.
Fixes: 471d557afed1 ("Btrfs: fix loss of prealloc extents past i_size after fsync log replay")
CC: stable@vger.kernel.org # 4.18+
Reported-by: Martin Steigerwald <martin@lichtvoll.de>
Link: https://lore.kernel.org/linux-btrfs/3666619.NTnn27ZJZE@merkaba/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 42 |
1 files changed, 32 insertions, 10 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7914f293cb6..a5e08a73653e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -255,6 +255,13 @@ struct walk_control { | |||
255 | /* what stage of the replay code we're currently in */ | 255 | /* what stage of the replay code we're currently in */ |
256 | int stage; | 256 | int stage; |
257 | 257 | ||
258 | /* | ||
259 | * Ignore any items from the inode currently being processed. Needs | ||
260 | * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in | ||
261 | * the LOG_WALK_REPLAY_INODES stage. | ||
262 | */ | ||
263 | bool ignore_cur_inode; | ||
264 | |||
258 | /* the root we are currently replaying */ | 265 | /* the root we are currently replaying */ |
259 | struct btrfs_root *replay_dest; | 266 | struct btrfs_root *replay_dest; |
260 | 267 | ||
@@ -2484,6 +2491,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2484 | 2491 | ||
2485 | inode_item = btrfs_item_ptr(eb, i, | 2492 | inode_item = btrfs_item_ptr(eb, i, |
2486 | struct btrfs_inode_item); | 2493 | struct btrfs_inode_item); |
2494 | /* | ||
2495 | * If we have a tmpfile (O_TMPFILE) that got fsync'ed | ||
2496 | * and never got linked before the fsync, skip it, as | ||
2497 | * replaying it is pointless since it would be deleted | ||
2498 | * later. We skip logging tmpfiles, but it's always | ||
2499 | * possible we are replaying a log created with a kernel | ||
2500 | * that used to log tmpfiles. | ||
2501 | */ | ||
2502 | if (btrfs_inode_nlink(eb, inode_item) == 0) { | ||
2503 | wc->ignore_cur_inode = true; | ||
2504 | continue; | ||
2505 | } else { | ||
2506 | wc->ignore_cur_inode = false; | ||
2507 | } | ||
2487 | ret = replay_xattr_deletes(wc->trans, root, log, | 2508 | ret = replay_xattr_deletes(wc->trans, root, log, |
2488 | path, key.objectid); | 2509 | path, key.objectid); |
2489 | if (ret) | 2510 | if (ret) |
@@ -2521,16 +2542,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2521 | root->fs_info->sectorsize); | 2542 | root->fs_info->sectorsize); |
2522 | ret = btrfs_drop_extents(wc->trans, root, inode, | 2543 | ret = btrfs_drop_extents(wc->trans, root, inode, |
2523 | from, (u64)-1, 1); | 2544 | from, (u64)-1, 1); |
2524 | /* | ||
2525 | * If the nlink count is zero here, the iput | ||
2526 | * will free the inode. We bump it to make | ||
2527 | * sure it doesn't get freed until the link | ||
2528 | * count fixup is done. | ||
2529 | */ | ||
2530 | if (!ret) { | 2545 | if (!ret) { |
2531 | if (inode->i_nlink == 0) | 2546 | /* Update the inode's nbytes. */ |
2532 | inc_nlink(inode); | ||
2533 | /* Update link count and nbytes. */ | ||
2534 | ret = btrfs_update_inode(wc->trans, | 2547 | ret = btrfs_update_inode(wc->trans, |
2535 | root, inode); | 2548 | root, inode); |
2536 | } | 2549 | } |
@@ -2545,6 +2558,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2545 | break; | 2558 | break; |
2546 | } | 2559 | } |
2547 | 2560 | ||
2561 | if (wc->ignore_cur_inode) | ||
2562 | continue; | ||
2563 | |||
2548 | if (key.type == BTRFS_DIR_INDEX_KEY && | 2564 | if (key.type == BTRFS_DIR_INDEX_KEY && |
2549 | wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { | 2565 | wc->stage == LOG_WALK_REPLAY_DIR_INDEX) { |
2550 | ret = replay_one_dir_item(wc->trans, root, path, | 2566 | ret = replay_one_dir_item(wc->trans, root, path, |
@@ -5640,7 +5656,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
5640 | if (ret) | 5656 | if (ret) |
5641 | goto end_no_trans; | 5657 | goto end_no_trans; |
5642 | 5658 | ||
5643 | if (btrfs_inode_in_log(inode, trans->transid)) { | 5659 | /* |
5660 | * Skip already logged inodes or inodes corresponding to tmpfiles | ||
5661 | * (since logging them is pointless, a link count of 0 means they | ||
5662 | * will never be accessible). | ||
5663 | */ | ||
5664 | if (btrfs_inode_in_log(inode, trans->transid) || | ||
5665 | inode->vfs_inode.i_nlink == 0) { | ||
5644 | ret = BTRFS_NO_LOG_SYNC; | 5666 | ret = BTRFS_NO_LOG_SYNC; |
5645 | goto end_no_trans; | 5667 | goto end_no_trans; |
5646 | } | 5668 | } |