From 8648de2c581eeda7e412d6e38bf19e25bbb795ba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 Feb 2019 16:15:29 +0800 Subject: f2fs: add bio cache for IPU SQLite in Wal mode may trigger sequential IPU write in db-wal file, after commit d1b3e72d5490 ("f2fs: submit bio of in-place-update pages"), we lost the chance of merging page in inner managed bio cache, result in submitting more small-sized IO. So let's add temporary bio in writepages() to cache mergeable write IO as much as possible. Test case: 1. xfs_io -f /mnt/f2fs/file -c "pwrite 0 65536" -c "fsync" 2. xfs_io -f /mnt/f2fs/file -c "pwrite 0 65536" -c "fsync" Before: f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65544, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65552, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65560, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65568, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65576, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65584, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65592, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65600, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65608, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65616, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65624, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65632, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65640, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65648, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65656, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65664, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), NODE, sector = 57352, size = 4096 After: f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65544, size = 65536 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), NODE, sector = 57368, size = 4096 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8dee063c833f..dbc96b2a05fa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3202,7 +3202,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); - err = f2fs_submit_page_bio(fio); + if (fio->bio) + err = f2fs_merge_page_bio(fio); + else + err = f2fs_submit_page_bio(fio); if (!err) { update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); -- cgit v1.2.2 From 040d2bb318d1aea4f28cc22504b44e446666c86e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 May 2019 17:36:59 +0800 Subject: f2fs: fix to avoid deadloop if data_flush is on As Hagbard Celine reported: [ 615.697824] INFO: task kworker/u16:5:344 blocked for more than 120 seconds. [ 615.697825] Not tainted 5.0.15-gentoo-f2fslog #4 [ 615.697826] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 615.697827] kworker/u16:5 D 0 344 2 0x80000000 [ 615.697831] Workqueue: writeback wb_workfn (flush-259:0) [ 615.697832] Call Trace: [ 615.697836] ? __schedule+0x2c5/0x8b0 [ 615.697839] schedule+0x32/0x80 [ 615.697841] schedule_preempt_disabled+0x14/0x20 [ 615.697842] __mutex_lock.isra.8+0x2ba/0x4d0 [ 615.697845] ? log_store+0xf5/0x260 [ 615.697848] f2fs_write_data_pages+0x133/0x320 [ 615.697851] ? trace_hardirqs_on+0x2c/0xe0 [ 615.697854] do_writepages+0x41/0xd0 [ 615.697857] __filemap_fdatawrite_range+0x81/0xb0 [ 615.697859] f2fs_sync_dirty_inodes+0x1dd/0x200 [ 615.697861] f2fs_balance_fs_bg+0x2a7/0x2c0 [ 615.697863] ? up_read+0x5/0x20 [ 615.697865] ? f2fs_do_write_data_page+0x2cb/0x940 [ 615.697867] f2fs_balance_fs+0xe5/0x2c0 [ 615.697869] __write_data_page+0x1c8/0x6e0 [ 615.697873] f2fs_write_cache_pages+0x1e0/0x450 [ 615.697878] f2fs_write_data_pages+0x14b/0x320 [ 615.697880] ? trace_hardirqs_on+0x2c/0xe0 [ 615.697883] do_writepages+0x41/0xd0 [ 615.697885] __filemap_fdatawrite_range+0x81/0xb0 [ 615.697887] f2fs_sync_dirty_inodes+0x1dd/0x200 [ 615.697889] f2fs_balance_fs_bg+0x2a7/0x2c0 [ 615.697891] f2fs_write_node_pages+0x51/0x220 [ 615.697894] do_writepages+0x41/0xd0 [ 615.697897] __writeback_single_inode+0x3d/0x3d0 [ 615.697899] writeback_sb_inodes+0x1e8/0x410 [ 615.697902] __writeback_inodes_wb+0x5d/0xb0 [ 615.697904] wb_writeback+0x28f/0x340 [ 615.697906] ? cpumask_next+0x16/0x20 [ 615.697908] wb_workfn+0x33e/0x420 [ 615.697911] process_one_work+0x1a1/0x3d0 [ 615.697913] worker_thread+0x30/0x380 [ 615.697915] ? process_one_work+0x3d0/0x3d0 [ 615.697916] kthread+0x116/0x130 [ 615.697918] ? kthread_create_worker_on_cpu+0x70/0x70 [ 615.697921] ret_from_fork+0x3a/0x50 There is still deadloop in below condition: d A - do_writepages - f2fs_write_node_pages - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - f2fs_write_cache_pages - mutex_lock(&sbi->writepages) -- lock once - __write_data_page - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - f2fs_write_data_pages - mutex_lock(&sbi->writepages) -- lock again Thread A Thread B - do_writepages - f2fs_write_node_pages - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - .cp_task = current - f2fs_sync_dirty_inodes - .cp_task = current - filemap_fdatawrite - .cp_task = NULL - filemap_fdatawrite - f2fs_write_cache_pages - enter f2fs_balance_fs_bg since .cp_task is NULL - .cp_task = NULL Change as below to avoid this: - add condition to avoid holding .writepages mutex lock in path of data flush - introduce mutex lock sbi.flush_lock to exclude concurrent data flush in background. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dbc96b2a05fa..5f6e4cd2eff2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; + mutex_lock(&sbi->flush_lock); + blk_start_plug(&plug); f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); -- cgit v1.2.2 From c854f4d681365498f53ba07843a16423625aa7e9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 May 2019 23:07:25 +0800 Subject: f2fs: fix to do sanity check on segment bitmap of LFS curseg As Jungyeon Reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203233 - Reproduces gcc poc_13.c ./run.sh f2fs - Kernel messages F2FS-fs (sdb): Bitmap was wrongly set, blk:4608 kernel BUG at fs/f2fs/segment.c:2133! RIP: 0010:update_sit_entry+0x35d/0x3e0 Call Trace: f2fs_allocate_data_block+0x16c/0x5a0 do_write_page+0x57/0x100 f2fs_do_write_node_page+0x33/0xa0 __write_node_page+0x270/0x4e0 f2fs_sync_node_pages+0x5df/0x670 f2fs_write_checkpoint+0x364/0x13a0 f2fs_sync_fs+0xa3/0x130 f2fs_do_sync_file+0x1a6/0x810 do_fsync+0x33/0x60 __x64_sys_fsync+0xb/0x10 do_syscall_64+0x43/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The testcase fails because that, in fuzzed image, current segment was allocated with LFS type, its .next_blkoff should point to an unused block address, but actually, its bitmap shows it's not. So during allocation, f2fs crash when setting bitmap. Introducing sanity_check_curseg() to check such inconsistence of current in-used segment. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5f6e4cd2eff2..a034e0da004a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4239,6 +4239,41 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return init_victim_secmap(sbi); } +static int sanity_check_curseg(struct f2fs_sb_info *sbi) +{ + int i; + + /* + * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; + * In LFS curseg, all blkaddr after .next_blkoff should be unused. + */ + for (i = 0; i < NO_CHECK_TYPE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + struct seg_entry *se = get_seg_entry(sbi, curseg->segno); + unsigned int blkofs = curseg->next_blkoff; + + if (f2fs_test_bit(blkofs, se->cur_valid_map)) + goto out; + + if (curseg->alloc_type == SSR) + continue; + + for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + if (!f2fs_test_bit(blkofs, se->cur_valid_map)) + continue; +out: + f2fs_msg(sbi->sb, KERN_ERR, + "Current segment's next free block offset is " + "inconsistent with bitmap, logtype:%u, " + "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); + return -EINVAL; + } + } + return 0; +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -4334,6 +4369,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (err) return err; + err = sanity_check_curseg(sbi); + if (err) + return err; + init_min_max_mtime(sbi); return 0; } -- cgit v1.2.2 From 9227d5227b8db354d386f592f159eaa44db1c0b8 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 23 May 2019 09:49:17 +0530 Subject: f2fs: add error prints for debugging mount failure Add error prints to get more details on the mount failure. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a034e0da004a..51f57393ad5b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3537,8 +3537,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || - sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { + f2fs_msg(sbi->sb, KERN_ERR, + "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; + } return 0; } -- cgit v1.2.2 From ae4ad7ea09d32ff1b6fb908ff12f8c1bd5241b29 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:03 -0700 Subject: f2fs: Lower threshold for disable_cp_again The existing threshold for allowable holes at checkpoint=disable time is too high. The OVP space contains reserved segments, which are always in the form of free segments. These must be subtracted from the OVP value. The current threshold is meant to be the maximum value of holes of a single type we can have and still guarantee that we can fill the disk without failing to find space for a block of a given type. If the disk is full, ignoring current reserved, which only helps us, the amount of unused blocks is equal to the OVP area. Of that, there are reserved segments, which must be free segments, and the rest of the ovp area, which can come from either free segments or holes. The maximum possible amount of holes is OVP-reserved. Now, consider the disk when mounting with checkpoint=disable. We must be able to fill all available free space with either data or node blocks. When we start with checkpoint=disable, holes are locked to their current type. Say we have H of one type of hole, and H+X of the other. We can fill H of that space with arbitrary typed blocks via SSR. For the remaining H+X blocks, we may not have any of a given block type left at all. For instance, if we were to fill the disk entirely with blocks of the type with fewer holes, the H+X blocks of the opposite type would not be used. If H+X > OVP-reserved, there would be more holes than could possibly exist, and we would have failed to find a suitable block earlier on, leading to a crash in update_sit_entry. If H+X <= OVP-reserved, then the holes end up effectively masked by the OVP region in this case. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51f57393ad5b..71f8913aa5f9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -876,7 +876,9 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; block_t holes[2] = {0, 0}; /* DATA and NODE */ struct seg_entry *se; unsigned int segno; @@ -891,10 +893,10 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp || holes[NODE] > ovp) + if (holes[DATA] > ovp_holes || holes[NODE] > ovp_holes) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && - dirty_segments(sbi) > overprovision_segments(sbi)) + dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; return 0; } -- cgit v1.2.2 From 4d3aed70902f299ff2ed7048ef44f0d4d573d786 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:06 -0700 Subject: f2fs: Add option to limit required GC for checkpoint=disable This extends the checkpoint option to allow checkpoint=disable:%u[%] This allows you to specify what how much of the disk you are willing to lose access to while mounting with checkpoint=disable. If the amount lost would be higher, the mount will return -EAGAIN. This can be given as a percent of total space, or in blocks. Currently, we need to run garbage collection until the amount of holes is smaller than the OVP space. With the new option, f2fs can mark space as unusable up front instead of requiring garbage collection until the number of holes is small enough. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 71f8913aa5f9..54a3e398d1ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -873,13 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); block_t holes[2] = {0, 0}; /* DATA and NODE */ + block_t unusable; struct seg_entry *se; unsigned int segno; @@ -893,7 +894,17 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp_holes || holes[NODE] > ovp_holes) + unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + if (unusable > ovp_holes) + return unusable - ovp_holes; + return 0; +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && dirty_segments(sbi) > ovp_hole_segs) -- cgit v1.2.2 From 04f0b2eaa3b3ee243df6040617b4bfbbc0404854 Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Wed, 5 Jun 2019 11:33:25 +0800 Subject: f2fs: ioctl for removing a range from F2FS This ioctl shrinks a given length (aligned to sections) from end of the main area. Any cursegs and valid blocks will be moved out before invalidating the range. This feature can be used for adjusting partition sizes online. History of the patch: Sahitya Tummala: - Add this ioctl for f2fs_compat_ioctl() as well. - Fix debugfs status to reflect the online resize changes. - Fix potential race between online resize path and allocate new data block path or gc path. Others: - Rename some identifiers. - Add some error handling branches. - Clear sbi->next_victim_seg[BG_GC/FG_GC] in shrinking range. - Implement this interface as ext4's, and change the parameter from shrunk bytes to new block count of F2FS. - During resizing, force to empty sit_journal and forbid adding new entries to it, in order to avoid invalid segno in journal after resize. - Reduce sbi->user_block_count before resize starts. - Commit the updated superblock first, and then update in-memory metadata only when the former succeeds. - Target block count must align to sections. - Write checkpoint before and after committing the new superblock, w/o CP_FSCK_FLAG respectively, so that the FS can be fixed by fsck even if resize fails after the new superblock is committed. - In free_segment_range(), reduce granularity of gc_mutex. - Add protection on curseg migration. - Add freeze_bdev() and thaw_bdev() for resize fs. - Remove CUR_MAIN_SECS and use MAIN_SECS directly for allocation. - Recover super_block and FS metadata when resize fails. - No need to clear CP_FSCK_FLAG in update_ckpt_flags(). - Clean up the sb and fs metadata update functions for resize_fs. Geert Uytterhoeven: - Use div_u64*() for 64-bit divisions Arnd Bergmann: - Not all architectures support get_user() with a 64-bit argument: ERROR: "__get_user_bad" [fs/f2fs/f2fs.ko] undefined! Use copy_from_user() here, this will always work. Signed-off-by: Qiuyang Sun Signed-off-by: Chao Yu Signed-off-by: Sahitya Tummala Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 54a3e398d1ea..198b2e68a487 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2657,6 +2657,40 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_msg(sbi->sb, KERN_NOTICE, + "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -3786,7 +3820,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3805,7 +3839,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* -- cgit v1.2.2 From dcbb4c10e6d9693cc9d6fa493b4d130b66a60c7d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 18 Jun 2019 17:48:42 +0800 Subject: f2fs: introduce f2fs_ macros to wrap f2fs_printk() - Add and use f2fs_ macros - Convert f2fs_msg to f2fs_printk - Remove level from f2fs_printk and embed the level in the format - Coalesce formats and align multi-line arguments - Remove unnecessary duplicate extern f2fs_msg f2fs.h Signed-off-by: Joe Perches Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 61 +++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 198b2e68a487..4a6133cc9112 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1757,8 +1757,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, devi = f2fs_target_device_index(sbi, blkstart); if (blkstart < FDEV(devi).start_blk || blkstart > FDEV(devi).end_blk) { - f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x", - blkstart); + f2fs_err(sbi, "Invalid block %x", blkstart); return -EIO; } blkstart -= FDEV(devi).start_blk; @@ -1771,10 +1770,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_ERR, - "(%d) %s: Unaligned zone reset attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); + f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path : "", + blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); @@ -2138,15 +2136,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; @@ -2167,15 +2164,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; @@ -2683,9 +2679,8 @@ unlock: up_write(&SIT_I(sbi)->sentry_lock); if (segno != curseg->segno) - f2fs_msg(sbi->sb, KERN_NOTICE, - "For resize: curseg of type %d: %u ==> %u", - type, segno, curseg->segno); + f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); @@ -2823,8 +2818,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Found FS corruption, run fsck to fix."); + f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); return -EIO; } @@ -3585,9 +3579,8 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { - f2fs_msg(sbi->sb, KERN_ERR, - "invalid journal entries nats %u sits %u\n", - nats_in_cursum(nat_j), sits_in_cursum(sit_j)); + f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; } @@ -4155,9 +4148,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) start = le32_to_cpu(segno_in_journal(journal, i)); if (start >= MAIN_SEGS(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong journal entry on segno %u", - start); + f2fs_err(sbi, "Wrong journal entry on segno %u", + start); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; break; @@ -4196,9 +4188,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) up_read(&curseg->journal_rwsem); if (!err && total_node_blocks != valid_node_count(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "SIT is corrupted node# %u vs %u", - total_node_blocks, valid_node_count(sbi)); + f2fs_err(sbi, "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; } @@ -4314,12 +4305,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: - f2fs_msg(sbi->sb, KERN_ERR, - "Current segment's next free block offset is " - "inconsistent with bitmap, logtype:%u, " - "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", - i, curseg->segno, curseg->alloc_type, - curseg->next_blkoff, blkofs); + f2fs_err(sbi, + "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); return -EINVAL; } } -- cgit v1.2.2 From 2d821c12176bc1184701d548c987b2e2a39429ed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jun 2019 17:59:03 +0800 Subject: f2fs: print kernel message if filesystem is inconsistent As Pavel reported, once we detect filesystem inconsistency in f2fs_inplace_write_data(), it will be better to print kernel message as we did in other places. Reported-by: Pavel Machek Signed-off-by: Chao Yu Acked-by: Pavel Machek Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4a6133cc9112..ee96e6fb1a50 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3242,6 +3242,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", + __func__, segno); return -EFAULT; } -- cgit v1.2.2 From 10f966bbf521bb9b2e497bbca496a5141f4071d0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Jun 2019 11:36:14 +0800 Subject: f2fs: use generic EFSBADCRC/EFSCORRUPTED f2fs uses EFAULT as error number to indicate filesystem is corrupted all the time, but generic filesystems use EUCLEAN for such condition, we need to change to follow others. This patch adds two new macros as below to wrap more generic error code macros, and spread them in code. EFSBADCRC EBADMSG /* Bad CRC detected */ EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ Reported-by: Pavel Machek Signed-off-by: Chao Yu Acked-by: Pavel Machek Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ee96e6fb1a50..478284db3065 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2819,7 +2819,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); - return -EIO; + return -EFSCORRUPTED; } /* start/end segment number in main_area */ @@ -3244,7 +3244,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); - return -EFAULT; + return -EFSCORRUPTED; } stat_inc_inplace_blocks(fio->sbi); @@ -4153,7 +4153,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Wrong journal entry on segno %u", start); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; break; } @@ -4193,7 +4193,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "SIT is corrupted node# %u vs %u", total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; } return err; @@ -4311,7 +4311,7 @@ out: "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", i, curseg->segno, curseg->alloc_type, curseg->next_blkoff, blkofs); - return -EINVAL; + return -EFSCORRUPTED; } } return 0; -- cgit v1.2.2 From 6e0cd4a9dd4df1a0afcb454f1e654b5c80685913 Mon Sep 17 00:00:00 2001 From: Heng Xiao Date: Wed, 3 Jul 2019 10:29:57 +0800 Subject: f2fs: fix to avoid long latency during umount In umount, we give an constand time to handle pending discard, previously, in __issue_discard_cmd() we missed to check timeout condition in loop, result in delaying long time, fix it. Signed-off-by: Heng Xiao [Chao Yu: add commit message] Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 478284db3065..2d1d7baaf1b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1497,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; -- cgit v1.2.2 From 56f3ce675103e3fb9e631cfb4131fc768bc23e9a Mon Sep 17 00:00:00 2001 From: Ocean Chen Date: Mon, 8 Jul 2019 12:34:56 +0800 Subject: f2fs: avoid out-of-range memory access blkoff_off might over 512 due to fs corrupt or security vulnerability. That should be checked before being using. Use ENTRIES_IN_SUM to protect invalid value in cur_data_blkoff. Signed-off-by: Ocean Chen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2d1d7baaf1b6..a661ac32e829 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3447,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + if (blk_off > ENTRIES_IN_SUM) { + f2fs_bug_on(sbi, 1); + f2fs_put_page(page, 1); + return -EFAULT; + } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; -- cgit v1.2.2