diff options
author | Jaegeuk Kim <jaegeuk@kernel.org> | 2016-09-19 20:55:10 -0400 |
---|---|---|
committer | Jaegeuk Kim <jaegeuk@kernel.org> | 2016-09-30 13:05:46 -0400 |
commit | a468f0ef516fda9c7d91bb550d458e853d76955e (patch) | |
tree | 87906aa2c4e349c0b04e8276de05cf956887e150 | |
parent | 5d4c0af41fd4cc26cb75af4f3de7fb63c91209c1 (diff) |
f2fs: use crc and cp version to determine roll-forward recovery
Previously, we used only cp_version to detect recoverable dnodes.
In order to avoid matching a garbage block that carries the same cp_version,
we needed to truncate the next dnode during checkpoint, resulting in an
additional discard or data write.
If we can distinguish this by using crc in addition to cp_version, we can
remove this overhead.
There is a backward compatibility concern, because this changes the node_footer layout.
So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to
detect the new layout. The new layout will be activated only when this flag is set.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r-- | fs/f2fs/checkpoint.c | 21 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 1 | ||||
-rw-r--r-- | fs/f2fs/node.h | 77 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 36 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 22 | ||||
-rw-r--r-- | fs/f2fs/super.c | 5 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 1 |
7 files changed, 63 insertions, 100 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index df56a43f982e..9c6439b0e8d2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -992,7 +992,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) | |||
992 | static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | 992 | static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
993 | { | 993 | { |
994 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 994 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
995 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | ||
996 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 995 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
997 | unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; | 996 | unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; |
998 | nid_t last_nid = nm_i->next_scan_nid; | 997 | nid_t last_nid = nm_i->next_scan_nid; |
@@ -1001,19 +1000,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1001 | __u32 crc32 = 0; | 1000 | __u32 crc32 = 0; |
1002 | int i; | 1001 | int i; |
1003 | int cp_payload_blks = __cp_payload(sbi); | 1002 | int cp_payload_blks = __cp_payload(sbi); |
1004 | block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg); | ||
1005 | bool invalidate = false; | ||
1006 | struct super_block *sb = sbi->sb; | 1003 | struct super_block *sb = sbi->sb; |
1007 | struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); | 1004 | struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); |
1008 | u64 kbytes_written; | 1005 | u64 kbytes_written; |
1009 | 1006 | ||
1010 | /* | ||
1011 | * This avoids to conduct wrong roll-forward operations and uses | ||
1012 | * metapages, so should be called prior to sync_meta_pages below. | ||
1013 | */ | ||
1014 | if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk)) | ||
1015 | invalidate = true; | ||
1016 | |||
1017 | /* Flush all the NAT/SIT pages */ | 1007 | /* Flush all the NAT/SIT pages */ |
1018 | while (get_pages(sbi, F2FS_DIRTY_META)) { | 1008 | while (get_pages(sbi, F2FS_DIRTY_META)) { |
1019 | sync_meta_pages(sbi, META, LONG_MAX); | 1009 | sync_meta_pages(sbi, META, LONG_MAX); |
@@ -1089,6 +1079,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1089 | if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) | 1079 | if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) |
1090 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); | 1080 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); |
1091 | 1081 | ||
1082 | /* set this flag to activate crc|cp_ver for recovery */ | ||
1083 | set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); | ||
1084 | |||
1092 | /* update SIT/NAT bitmap */ | 1085 | /* update SIT/NAT bitmap */ |
1093 | get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); | 1086 | get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); |
1094 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | 1087 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); |
@@ -1154,14 +1147,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1154 | /* wait for previous submitted meta pages writeback */ | 1147 | /* wait for previous submitted meta pages writeback */ |
1155 | wait_on_all_pages_writeback(sbi); | 1148 | wait_on_all_pages_writeback(sbi); |
1156 | 1149 | ||
1157 | /* | ||
1158 | * invalidate meta page which is used temporarily for zeroing out | ||
1159 | * block at the end of warm node chain. | ||
1160 | */ | ||
1161 | if (invalidate) | ||
1162 | invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, | ||
1163 | discard_blk); | ||
1164 | |||
1165 | release_ino_entry(sbi, false); | 1150 | release_ino_entry(sbi, false); |
1166 | 1151 | ||
1167 | if (unlikely(f2fs_cp_error(sbi))) | 1152 | if (unlikely(f2fs_cp_error(sbi))) |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b39fdcdcf891..cda8e6f96618 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -2045,7 +2045,6 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | |||
2045 | void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); | 2045 | void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); |
2046 | void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); | 2046 | void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); |
2047 | void release_discard_addrs(struct f2fs_sb_info *); | 2047 | void release_discard_addrs(struct f2fs_sb_info *); |
2048 | bool discard_next_dnode(struct f2fs_sb_info *, block_t); | ||
2049 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); | 2048 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); |
2050 | void allocate_new_segments(struct f2fs_sb_info *); | 2049 | void allocate_new_segments(struct f2fs_sb_info *); |
2051 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | 2050 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index fc7684554b1a..e8ca64a70de0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -229,6 +229,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) | |||
229 | f2fs_change_bit(block_off, nm_i->nat_bitmap); | 229 | f2fs_change_bit(block_off, nm_i->nat_bitmap); |
230 | } | 230 | } |
231 | 231 | ||
232 | static inline nid_t ino_of_node(struct page *node_page) | ||
233 | { | ||
234 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
235 | return le32_to_cpu(rn->footer.ino); | ||
236 | } | ||
237 | |||
238 | static inline nid_t nid_of_node(struct page *node_page) | ||
239 | { | ||
240 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
241 | return le32_to_cpu(rn->footer.nid); | ||
242 | } | ||
243 | |||
244 | static inline unsigned int ofs_of_node(struct page *node_page) | ||
245 | { | ||
246 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
247 | unsigned flag = le32_to_cpu(rn->footer.flag); | ||
248 | return flag >> OFFSET_BIT_SHIFT; | ||
249 | } | ||
250 | |||
251 | static inline __u64 cpver_of_node(struct page *node_page) | ||
252 | { | ||
253 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
254 | return le64_to_cpu(rn->footer.cp_ver); | ||
255 | } | ||
256 | |||
257 | static inline block_t next_blkaddr_of_node(struct page *node_page) | ||
258 | { | ||
259 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
260 | return le32_to_cpu(rn->footer.next_blkaddr); | ||
261 | } | ||
262 | |||
232 | static inline void fill_node_footer(struct page *page, nid_t nid, | 263 | static inline void fill_node_footer(struct page *page, nid_t nid, |
233 | nid_t ino, unsigned int ofs, bool reset) | 264 | nid_t ino, unsigned int ofs, bool reset) |
234 | { | 265 | { |
@@ -259,40 +290,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) | |||
259 | { | 290 | { |
260 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); | 291 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); |
261 | struct f2fs_node *rn = F2FS_NODE(page); | 292 | struct f2fs_node *rn = F2FS_NODE(page); |
293 | size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); | ||
294 | __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); | ||
262 | 295 | ||
263 | rn->footer.cp_ver = ckpt->checkpoint_ver; | 296 | if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { |
297 | __u64 crc = le32_to_cpu(*((__le32 *) | ||
298 | ((unsigned char *)ckpt + crc_offset))); | ||
299 | cp_ver |= (crc << 32); | ||
300 | } | ||
301 | rn->footer.cp_ver = cpu_to_le64(cp_ver); | ||
264 | rn->footer.next_blkaddr = cpu_to_le32(blkaddr); | 302 | rn->footer.next_blkaddr = cpu_to_le32(blkaddr); |
265 | } | 303 | } |
266 | 304 | ||
267 | static inline nid_t ino_of_node(struct page *node_page) | 305 | static inline bool is_recoverable_dnode(struct page *page) |
268 | { | ||
269 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
270 | return le32_to_cpu(rn->footer.ino); | ||
271 | } | ||
272 | |||
273 | static inline nid_t nid_of_node(struct page *node_page) | ||
274 | { | 306 | { |
275 | struct f2fs_node *rn = F2FS_NODE(node_page); | 307 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); |
276 | return le32_to_cpu(rn->footer.nid); | 308 | size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); |
277 | } | 309 | __u64 cp_ver = cur_cp_version(ckpt); |
278 | |||
279 | static inline unsigned int ofs_of_node(struct page *node_page) | ||
280 | { | ||
281 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
282 | unsigned flag = le32_to_cpu(rn->footer.flag); | ||
283 | return flag >> OFFSET_BIT_SHIFT; | ||
284 | } | ||
285 | |||
286 | static inline unsigned long long cpver_of_node(struct page *node_page) | ||
287 | { | ||
288 | struct f2fs_node *rn = F2FS_NODE(node_page); | ||
289 | return le64_to_cpu(rn->footer.cp_ver); | ||
290 | } | ||
291 | 310 | ||
292 | static inline block_t next_blkaddr_of_node(struct page *node_page) | 311 | if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { |
293 | { | 312 | __u64 crc = le32_to_cpu(*((__le32 *) |
294 | struct f2fs_node *rn = F2FS_NODE(node_page); | 313 | ((unsigned char *)ckpt + crc_offset))); |
295 | return le32_to_cpu(rn->footer.next_blkaddr); | 314 | cp_ver |= (crc << 32); |
315 | } | ||
316 | return cpu_to_le64(cp_ver) == cpver_of_node(page); | ||
296 | } | 317 | } |
297 | 318 | ||
298 | /* | 319 | /* |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ad748e52956a..2b8a56deb2d2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -224,7 +224,6 @@ static bool is_same_inode(struct inode *inode, struct page *ipage) | |||
224 | 224 | ||
225 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | 225 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) |
226 | { | 226 | { |
227 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | ||
228 | struct curseg_info *curseg; | 227 | struct curseg_info *curseg; |
229 | struct page *page = NULL; | 228 | struct page *page = NULL; |
230 | block_t blkaddr; | 229 | block_t blkaddr; |
@@ -242,7 +241,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
242 | 241 | ||
243 | page = get_tmp_page(sbi, blkaddr); | 242 | page = get_tmp_page(sbi, blkaddr); |
244 | 243 | ||
245 | if (cp_ver != cpver_of_node(page)) | 244 | if (!is_recoverable_dnode(page)) |
246 | break; | 245 | break; |
247 | 246 | ||
248 | if (!is_fsync_dnode(page)) | 247 | if (!is_fsync_dnode(page)) |
@@ -516,7 +515,6 @@ out: | |||
516 | static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, | 515 | static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, |
517 | struct list_head *dir_list) | 516 | struct list_head *dir_list) |
518 | { | 517 | { |
519 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | ||
520 | struct curseg_info *curseg; | 518 | struct curseg_info *curseg; |
521 | struct page *page = NULL; | 519 | struct page *page = NULL; |
522 | int err = 0; | 520 | int err = 0; |
@@ -536,7 +534,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, | |||
536 | 534 | ||
537 | page = get_tmp_page(sbi, blkaddr); | 535 | page = get_tmp_page(sbi, blkaddr); |
538 | 536 | ||
539 | if (cp_ver != cpver_of_node(page)) { | 537 | if (!is_recoverable_dnode(page)) { |
540 | f2fs_put_page(page, 1); | 538 | f2fs_put_page(page, 1); |
541 | break; | 539 | break; |
542 | } | 540 | } |
@@ -628,37 +626,15 @@ out: | |||
628 | } | 626 | } |
629 | 627 | ||
630 | clear_sbi_flag(sbi, SBI_POR_DOING); | 628 | clear_sbi_flag(sbi, SBI_POR_DOING); |
631 | if (err) { | 629 | if (err) |
632 | bool invalidate = false; | ||
633 | |||
634 | if (test_opt(sbi, LFS)) { | ||
635 | update_meta_page(sbi, NULL, blkaddr); | ||
636 | invalidate = true; | ||
637 | } else if (discard_next_dnode(sbi, blkaddr)) { | ||
638 | invalidate = true; | ||
639 | } | ||
640 | |||
641 | f2fs_wait_all_discard_bio(sbi); | ||
642 | |||
643 | /* Flush all the NAT/SIT pages */ | ||
644 | while (get_pages(sbi, F2FS_DIRTY_META)) | ||
645 | sync_meta_pages(sbi, META, LONG_MAX); | ||
646 | |||
647 | /* invalidate temporary meta page */ | ||
648 | if (invalidate) | ||
649 | invalidate_mapping_pages(META_MAPPING(sbi), | ||
650 | blkaddr, blkaddr); | ||
651 | |||
652 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | 630 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); |
653 | mutex_unlock(&sbi->cp_mutex); | 631 | mutex_unlock(&sbi->cp_mutex); |
654 | } else if (need_writecp) { | 632 | |
633 | if (!err && need_writecp) { | ||
655 | struct cp_control cpc = { | 634 | struct cp_control cpc = { |
656 | .reason = CP_RECOVERY, | 635 | .reason = CP_RECOVERY, |
657 | }; | 636 | }; |
658 | mutex_unlock(&sbi->cp_mutex); | ||
659 | err = write_checkpoint(sbi, &cpc); | 637 | err = write_checkpoint(sbi, &cpc); |
660 | } else { | ||
661 | mutex_unlock(&sbi->cp_mutex); | ||
662 | } | 638 | } |
663 | 639 | ||
664 | destroy_fsync_dnodes(&dir_list); | 640 | destroy_fsync_dnodes(&dir_list); |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4d0472cf47d..2d23d7b17d6c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -669,28 +669,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, | |||
669 | return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); | 669 | return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); |
670 | } | 670 | } |
671 | 671 | ||
672 | bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | ||
673 | { | ||
674 | int err = -EOPNOTSUPP; | ||
675 | |||
676 | if (test_opt(sbi, DISCARD)) { | ||
677 | struct seg_entry *se = get_seg_entry(sbi, | ||
678 | GET_SEGNO(sbi, blkaddr)); | ||
679 | unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); | ||
680 | |||
681 | if (f2fs_test_bit(offset, se->discard_map)) | ||
682 | return false; | ||
683 | |||
684 | err = f2fs_issue_discard(sbi, blkaddr, 1); | ||
685 | } | ||
686 | |||
687 | if (err) { | ||
688 | update_meta_page(sbi, NULL, blkaddr); | ||
689 | return true; | ||
690 | } | ||
691 | return false; | ||
692 | } | ||
693 | |||
694 | static void __add_discard_entry(struct f2fs_sb_info *sbi, | 672 | static void __add_discard_entry(struct f2fs_sb_info *sbi, |
695 | struct cp_control *cpc, struct seg_entry *se, | 673 | struct cp_control *cpc, struct seg_entry *se, |
696 | unsigned int start, unsigned int end) | 674 | unsigned int start, unsigned int end) |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 29e3cf4ccf1b..9649b79eefe8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -1827,6 +1827,9 @@ try_onemore: | |||
1827 | if (need_fsck) | 1827 | if (need_fsck) |
1828 | set_sbi_flag(sbi, SBI_NEED_FSCK); | 1828 | set_sbi_flag(sbi, SBI_NEED_FSCK); |
1829 | 1829 | ||
1830 | if (!retry) | ||
1831 | goto skip_recovery; | ||
1832 | |||
1830 | err = recover_fsync_data(sbi, false); | 1833 | err = recover_fsync_data(sbi, false); |
1831 | if (err < 0) { | 1834 | if (err < 0) { |
1832 | need_fsck = true; | 1835 | need_fsck = true; |
@@ -1844,7 +1847,7 @@ try_onemore: | |||
1844 | goto free_kobj; | 1847 | goto free_kobj; |
1845 | } | 1848 | } |
1846 | } | 1849 | } |
1847 | 1850 | skip_recovery: | |
1848 | /* recover_fsync_data() cleared this already */ | 1851 | /* recover_fsync_data() cleared this already */ |
1849 | clear_sbi_flag(sbi, SBI_POR_DOING); | 1852 | clear_sbi_flag(sbi, SBI_POR_DOING); |
1850 | 1853 | ||
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 4c02c6521fef..422630b8e588 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h | |||
@@ -100,6 +100,7 @@ struct f2fs_super_block { | |||
100 | /* | 100 | /* |
101 | * For checkpoint | 101 | * For checkpoint |
102 | */ | 102 | */ |
103 | #define CP_CRC_RECOVERY_FLAG 0x00000040 | ||
103 | #define CP_FASTBOOT_FLAG 0x00000020 | 104 | #define CP_FASTBOOT_FLAG 0x00000020 |
104 | #define CP_FSCK_FLAG 0x00000010 | 105 | #define CP_FSCK_FLAG 0x00000010 |
105 | #define CP_ERROR_FLAG 0x00000008 | 106 | #define CP_ERROR_FLAG 0x00000008 |