aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jaegeuk Kim <jaegeuk@kernel.org> 2016-09-19 20:55:10 -0400
committer Jaegeuk Kim <jaegeuk@kernel.org> 2016-09-30 13:05:46 -0400
commit a468f0ef516fda9c7d91bb550d458e853d76955e (patch)
tree 87906aa2c4e349c0b04e8276de05cf956887e150
parent 5d4c0af41fd4cc26cb75af4f3de7fb63c91209c1 (diff)
f2fs: use crc and cp version to determine roll-forward recovery
Previously, we used only cp_version to detect recoverable dnodes. To avoid being fooled by a garbage dnode that carries the same cp_version, we needed to truncate the next dnode during checkpoint, resulting in an additional discard or data write. If we can distinguish such dnodes by using the crc in addition to cp_version, we can remove this overhead. There is a backward-compatibility concern, because this changes the node_footer layout. So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to detect the new layout. The new layout is activated only when this flag is set.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r-- fs/f2fs/checkpoint.c 21
-rw-r--r-- fs/f2fs/f2fs.h 1
-rw-r--r-- fs/f2fs/node.h 77
-rw-r--r-- fs/f2fs/recovery.c 36
-rw-r--r-- fs/f2fs/segment.c 22
-rw-r--r-- fs/f2fs/super.c 5
-rw-r--r-- include/linux/f2fs_fs.h 1
7 files changed, 63 insertions, 100 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index df56a43f982e..9c6439b0e8d2 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -992,7 +992,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
992static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 992static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
993{ 993{
994 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 994 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
995 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
996 struct f2fs_nm_info *nm_i = NM_I(sbi); 995 struct f2fs_nm_info *nm_i = NM_I(sbi);
997 unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; 996 unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
998 nid_t last_nid = nm_i->next_scan_nid; 997 nid_t last_nid = nm_i->next_scan_nid;
@@ -1001,19 +1000,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1001 __u32 crc32 = 0; 1000 __u32 crc32 = 0;
1002 int i; 1001 int i;
1003 int cp_payload_blks = __cp_payload(sbi); 1002 int cp_payload_blks = __cp_payload(sbi);
1004 block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
1005 bool invalidate = false;
1006 struct super_block *sb = sbi->sb; 1003 struct super_block *sb = sbi->sb;
1007 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); 1004 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1008 u64 kbytes_written; 1005 u64 kbytes_written;
1009 1006
1010 /*
1011 * This avoids to conduct wrong roll-forward operations and uses
1012 * metapages, so should be called prior to sync_meta_pages below.
1013 */
1014 if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk))
1015 invalidate = true;
1016
1017 /* Flush all the NAT/SIT pages */ 1007 /* Flush all the NAT/SIT pages */
1018 while (get_pages(sbi, F2FS_DIRTY_META)) { 1008 while (get_pages(sbi, F2FS_DIRTY_META)) {
1019 sync_meta_pages(sbi, META, LONG_MAX); 1009 sync_meta_pages(sbi, META, LONG_MAX);
@@ -1089,6 +1079,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1089 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 1079 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1090 set_ckpt_flags(ckpt, CP_FSCK_FLAG); 1080 set_ckpt_flags(ckpt, CP_FSCK_FLAG);
1091 1081
1082 /* set this flag to activate crc|cp_ver for recovery */
1083 set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
1084
1092 /* update SIT/NAT bitmap */ 1085 /* update SIT/NAT bitmap */
1093 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); 1086 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
1094 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); 1087 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
@@ -1154,14 +1147,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1154 /* wait for previous submitted meta pages writeback */ 1147 /* wait for previous submitted meta pages writeback */
1155 wait_on_all_pages_writeback(sbi); 1148 wait_on_all_pages_writeback(sbi);
1156 1149
1157 /*
1158 * invalidate meta page which is used temporarily for zeroing out
1159 * block at the end of warm node chain.
1160 */
1161 if (invalidate)
1162 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
1163 discard_blk);
1164
1165 release_ino_entry(sbi, false); 1150 release_ino_entry(sbi, false);
1166 1151
1167 if (unlikely(f2fs_cp_error(sbi))) 1152 if (unlikely(f2fs_cp_error(sbi)))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b39fdcdcf891..cda8e6f96618 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2045,7 +2045,6 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
2045void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); 2045void f2fs_wait_all_discard_bio(struct f2fs_sb_info *);
2046void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 2046void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
2047void release_discard_addrs(struct f2fs_sb_info *); 2047void release_discard_addrs(struct f2fs_sb_info *);
2048bool discard_next_dnode(struct f2fs_sb_info *, block_t);
2049int npages_for_summary_flush(struct f2fs_sb_info *, bool); 2048int npages_for_summary_flush(struct f2fs_sb_info *, bool);
2050void allocate_new_segments(struct f2fs_sb_info *); 2049void allocate_new_segments(struct f2fs_sb_info *);
2051int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 2050int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index fc7684554b1a..e8ca64a70de0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -229,6 +229,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
229 f2fs_change_bit(block_off, nm_i->nat_bitmap); 229 f2fs_change_bit(block_off, nm_i->nat_bitmap);
230} 230}
231 231
232static inline nid_t ino_of_node(struct page *node_page)
233{
234 struct f2fs_node *rn = F2FS_NODE(node_page);
235 return le32_to_cpu(rn->footer.ino);
236}
237
238static inline nid_t nid_of_node(struct page *node_page)
239{
240 struct f2fs_node *rn = F2FS_NODE(node_page);
241 return le32_to_cpu(rn->footer.nid);
242}
243
244static inline unsigned int ofs_of_node(struct page *node_page)
245{
246 struct f2fs_node *rn = F2FS_NODE(node_page);
247 unsigned flag = le32_to_cpu(rn->footer.flag);
248 return flag >> OFFSET_BIT_SHIFT;
249}
250
251static inline __u64 cpver_of_node(struct page *node_page)
252{
253 struct f2fs_node *rn = F2FS_NODE(node_page);
254 return le64_to_cpu(rn->footer.cp_ver);
255}
256
257static inline block_t next_blkaddr_of_node(struct page *node_page)
258{
259 struct f2fs_node *rn = F2FS_NODE(node_page);
260 return le32_to_cpu(rn->footer.next_blkaddr);
261}
262
232static inline void fill_node_footer(struct page *page, nid_t nid, 263static inline void fill_node_footer(struct page *page, nid_t nid,
233 nid_t ino, unsigned int ofs, bool reset) 264 nid_t ino, unsigned int ofs, bool reset)
234{ 265{
@@ -259,40 +290,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
259{ 290{
260 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 291 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
261 struct f2fs_node *rn = F2FS_NODE(page); 292 struct f2fs_node *rn = F2FS_NODE(page);
293 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
294 __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
262 295
263 rn->footer.cp_ver = ckpt->checkpoint_ver; 296 if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
297 __u64 crc = le32_to_cpu(*((__le32 *)
298 ((unsigned char *)ckpt + crc_offset)));
299 cp_ver |= (crc << 32);
300 }
301 rn->footer.cp_ver = cpu_to_le64(cp_ver);
264 rn->footer.next_blkaddr = cpu_to_le32(blkaddr); 302 rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
265} 303}
266 304
267static inline nid_t ino_of_node(struct page *node_page) 305static inline bool is_recoverable_dnode(struct page *page)
268{
269 struct f2fs_node *rn = F2FS_NODE(node_page);
270 return le32_to_cpu(rn->footer.ino);
271}
272
273static inline nid_t nid_of_node(struct page *node_page)
274{ 306{
275 struct f2fs_node *rn = F2FS_NODE(node_page); 307 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
276 return le32_to_cpu(rn->footer.nid); 308 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
277} 309 __u64 cp_ver = cur_cp_version(ckpt);
278
279static inline unsigned int ofs_of_node(struct page *node_page)
280{
281 struct f2fs_node *rn = F2FS_NODE(node_page);
282 unsigned flag = le32_to_cpu(rn->footer.flag);
283 return flag >> OFFSET_BIT_SHIFT;
284}
285
286static inline unsigned long long cpver_of_node(struct page *node_page)
287{
288 struct f2fs_node *rn = F2FS_NODE(node_page);
289 return le64_to_cpu(rn->footer.cp_ver);
290}
291 310
292static inline block_t next_blkaddr_of_node(struct page *node_page) 311 if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
293{ 312 __u64 crc = le32_to_cpu(*((__le32 *)
294 struct f2fs_node *rn = F2FS_NODE(node_page); 313 ((unsigned char *)ckpt + crc_offset)));
295 return le32_to_cpu(rn->footer.next_blkaddr); 314 cp_ver |= (crc << 32);
315 }
316 return cpu_to_le64(cp_ver) == cpver_of_node(page);
296} 317}
297 318
298/* 319/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index ad748e52956a..2b8a56deb2d2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -224,7 +224,6 @@ static bool is_same_inode(struct inode *inode, struct page *ipage)
224 224
225static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 225static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
226{ 226{
227 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
228 struct curseg_info *curseg; 227 struct curseg_info *curseg;
229 struct page *page = NULL; 228 struct page *page = NULL;
230 block_t blkaddr; 229 block_t blkaddr;
@@ -242,7 +241,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
242 241
243 page = get_tmp_page(sbi, blkaddr); 242 page = get_tmp_page(sbi, blkaddr);
244 243
245 if (cp_ver != cpver_of_node(page)) 244 if (!is_recoverable_dnode(page))
246 break; 245 break;
247 246
248 if (!is_fsync_dnode(page)) 247 if (!is_fsync_dnode(page))
@@ -516,7 +515,6 @@ out:
516static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, 515static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
517 struct list_head *dir_list) 516 struct list_head *dir_list)
518{ 517{
519 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
520 struct curseg_info *curseg; 518 struct curseg_info *curseg;
521 struct page *page = NULL; 519 struct page *page = NULL;
522 int err = 0; 520 int err = 0;
@@ -536,7 +534,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
536 534
537 page = get_tmp_page(sbi, blkaddr); 535 page = get_tmp_page(sbi, blkaddr);
538 536
539 if (cp_ver != cpver_of_node(page)) { 537 if (!is_recoverable_dnode(page)) {
540 f2fs_put_page(page, 1); 538 f2fs_put_page(page, 1);
541 break; 539 break;
542 } 540 }
@@ -628,37 +626,15 @@ out:
628 } 626 }
629 627
630 clear_sbi_flag(sbi, SBI_POR_DOING); 628 clear_sbi_flag(sbi, SBI_POR_DOING);
631 if (err) { 629 if (err)
632 bool invalidate = false;
633
634 if (test_opt(sbi, LFS)) {
635 update_meta_page(sbi, NULL, blkaddr);
636 invalidate = true;
637 } else if (discard_next_dnode(sbi, blkaddr)) {
638 invalidate = true;
639 }
640
641 f2fs_wait_all_discard_bio(sbi);
642
643 /* Flush all the NAT/SIT pages */
644 while (get_pages(sbi, F2FS_DIRTY_META))
645 sync_meta_pages(sbi, META, LONG_MAX);
646
647 /* invalidate temporary meta page */
648 if (invalidate)
649 invalidate_mapping_pages(META_MAPPING(sbi),
650 blkaddr, blkaddr);
651
652 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 630 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
653 mutex_unlock(&sbi->cp_mutex); 631 mutex_unlock(&sbi->cp_mutex);
654 } else if (need_writecp) { 632
633 if (!err && need_writecp) {
655 struct cp_control cpc = { 634 struct cp_control cpc = {
656 .reason = CP_RECOVERY, 635 .reason = CP_RECOVERY,
657 }; 636 };
658 mutex_unlock(&sbi->cp_mutex);
659 err = write_checkpoint(sbi, &cpc); 637 err = write_checkpoint(sbi, &cpc);
660 } else {
661 mutex_unlock(&sbi->cp_mutex);
662 } 638 }
663 639
664 destroy_fsync_dnodes(&dir_list); 640 destroy_fsync_dnodes(&dir_list);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c4d0472cf47d..2d23d7b17d6c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -669,28 +669,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
669 return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); 669 return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
670} 670}
671 671
672bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
673{
674 int err = -EOPNOTSUPP;
675
676 if (test_opt(sbi, DISCARD)) {
677 struct seg_entry *se = get_seg_entry(sbi,
678 GET_SEGNO(sbi, blkaddr));
679 unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
680
681 if (f2fs_test_bit(offset, se->discard_map))
682 return false;
683
684 err = f2fs_issue_discard(sbi, blkaddr, 1);
685 }
686
687 if (err) {
688 update_meta_page(sbi, NULL, blkaddr);
689 return true;
690 }
691 return false;
692}
693
694static void __add_discard_entry(struct f2fs_sb_info *sbi, 672static void __add_discard_entry(struct f2fs_sb_info *sbi,
695 struct cp_control *cpc, struct seg_entry *se, 673 struct cp_control *cpc, struct seg_entry *se,
696 unsigned int start, unsigned int end) 674 unsigned int start, unsigned int end)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 29e3cf4ccf1b..9649b79eefe8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1827,6 +1827,9 @@ try_onemore:
1827 if (need_fsck) 1827 if (need_fsck)
1828 set_sbi_flag(sbi, SBI_NEED_FSCK); 1828 set_sbi_flag(sbi, SBI_NEED_FSCK);
1829 1829
1830 if (!retry)
1831 goto skip_recovery;
1832
1830 err = recover_fsync_data(sbi, false); 1833 err = recover_fsync_data(sbi, false);
1831 if (err < 0) { 1834 if (err < 0) {
1832 need_fsck = true; 1835 need_fsck = true;
@@ -1844,7 +1847,7 @@ try_onemore:
1844 goto free_kobj; 1847 goto free_kobj;
1845 } 1848 }
1846 } 1849 }
1847 1850skip_recovery:
1848 /* recover_fsync_data() cleared this already */ 1851 /* recover_fsync_data() cleared this already */
1849 clear_sbi_flag(sbi, SBI_POR_DOING); 1852 clear_sbi_flag(sbi, SBI_POR_DOING);
1850 1853
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 4c02c6521fef..422630b8e588 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -100,6 +100,7 @@ struct f2fs_super_block {
100/* 100/*
101 * For checkpoint 101 * For checkpoint
102 */ 102 */
103#define CP_CRC_RECOVERY_FLAG 0x00000040
103#define CP_FASTBOOT_FLAG 0x00000020 104#define CP_FASTBOOT_FLAG 0x00000020
104#define CP_FSCK_FLAG 0x00000010 105#define CP_FSCK_FLAG 0x00000010
105#define CP_ERROR_FLAG 0x00000008 106#define CP_ERROR_FLAG 0x00000008