aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/checkpoint.c97
-rw-r--r--fs/f2fs/data.c69
-rw-r--r--fs/f2fs/debug.c20
-rw-r--r--fs/f2fs/dir.c19
-rw-r--r--fs/f2fs/f2fs.h163
-rw-r--r--fs/f2fs/file.c257
-rw-r--r--fs/f2fs/gc.c26
-rw-r--r--fs/f2fs/inline.c20
-rw-r--r--fs/f2fs/inode.c37
-rw-r--r--fs/f2fs/namei.c53
-rw-r--r--fs/f2fs/node.c460
-rw-r--r--fs/f2fs/node.h60
-rw-r--r--fs/f2fs/recovery.c191
-rw-r--r--fs/f2fs/segment.c520
-rw-r--r--fs/f2fs/segment.h160
-rw-r--r--fs/f2fs/super.c47
-rw-r--r--fs/f2fs/xattr.c8
17 files changed, 1421 insertions, 786 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ec3b7a5381fa..dd10a031c052 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,7 +72,22 @@ out:
72 return page; 72 return page;
73} 73}
74 74
75static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) 75struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
76{
77 bool readahead = false;
78 struct page *page;
79
80 page = find_get_page(META_MAPPING(sbi), index);
81 if (!page || (page && !PageUptodate(page)))
82 readahead = true;
83 f2fs_put_page(page, 0);
84
85 if (readahead)
86 ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
87 return get_meta_page(sbi, index);
88}
89
90static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
76{ 91{
77 switch (type) { 92 switch (type) {
78 case META_NAT: 93 case META_NAT:
@@ -82,6 +97,8 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
82 case META_SSA: 97 case META_SSA:
83 case META_CP: 98 case META_CP:
84 return 0; 99 return 0;
100 case META_POR:
101 return MAX_BLKADDR(sbi);
85 default: 102 default:
86 BUG(); 103 BUG();
87 } 104 }
@@ -90,12 +107,12 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
90/* 107/*
91 * Readahead CP/NAT/SIT/SSA pages 108 * Readahead CP/NAT/SIT/SSA pages
92 */ 109 */
93int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) 110int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
94{ 111{
95 block_t prev_blk_addr = 0; 112 block_t prev_blk_addr = 0;
96 struct page *page; 113 struct page *page;
97 int blkno = start; 114 block_t blkno = start;
98 int max_blks = get_max_meta_blks(sbi, type); 115 block_t max_blks = get_max_meta_blks(sbi, type);
99 116
100 struct f2fs_io_info fio = { 117 struct f2fs_io_info fio = {
101 .type = META, 118 .type = META,
@@ -125,7 +142,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
125 break; 142 break;
126 case META_SSA: 143 case META_SSA:
127 case META_CP: 144 case META_CP:
128 /* get ssa/cp block addr */ 145 case META_POR:
146 if (unlikely(blkno >= max_blks))
147 goto out;
148 if (unlikely(blkno < SEG0_BLKADDR(sbi)))
149 goto out;
129 blk_addr = blkno; 150 blk_addr = blkno;
130 break; 151 break;
131 default: 152 default:
@@ -151,8 +172,7 @@ out:
151static int f2fs_write_meta_page(struct page *page, 172static int f2fs_write_meta_page(struct page *page,
152 struct writeback_control *wbc) 173 struct writeback_control *wbc)
153{ 174{
154 struct inode *inode = page->mapping->host; 175 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
155 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
156 176
157 trace_f2fs_writepage(page, META); 177 trace_f2fs_writepage(page, META);
158 178
@@ -177,7 +197,7 @@ redirty_out:
177static int f2fs_write_meta_pages(struct address_space *mapping, 197static int f2fs_write_meta_pages(struct address_space *mapping,
178 struct writeback_control *wbc) 198 struct writeback_control *wbc)
179{ 199{
180 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 200 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
181 long diff, written; 201 long diff, written;
182 202
183 trace_f2fs_writepages(mapping->host, wbc, META); 203 trace_f2fs_writepages(mapping->host, wbc, META);
@@ -259,15 +279,12 @@ continue_unlock:
259 279
260static int f2fs_set_meta_page_dirty(struct page *page) 280static int f2fs_set_meta_page_dirty(struct page *page)
261{ 281{
262 struct address_space *mapping = page->mapping;
263 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
264
265 trace_f2fs_set_page_dirty(page, META); 282 trace_f2fs_set_page_dirty(page, META);
266 283
267 SetPageUptodate(page); 284 SetPageUptodate(page);
268 if (!PageDirty(page)) { 285 if (!PageDirty(page)) {
269 __set_page_dirty_nobuffers(page); 286 __set_page_dirty_nobuffers(page);
270 inc_page_count(sbi, F2FS_DIRTY_META); 287 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
271 return 1; 288 return 1;
272 } 289 }
273 return 0; 290 return 0;
@@ -378,7 +395,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
378void release_orphan_inode(struct f2fs_sb_info *sbi) 395void release_orphan_inode(struct f2fs_sb_info *sbi)
379{ 396{
380 spin_lock(&sbi->ino_lock[ORPHAN_INO]); 397 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
381 f2fs_bug_on(sbi->n_orphans == 0); 398 f2fs_bug_on(sbi, sbi->n_orphans == 0);
382 sbi->n_orphans--; 399 sbi->n_orphans--;
383 spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 400 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
384} 401}
@@ -398,7 +415,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
398static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 415static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
399{ 416{
400 struct inode *inode = f2fs_iget(sbi->sb, ino); 417 struct inode *inode = f2fs_iget(sbi->sb, ino);
401 f2fs_bug_on(IS_ERR(inode)); 418 f2fs_bug_on(sbi, IS_ERR(inode));
402 clear_nlink(inode); 419 clear_nlink(inode);
403 420
404 /* truncate all the data during iput */ 421 /* truncate all the data during iput */
@@ -459,7 +476,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
459 list_for_each_entry(orphan, head, list) { 476 list_for_each_entry(orphan, head, list) {
460 if (!page) { 477 if (!page) {
461 page = find_get_page(META_MAPPING(sbi), start_blk++); 478 page = find_get_page(META_MAPPING(sbi), start_blk++);
462 f2fs_bug_on(!page); 479 f2fs_bug_on(sbi, !page);
463 orphan_blk = 480 orphan_blk =
464 (struct f2fs_orphan_block *)page_address(page); 481 (struct f2fs_orphan_block *)page_address(page);
465 memset(orphan_blk, 0, sizeof(*orphan_blk)); 482 memset(orphan_blk, 0, sizeof(*orphan_blk));
@@ -619,7 +636,7 @@ fail_no_cp:
619 636
620static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) 637static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
621{ 638{
622 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 639 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
623 640
624 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) 641 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
625 return -EEXIST; 642 return -EEXIST;
@@ -631,32 +648,38 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
631 return 0; 648 return 0;
632} 649}
633 650
634void set_dirty_dir_page(struct inode *inode, struct page *page) 651void update_dirty_page(struct inode *inode, struct page *page)
635{ 652{
636 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 653 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
637 struct dir_inode_entry *new; 654 struct dir_inode_entry *new;
638 int ret = 0; 655 int ret = 0;
639 656
640 if (!S_ISDIR(inode->i_mode)) 657 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
641 return; 658 return;
642 659
660 if (!S_ISDIR(inode->i_mode)) {
661 inode_inc_dirty_pages(inode);
662 goto out;
663 }
664
643 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 665 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
644 new->inode = inode; 666 new->inode = inode;
645 INIT_LIST_HEAD(&new->list); 667 INIT_LIST_HEAD(&new->list);
646 668
647 spin_lock(&sbi->dir_inode_lock); 669 spin_lock(&sbi->dir_inode_lock);
648 ret = __add_dirty_inode(inode, new); 670 ret = __add_dirty_inode(inode, new);
649 inode_inc_dirty_dents(inode); 671 inode_inc_dirty_pages(inode);
650 SetPagePrivate(page);
651 spin_unlock(&sbi->dir_inode_lock); 672 spin_unlock(&sbi->dir_inode_lock);
652 673
653 if (ret) 674 if (ret)
654 kmem_cache_free(inode_entry_slab, new); 675 kmem_cache_free(inode_entry_slab, new);
676out:
677 SetPagePrivate(page);
655} 678}
656 679
657void add_dirty_dir_inode(struct inode *inode) 680void add_dirty_dir_inode(struct inode *inode)
658{ 681{
659 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 682 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
660 struct dir_inode_entry *new = 683 struct dir_inode_entry *new =
661 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 684 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
662 int ret = 0; 685 int ret = 0;
@@ -674,14 +697,14 @@ void add_dirty_dir_inode(struct inode *inode)
674 697
675void remove_dirty_dir_inode(struct inode *inode) 698void remove_dirty_dir_inode(struct inode *inode)
676{ 699{
677 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 700 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
678 struct dir_inode_entry *entry; 701 struct dir_inode_entry *entry;
679 702
680 if (!S_ISDIR(inode->i_mode)) 703 if (!S_ISDIR(inode->i_mode))
681 return; 704 return;
682 705
683 spin_lock(&sbi->dir_inode_lock); 706 spin_lock(&sbi->dir_inode_lock);
684 if (get_dirty_dents(inode) || 707 if (get_dirty_pages(inode) ||
685 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { 708 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
686 spin_unlock(&sbi->dir_inode_lock); 709 spin_unlock(&sbi->dir_inode_lock);
687 return; 710 return;
@@ -802,11 +825,12 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
802 finish_wait(&sbi->cp_wait, &wait); 825 finish_wait(&sbi->cp_wait, &wait);
803} 826}
804 827
805static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 828static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
806{ 829{
807 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 830 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
808 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 831 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
809 nid_t last_nid = 0; 832 struct f2fs_nm_info *nm_i = NM_I(sbi);
833 nid_t last_nid = nm_i->next_scan_nid;
810 block_t start_blk; 834 block_t start_blk;
811 struct page *cp_page; 835 struct page *cp_page;
812 unsigned int data_sum_blocks, orphan_blocks; 836 unsigned int data_sum_blocks, orphan_blocks;
@@ -869,7 +893,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 893 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
870 orphan_blocks); 894 orphan_blocks);
871 895
872 if (is_umount) { 896 if (cpc->reason == CP_UMOUNT) {
873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 897 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
874 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ 898 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
875 cp_payload_blks + data_sum_blocks + 899 cp_payload_blks + data_sum_blocks +
@@ -886,6 +910,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
886 else 910 else
887 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 911 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
888 912
913 if (sbi->need_fsck)
914 set_ckpt_flags(ckpt, CP_FSCK_FLAG);
915
889 /* update SIT/NAT bitmap */ 916 /* update SIT/NAT bitmap */
890 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); 917 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
891 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); 918 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
@@ -920,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
920 947
921 write_data_summaries(sbi, start_blk); 948 write_data_summaries(sbi, start_blk);
922 start_blk += data_sum_blocks; 949 start_blk += data_sum_blocks;
923 if (is_umount) { 950 if (cpc->reason == CP_UMOUNT) {
924 write_node_summaries(sbi, start_blk); 951 write_node_summaries(sbi, start_blk);
925 start_blk += NR_CURSEG_NODE_TYPE; 952 start_blk += NR_CURSEG_NODE_TYPE;
926 } 953 }
@@ -960,23 +987,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
960/* 987/*
961 * We guarantee that this checkpoint procedure will not fail. 988 * We guarantee that this checkpoint procedure will not fail.
962 */ 989 */
963void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 990void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
964{ 991{
965 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 992 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
966 unsigned long long ckpt_ver; 993 unsigned long long ckpt_ver;
967 994
968 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); 995 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
969 996
970 mutex_lock(&sbi->cp_mutex); 997 mutex_lock(&sbi->cp_mutex);
971 998
972 if (!sbi->s_dirty) 999 if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
973 goto out; 1000 goto out;
974 if (unlikely(f2fs_cp_error(sbi))) 1001 if (unlikely(f2fs_cp_error(sbi)))
975 goto out; 1002 goto out;
976 if (block_operations(sbi)) 1003 if (block_operations(sbi))
977 goto out; 1004 goto out;
978 1005
979 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); 1006 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
980 1007
981 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1008 f2fs_submit_merged_bio(sbi, DATA, WRITE);
982 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1009 f2fs_submit_merged_bio(sbi, NODE, WRITE);
@@ -992,16 +1019,16 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
992 1019
993 /* write cached NAT/SIT entries to NAT/SIT area */ 1020 /* write cached NAT/SIT entries to NAT/SIT area */
994 flush_nat_entries(sbi); 1021 flush_nat_entries(sbi);
995 flush_sit_entries(sbi); 1022 flush_sit_entries(sbi, cpc);
996 1023
997 /* unlock all the fs_lock[] in do_checkpoint() */ 1024 /* unlock all the fs_lock[] in do_checkpoint() */
998 do_checkpoint(sbi, is_umount); 1025 do_checkpoint(sbi, cpc);
999 1026
1000 unblock_operations(sbi); 1027 unblock_operations(sbi);
1001 stat_inc_cp_count(sbi->stat_info); 1028 stat_inc_cp_count(sbi->stat_info);
1002out: 1029out:
1003 mutex_unlock(&sbi->cp_mutex); 1030 mutex_unlock(&sbi->cp_mutex);
1004 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 1031 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1005} 1032}
1006 1033
1007void init_ino_entry_info(struct f2fs_sb_info *sbi) 1034void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 76de83e25a89..8e58c4cc2cb9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
85 bio = bio_alloc(GFP_NOIO, npages); 85 bio = bio_alloc(GFP_NOIO, npages);
86 86
87 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi; 90 bio->bi_private = sbi;
91 91
@@ -193,7 +193,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
193 __submit_merged_bio(io); 193 __submit_merged_bio(io);
194alloc_new: 194alloc_new:
195 if (io->bio == NULL) { 195 if (io->bio == NULL) {
196 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 196 int bio_blocks = MAX_BIO_BLOCKS(sbi);
197 197
198 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); 198 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
199 io->fio = *fio; 199 io->fio = *fio;
@@ -236,7 +236,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
236 236
237int reserve_new_block(struct dnode_of_data *dn) 237int reserve_new_block(struct dnode_of_data *dn)
238{ 238{
239 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
240 240
241 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 241 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
242 return -EPERM; 242 return -EPERM;
@@ -258,7 +258,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
258 int err; 258 int err;
259 259
260 /* if inode_page exists, index should be zero */ 260 /* if inode_page exists, index should be zero */
261 f2fs_bug_on(!need_put && index); 261 f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
262 262
263 err = get_dnode_of_data(dn, index, ALLOC_NODE); 263 err = get_dnode_of_data(dn, index, ALLOC_NODE);
264 if (err) 264 if (err)
@@ -321,7 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
321 block_t start_blkaddr, end_blkaddr; 321 block_t start_blkaddr, end_blkaddr;
322 int need_update = true; 322 int need_update = true;
323 323
324 f2fs_bug_on(blk_addr == NEW_ADDR); 324 f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR);
325 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 325 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
326 dn->ofs_in_node; 326 dn->ofs_in_node;
327 327
@@ -396,7 +396,6 @@ end_update:
396 396
397struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 397struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
398{ 398{
399 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
400 struct address_space *mapping = inode->i_mapping; 399 struct address_space *mapping = inode->i_mapping;
401 struct dnode_of_data dn; 400 struct dnode_of_data dn;
402 struct page *page; 401 struct page *page;
@@ -429,7 +428,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
429 return page; 428 return page;
430 } 429 }
431 430
432 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 431 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr,
433 sync ? READ_SYNC : READA); 432 sync ? READ_SYNC : READA);
434 if (err) 433 if (err)
435 return ERR_PTR(err); 434 return ERR_PTR(err);
@@ -451,7 +450,6 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
451 */ 450 */
452struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 451struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
453{ 452{
454 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
455 struct address_space *mapping = inode->i_mapping; 453 struct address_space *mapping = inode->i_mapping;
456 struct dnode_of_data dn; 454 struct dnode_of_data dn;
457 struct page *page; 455 struct page *page;
@@ -490,7 +488,8 @@ repeat:
490 return page; 488 return page;
491 } 489 }
492 490
493 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); 491 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
492 dn.data_blkaddr, READ_SYNC);
494 if (err) 493 if (err)
495 return ERR_PTR(err); 494 return ERR_PTR(err);
496 495
@@ -517,7 +516,6 @@ repeat:
517struct page *get_new_data_page(struct inode *inode, 516struct page *get_new_data_page(struct inode *inode,
518 struct page *ipage, pgoff_t index, bool new_i_size) 517 struct page *ipage, pgoff_t index, bool new_i_size)
519{ 518{
520 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
521 struct address_space *mapping = inode->i_mapping; 519 struct address_space *mapping = inode->i_mapping;
522 struct page *page; 520 struct page *page;
523 struct dnode_of_data dn; 521 struct dnode_of_data dn;
@@ -541,8 +539,8 @@ repeat:
541 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 539 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
542 SetPageUptodate(page); 540 SetPageUptodate(page);
543 } else { 541 } else {
544 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 542 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
545 READ_SYNC); 543 dn.data_blkaddr, READ_SYNC);
546 if (err) 544 if (err)
547 goto put_err; 545 goto put_err;
548 546
@@ -573,10 +571,12 @@ put_err:
573 571
574static int __allocate_data_block(struct dnode_of_data *dn) 572static int __allocate_data_block(struct dnode_of_data *dn)
575{ 573{
576 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 574 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
575 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
577 struct f2fs_summary sum; 576 struct f2fs_summary sum;
578 block_t new_blkaddr; 577 block_t new_blkaddr;
579 struct node_info ni; 578 struct node_info ni;
579 pgoff_t fofs;
580 int type; 580 int type;
581 581
582 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 582 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
@@ -599,6 +599,12 @@ static int __allocate_data_block(struct dnode_of_data *dn)
599 update_extent_cache(new_blkaddr, dn); 599 update_extent_cache(new_blkaddr, dn);
600 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); 600 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
601 601
602 /* update i_size */
603 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
604 dn->ofs_in_node;
605 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
606 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
607
602 dn->data_blkaddr = new_blkaddr; 608 dn->data_blkaddr = new_blkaddr;
603 return 0; 609 return 0;
604} 610}
@@ -614,7 +620,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
614static int __get_data_block(struct inode *inode, sector_t iblock, 620static int __get_data_block(struct inode *inode, sector_t iblock,
615 struct buffer_head *bh_result, int create, bool fiemap) 621 struct buffer_head *bh_result, int create, bool fiemap)
616{ 622{
617 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
618 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 623 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
619 unsigned maxblocks = bh_result->b_size >> blkbits; 624 unsigned maxblocks = bh_result->b_size >> blkbits;
620 struct dnode_of_data dn; 625 struct dnode_of_data dn;
@@ -630,8 +635,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
630 goto out; 635 goto out;
631 636
632 if (create) { 637 if (create) {
633 f2fs_balance_fs(sbi); 638 f2fs_balance_fs(F2FS_I_SB(inode));
634 f2fs_lock_op(sbi); 639 f2fs_lock_op(F2FS_I_SB(inode));
635 } 640 }
636 641
637 /* When reading holes, we need its node page */ 642 /* When reading holes, we need its node page */
@@ -707,7 +712,7 @@ put_out:
707 f2fs_put_dnode(&dn); 712 f2fs_put_dnode(&dn);
708unlock_out: 713unlock_out:
709 if (create) 714 if (create)
710 f2fs_unlock_op(sbi); 715 f2fs_unlock_op(F2FS_I_SB(inode));
711out: 716out:
712 trace_f2fs_get_data_block(inode, iblock, bh_result, err); 717 trace_f2fs_get_data_block(inode, iblock, bh_result, err);
713 return err; 718 return err;
@@ -804,7 +809,7 @@ static int f2fs_write_data_page(struct page *page,
804 struct writeback_control *wbc) 809 struct writeback_control *wbc)
805{ 810{
806 struct inode *inode = page->mapping->host; 811 struct inode *inode = page->mapping->host;
807 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 812 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
808 loff_t i_size = i_size_read(inode); 813 loff_t i_size = i_size_read(inode);
809 const pgoff_t end_index = ((unsigned long long) i_size) 814 const pgoff_t end_index = ((unsigned long long) i_size)
810 >> PAGE_CACHE_SHIFT; 815 >> PAGE_CACHE_SHIFT;
@@ -846,7 +851,7 @@ write:
846 if (unlikely(f2fs_cp_error(sbi))) { 851 if (unlikely(f2fs_cp_error(sbi))) {
847 SetPageError(page); 852 SetPageError(page);
848 unlock_page(page); 853 unlock_page(page);
849 return 0; 854 goto out;
850 } 855 }
851 856
852 if (!wbc->for_reclaim) 857 if (!wbc->for_reclaim)
@@ -866,7 +871,7 @@ done:
866 871
867 clear_cold_data(page); 872 clear_cold_data(page);
868out: 873out:
869 inode_dec_dirty_dents(inode); 874 inode_dec_dirty_pages(inode);
870 unlock_page(page); 875 unlock_page(page);
871 if (need_balance_fs) 876 if (need_balance_fs)
872 f2fs_balance_fs(sbi); 877 f2fs_balance_fs(sbi);
@@ -892,7 +897,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
892 struct writeback_control *wbc) 897 struct writeback_control *wbc)
893{ 898{
894 struct inode *inode = mapping->host; 899 struct inode *inode = mapping->host;
895 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 900 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
896 bool locked = false; 901 bool locked = false;
897 int ret; 902 int ret;
898 long diff; 903 long diff;
@@ -904,7 +909,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
904 return 0; 909 return 0;
905 910
906 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 911 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
907 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && 912 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
908 available_free_memory(sbi, DIRTY_DENTS)) 913 available_free_memory(sbi, DIRTY_DENTS))
909 goto skip_write; 914 goto skip_write;
910 915
@@ -926,7 +931,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
926 return ret; 931 return ret;
927 932
928skip_write: 933skip_write:
929 wbc->pages_skipped += get_dirty_dents(inode); 934 wbc->pages_skipped += get_dirty_pages(inode);
930 return 0; 935 return 0;
931} 936}
932 937
@@ -945,7 +950,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
945 struct page **pagep, void **fsdata) 950 struct page **pagep, void **fsdata)
946{ 951{
947 struct inode *inode = mapping->host; 952 struct inode *inode = mapping->host;
948 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 953 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
949 struct page *page; 954 struct page *page;
950 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 955 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
951 struct dnode_of_data dn; 956 struct dnode_of_data dn;
@@ -1047,7 +1052,10 @@ static int f2fs_write_end(struct file *file,
1047 1052
1048 trace_f2fs_write_end(inode, pos, len, copied); 1053 trace_f2fs_write_end(inode, pos, len, copied);
1049 1054
1050 set_page_dirty(page); 1055 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
1056 register_inmem_page(inode, page);
1057 else
1058 set_page_dirty(page);
1051 1059
1052 if (pos + copied > i_size_read(inode)) { 1060 if (pos + copied > i_size_read(inode)) {
1053 i_size_write(inode, pos + copied); 1061 i_size_write(inode, pos + copied);
@@ -1092,9 +1100,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1092 if (check_direct_IO(inode, rw, iter, offset)) 1100 if (check_direct_IO(inode, rw, iter, offset))
1093 return 0; 1101 return 0;
1094 1102
1095 /* clear fsync mark to recover these blocks */
1096 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1097
1098 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1103 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1099 1104
1100 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1105 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
@@ -1110,8 +1115,12 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1110 unsigned int length) 1115 unsigned int length)
1111{ 1116{
1112 struct inode *inode = page->mapping->host; 1117 struct inode *inode = page->mapping->host;
1118
1119 if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
1120 return;
1121
1113 if (PageDirty(page)) 1122 if (PageDirty(page))
1114 inode_dec_dirty_dents(inode); 1123 inode_dec_dirty_pages(inode);
1115 ClearPagePrivate(page); 1124 ClearPagePrivate(page);
1116} 1125}
1117 1126
@@ -1133,7 +1142,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
1133 1142
1134 if (!PageDirty(page)) { 1143 if (!PageDirty(page)) {
1135 __set_page_dirty_nobuffers(page); 1144 __set_page_dirty_nobuffers(page);
1136 set_dirty_dir_page(inode, page); 1145 update_dirty_page(inode, page);
1137 return 1; 1146 return 1;
1138 } 1147 }
1139 return 0; 1148 return 0;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fecebdbfd781..0a91ab813a9e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -93,7 +93,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
93 total_vblocks = 0; 93 total_vblocks = 0;
94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
95 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
98 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
99 bimodal += dist * dist; 99 bimodal += dist * dist;
@@ -103,7 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
103 ndirty++; 103 ndirty++;
104 } 104 }
105 } 105 }
106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
107 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
108 if (si->dirty_count) 108 if (si->dirty_count)
109 si->avg_vblocks = total_vblocks / ndirty; 109 si->avg_vblocks = total_vblocks / ndirty;
@@ -131,17 +131,17 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
131 131
132 /* build sit */ 132 /* build sit */
133 si->base_mem += sizeof(struct sit_info); 133 si->base_mem += sizeof(struct sit_info);
134 si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); 134 si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
135 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 135 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
136 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); 136 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
137 if (sbi->segs_per_sec > 1) 137 if (sbi->segs_per_sec > 1)
138 si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); 138 si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
139 si->base_mem += __bitmap_size(sbi, SIT_BITMAP); 139 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
140 140
141 /* build free segmap */ 141 /* build free segmap */
142 si->base_mem += sizeof(struct free_segmap_info); 142 si->base_mem += sizeof(struct free_segmap_info);
143 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 143 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
144 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 144 si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
145 145
146 /* build curseg */ 146 /* build curseg */
147 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; 147 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
@@ -149,8 +149,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
149 149
150 /* build dirty segmap */ 150 /* build dirty segmap */
151 si->base_mem += sizeof(struct dirty_seglist_info); 151 si->base_mem += sizeof(struct dirty_seglist_info);
152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi));
153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 153 si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
154 154
155 /* build nm */ 155 /* build nm */
156 si->base_mem += sizeof(struct f2fs_nm_info); 156 si->base_mem += sizeof(struct f2fs_nm_info);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 155fb056b7f1..b54f87149c09 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -126,7 +126,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
126 * For the most part, it should be a bug when name_len is zero. 126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs has occurred. 127 * We stop here for figuring out where the bugs has occurred.
128 */ 128 */
129 f2fs_bug_on(!de->name_len); 129 f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
130 130
131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
132 } 132 }
@@ -151,7 +151,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
151 bool room = false; 151 bool room = false;
152 int max_slots = 0; 152 int max_slots = 0;
153 153
154 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 154 f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
155 155
156 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); 156 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
157 nblock = bucket_blocks(level); 157 nblock = bucket_blocks(level);
@@ -284,10 +284,9 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
284 284
285int update_dent_inode(struct inode *inode, const struct qstr *name) 285int update_dent_inode(struct inode *inode, const struct qstr *name)
286{ 286{
287 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
288 struct page *page; 287 struct page *page;
289 288
290 page = get_node_page(sbi, inode->i_ino); 289 page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
291 if (IS_ERR(page)) 290 if (IS_ERR(page))
292 return PTR_ERR(page); 291 return PTR_ERR(page);
293 292
@@ -337,7 +336,6 @@ static int make_empty_dir(struct inode *inode,
337static struct page *init_inode_metadata(struct inode *inode, 336static struct page *init_inode_metadata(struct inode *inode,
338 struct inode *dir, const struct qstr *name) 337 struct inode *dir, const struct qstr *name)
339{ 338{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
341 struct page *page; 339 struct page *page;
342 int err; 340 int err;
343 341
@@ -360,7 +358,7 @@ static struct page *init_inode_metadata(struct inode *inode,
360 if (err) 358 if (err)
361 goto put_error; 359 goto put_error;
362 } else { 360 } else {
363 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 361 page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
364 if (IS_ERR(page)) 362 if (IS_ERR(page))
365 return page; 363 return page;
366 364
@@ -381,7 +379,7 @@ static struct page *init_inode_metadata(struct inode *inode,
381 * we should remove this inode from orphan list. 379 * we should remove this inode from orphan list.
382 */ 380 */
383 if (inode->i_nlink == 0) 381 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino); 382 remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
385 inc_nlink(inode); 383 inc_nlink(inode);
386 } 384 }
387 return page; 385 return page;
@@ -571,8 +569,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
571{ 569{
572 struct f2fs_dentry_block *dentry_blk; 570 struct f2fs_dentry_block *dentry_blk;
573 unsigned int bit_pos; 571 unsigned int bit_pos;
574 struct address_space *mapping = page->mapping; 572 struct inode *dir = page->mapping->host;
575 struct inode *dir = mapping->host;
576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 573 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
577 int i; 574 int i;
578 575
@@ -594,7 +591,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
594 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 591 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
595 592
596 if (inode) { 593 if (inode) {
597 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 594 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
598 595
599 down_write(&F2FS_I(inode)->i_sem); 596 down_write(&F2FS_I(inode)->i_sem);
600 597
@@ -621,7 +618,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
621 truncate_hole(dir, page->index, page->index + 1); 618 truncate_hole(dir, page->index, page->index + 1);
622 clear_page_dirty_for_io(page); 619 clear_page_dirty_for_io(page);
623 ClearPageUptodate(page); 620 ClearPageUptodate(page);
624 inode_dec_dirty_dents(dir); 621 inode_dec_dirty_pages(dir);
625 } 622 }
626 f2fs_put_page(page, 1); 623 f2fs_put_page(page, 1);
627} 624}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e921242186f6..8171e80b2ee9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,10 +21,16 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22 22
23#ifdef CONFIG_F2FS_CHECK_FS 23#ifdef CONFIG_F2FS_CHECK_FS
24#define f2fs_bug_on(condition) BUG_ON(condition) 24#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
25#define f2fs_down_write(x, y) down_write_nest_lock(x, y) 25#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
26#else 26#else
27#define f2fs_bug_on(condition) WARN_ON(condition) 27#define f2fs_bug_on(sbi, condition) \
28 do { \
29 if (unlikely(condition)) { \
30 WARN_ON(1); \
31 sbi->need_fsck = true; \
32 } \
33 } while (0)
28#define f2fs_down_write(x, y) down_write(x) 34#define f2fs_down_write(x, y) down_write(x)
29#endif 35#endif
30 36
@@ -90,6 +96,20 @@ enum {
90 SIT_BITMAP 96 SIT_BITMAP
91}; 97};
92 98
99enum {
100 CP_UMOUNT,
101 CP_SYNC,
102 CP_DISCARD,
103};
104
105struct cp_control {
106 int reason;
107 __u64 trim_start;
108 __u64 trim_end;
109 __u64 trim_minlen;
110 __u64 trimmed;
111};
112
93/* 113/*
94 * For CP/NAT/SIT/SSA readahead 114 * For CP/NAT/SIT/SSA readahead
95 */ 115 */
@@ -97,7 +117,8 @@ enum {
97 META_CP, 117 META_CP,
98 META_NAT, 118 META_NAT,
99 META_SIT, 119 META_SIT,
100 META_SSA 120 META_SSA,
121 META_POR,
101}; 122};
102 123
103/* for the list of ino */ 124/* for the list of ino */
@@ -130,7 +151,9 @@ struct discard_entry {
130struct fsync_inode_entry { 151struct fsync_inode_entry {
131 struct list_head list; /* list head */ 152 struct list_head list; /* list head */
132 struct inode *inode; /* vfs inode pointer */ 153 struct inode *inode; /* vfs inode pointer */
133 block_t blkaddr; /* block address locating the last inode */ 154 block_t blkaddr; /* block address locating the last fsync */
155 block_t last_dentry; /* block address locating the last dentry */
156 block_t last_inode; /* block address locating the last inode */
134}; 157};
135 158
136#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) 159#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
@@ -141,6 +164,9 @@ struct fsync_inode_entry {
141#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) 164#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
142#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) 165#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
143 166
167#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
168#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
169
144static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) 170static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
145{ 171{
146 int before = nats_in_cursum(rs); 172 int before = nats_in_cursum(rs);
@@ -155,11 +181,24 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
155 return before; 181 return before;
156} 182}
157 183
184static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
185 int type)
186{
187 if (type == NAT_JOURNAL)
188 return size <= MAX_NAT_JENTRIES(sum);
189 return size <= MAX_SIT_JENTRIES(sum);
190}
191
158/* 192/*
159 * ioctl commands 193 * ioctl commands
160 */ 194 */
161#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 195#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
162#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 196#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
197
198#define F2FS_IOCTL_MAGIC 0xf5
199#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
200#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
201#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
163 202
164#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 203#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
165/* 204/*
@@ -222,13 +261,16 @@ struct f2fs_inode_info {
222 /* Use below internally in f2fs*/ 261 /* Use below internally in f2fs*/
223 unsigned long flags; /* use to pass per-file flags */ 262 unsigned long flags; /* use to pass per-file flags */
224 struct rw_semaphore i_sem; /* protect fi info */ 263 struct rw_semaphore i_sem; /* protect fi info */
225 atomic_t dirty_dents; /* # of dirty dentry pages */ 264 atomic_t dirty_pages; /* # of dirty pages */
226 f2fs_hash_t chash; /* hash value of given file name */ 265 f2fs_hash_t chash; /* hash value of given file name */
227 unsigned int clevel; /* maximum level of given file name */ 266 unsigned int clevel; /* maximum level of given file name */
228 nid_t i_xattr_nid; /* node id that contains xattrs */ 267 nid_t i_xattr_nid; /* node id that contains xattrs */
229 unsigned long long xattr_ver; /* cp version of xattr modification */ 268 unsigned long long xattr_ver; /* cp version of xattr modification */
230 struct extent_info ext; /* in-memory extent cache entry */ 269 struct extent_info ext; /* in-memory extent cache entry */
231 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ 270 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
271
272 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
273 struct mutex inmem_lock; /* lock for inmemory pages */
232}; 274};
233 275
234static inline void get_extent_info(struct extent_info *ext, 276static inline void get_extent_info(struct extent_info *ext,
@@ -260,11 +302,10 @@ struct f2fs_nm_info {
260 302
261 /* NAT cache management */ 303 /* NAT cache management */
262 struct radix_tree_root nat_root;/* root of the nat entry cache */ 304 struct radix_tree_root nat_root;/* root of the nat entry cache */
305 struct radix_tree_root nat_set_root;/* root of the nat set cache */
263 rwlock_t nat_tree_lock; /* protect nat_tree_lock */ 306 rwlock_t nat_tree_lock; /* protect nat_tree_lock */
264 unsigned int nat_cnt; /* the # of cached nat entries */
265 struct list_head nat_entries; /* cached nat entry list (clean) */ 307 struct list_head nat_entries; /* cached nat entry list (clean) */
266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 308 unsigned int nat_cnt; /* the # of cached nat entries */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 309 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
269 310
270 /* free node ids management */ 311 /* free node ids management */
@@ -332,18 +373,16 @@ enum {
332}; 373};
333 374
334struct flush_cmd { 375struct flush_cmd {
335 struct flush_cmd *next;
336 struct completion wait; 376 struct completion wait;
377 struct llist_node llnode;
337 int ret; 378 int ret;
338}; 379};
339 380
340struct flush_cmd_control { 381struct flush_cmd_control {
341 struct task_struct *f2fs_issue_flush; /* flush thread */ 382 struct task_struct *f2fs_issue_flush; /* flush thread */
342 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ 383 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
343 struct flush_cmd *issue_list; /* list for command issue */ 384 struct llist_head issue_list; /* list for command issue */
344 struct flush_cmd *dispatch_list; /* list for command dispatch */ 385 struct llist_node *dispatch_list; /* list for command dispatch */
345 spinlock_t issue_lock; /* for issue list lock */
346 struct flush_cmd *issue_tail; /* list tail of issue list */
347}; 386};
348 387
349struct f2fs_sm_info { 388struct f2fs_sm_info {
@@ -369,8 +408,11 @@ struct f2fs_sm_info {
369 int nr_discards; /* # of discards in the list */ 408 int nr_discards; /* # of discards in the list */
370 int max_discards; /* max. discards to be issued */ 409 int max_discards; /* max. discards to be issued */
371 410
411 struct list_head sit_entry_set; /* sit entry set list */
412
372 unsigned int ipu_policy; /* in-place-update policy */ 413 unsigned int ipu_policy; /* in-place-update policy */
373 unsigned int min_ipu_util; /* in-place-update threshold */ 414 unsigned int min_ipu_util; /* in-place-update threshold */
415 unsigned int min_fsync_blocks; /* threshold for fsync */
374 416
375 /* for flush command control */ 417 /* for flush command control */
376 struct flush_cmd_control *cmd_control_info; 418 struct flush_cmd_control *cmd_control_info;
@@ -434,6 +476,7 @@ struct f2fs_sb_info {
434 struct buffer_head *raw_super_buf; /* buffer head of raw sb */ 476 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
435 struct f2fs_super_block *raw_super; /* raw super block pointer */ 477 struct f2fs_super_block *raw_super; /* raw super block pointer */
436 int s_dirty; /* dirty flag for checkpoint */ 478 int s_dirty; /* dirty flag for checkpoint */
479 bool need_fsck; /* need fsck.f2fs to fix */
437 480
438 /* for node-related operations */ 481 /* for node-related operations */
439 struct f2fs_nm_info *nm_info; /* node manager */ 482 struct f2fs_nm_info *nm_info; /* node manager */
@@ -539,6 +582,21 @@ static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
539 return sb->s_fs_info; 582 return sb->s_fs_info;
540} 583}
541 584
585static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
586{
587 return F2FS_SB(inode->i_sb);
588}
589
590static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
591{
592 return F2FS_I_SB(mapping->host);
593}
594
595static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
596{
597 return F2FS_M_SB(page->mapping);
598}
599
542static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) 600static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
543{ 601{
544 return (struct f2fs_super_block *)(sbi->raw_super); 602 return (struct f2fs_super_block *)(sbi->raw_super);
@@ -703,8 +761,8 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
703 blkcnt_t count) 761 blkcnt_t count)
704{ 762{
705 spin_lock(&sbi->stat_lock); 763 spin_lock(&sbi->stat_lock);
706 f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); 764 f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
707 f2fs_bug_on(inode->i_blocks < count); 765 f2fs_bug_on(sbi, inode->i_blocks < count);
708 inode->i_blocks -= count; 766 inode->i_blocks -= count;
709 sbi->total_valid_block_count -= (block_t)count; 767 sbi->total_valid_block_count -= (block_t)count;
710 spin_unlock(&sbi->stat_lock); 768 spin_unlock(&sbi->stat_lock);
@@ -716,10 +774,11 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
716 F2FS_SET_SB_DIRT(sbi); 774 F2FS_SET_SB_DIRT(sbi);
717} 775}
718 776
719static inline void inode_inc_dirty_dents(struct inode *inode) 777static inline void inode_inc_dirty_pages(struct inode *inode)
720{ 778{
721 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 779 atomic_inc(&F2FS_I(inode)->dirty_pages);
722 atomic_inc(&F2FS_I(inode)->dirty_dents); 780 if (S_ISDIR(inode->i_mode))
781 inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
723} 782}
724 783
725static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 784static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -727,13 +786,15 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
727 atomic_dec(&sbi->nr_pages[count_type]); 786 atomic_dec(&sbi->nr_pages[count_type]);
728} 787}
729 788
730static inline void inode_dec_dirty_dents(struct inode *inode) 789static inline void inode_dec_dirty_pages(struct inode *inode)
731{ 790{
732 if (!S_ISDIR(inode->i_mode)) 791 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
733 return; 792 return;
734 793
735 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 794 atomic_dec(&F2FS_I(inode)->dirty_pages);
736 atomic_dec(&F2FS_I(inode)->dirty_dents); 795
796 if (S_ISDIR(inode->i_mode))
797 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
737} 798}
738 799
739static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 800static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -741,9 +802,9 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
741 return atomic_read(&sbi->nr_pages[count_type]); 802 return atomic_read(&sbi->nr_pages[count_type]);
742} 803}
743 804
744static inline int get_dirty_dents(struct inode *inode) 805static inline int get_dirty_pages(struct inode *inode)
745{ 806{
746 return atomic_read(&F2FS_I(inode)->dirty_dents); 807 return atomic_read(&F2FS_I(inode)->dirty_pages);
747} 808}
748 809
749static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 810static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
@@ -848,9 +909,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
848{ 909{
849 spin_lock(&sbi->stat_lock); 910 spin_lock(&sbi->stat_lock);
850 911
851 f2fs_bug_on(!sbi->total_valid_block_count); 912 f2fs_bug_on(sbi, !sbi->total_valid_block_count);
852 f2fs_bug_on(!sbi->total_valid_node_count); 913 f2fs_bug_on(sbi, !sbi->total_valid_node_count);
853 f2fs_bug_on(!inode->i_blocks); 914 f2fs_bug_on(sbi, !inode->i_blocks);
854 915
855 inode->i_blocks--; 916 inode->i_blocks--;
856 sbi->total_valid_node_count--; 917 sbi->total_valid_node_count--;
@@ -867,7 +928,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
867static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 928static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
868{ 929{
869 spin_lock(&sbi->stat_lock); 930 spin_lock(&sbi->stat_lock);
870 f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); 931 f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count);
871 sbi->total_valid_inode_count++; 932 sbi->total_valid_inode_count++;
872 spin_unlock(&sbi->stat_lock); 933 spin_unlock(&sbi->stat_lock);
873} 934}
@@ -875,7 +936,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
875static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 936static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
876{ 937{
877 spin_lock(&sbi->stat_lock); 938 spin_lock(&sbi->stat_lock);
878 f2fs_bug_on(!sbi->total_valid_inode_count); 939 f2fs_bug_on(sbi, !sbi->total_valid_inode_count);
879 sbi->total_valid_inode_count--; 940 sbi->total_valid_inode_count--;
880 spin_unlock(&sbi->stat_lock); 941 spin_unlock(&sbi->stat_lock);
881} 942}
@@ -891,7 +952,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
891 return; 952 return;
892 953
893 if (unlock) { 954 if (unlock) {
894 f2fs_bug_on(!PageLocked(page)); 955 f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page));
895 unlock_page(page); 956 unlock_page(page);
896 } 957 }
897 page_cache_release(page); 958 page_cache_release(page);
@@ -998,7 +1059,9 @@ enum {
998 FI_INLINE_DATA, /* used for inline data*/ 1059 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */ 1060 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1061 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used fo ipu for fdatasync */ 1062 FI_NEED_IPU, /* used for ipu per file */
1063 FI_ATOMIC_FILE, /* indicate atomic file */
1064 FI_VOLATILE_FILE, /* indicate volatile file */
1002}; 1065};
1003 1066
1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1067static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1085,6 +1148,16 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1085 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); 1148 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
1086} 1149}
1087 1150
1151static inline bool f2fs_is_atomic_file(struct inode *inode)
1152{
1153 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
1154}
1155
1156static inline bool f2fs_is_volatile_file(struct inode *inode)
1157{
1158 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1159}
1160
1088static inline void *inline_data_addr(struct page *page) 1161static inline void *inline_data_addr(struct page *page)
1089{ 1162{
1090 struct f2fs_inode *ri = F2FS_INODE(page); 1163 struct f2fs_inode *ri = F2FS_INODE(page);
@@ -1141,6 +1214,7 @@ void update_inode(struct inode *, struct page *);
1141void update_inode_page(struct inode *); 1214void update_inode_page(struct inode *);
1142int f2fs_write_inode(struct inode *, struct writeback_control *); 1215int f2fs_write_inode(struct inode *, struct writeback_control *);
1143void f2fs_evict_inode(struct inode *); 1216void f2fs_evict_inode(struct inode *);
1217void handle_failed_inode(struct inode *);
1144 1218
1145/* 1219/*
1146 * namei.c 1220 * namei.c
@@ -1188,9 +1262,9 @@ struct dnode_of_data;
1188struct node_info; 1262struct node_info;
1189 1263
1190bool available_free_memory(struct f2fs_sb_info *, int); 1264bool available_free_memory(struct f2fs_sb_info *, int);
1191int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1265bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1192bool fsync_mark_done(struct f2fs_sb_info *, nid_t); 1266bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
1193void fsync_mark_clear(struct f2fs_sb_info *, nid_t); 1267bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
1194void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1268void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1195int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1269int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1196int truncate_inode_blocks(struct inode *, pgoff_t); 1270int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1221,6 +1295,8 @@ void destroy_node_manager_caches(void);
1221/* 1295/*
1222 * segment.c 1296 * segment.c
1223 */ 1297 */
1298void register_inmem_page(struct inode *, struct page *);
1299void commit_inmem_pages(struct inode *, bool);
1224void f2fs_balance_fs(struct f2fs_sb_info *); 1300void f2fs_balance_fs(struct f2fs_sb_info *);
1225void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1301void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1226int f2fs_issue_flush(struct f2fs_sb_info *); 1302int f2fs_issue_flush(struct f2fs_sb_info *);
@@ -1229,9 +1305,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1229void invalidate_blocks(struct f2fs_sb_info *, block_t); 1305void invalidate_blocks(struct f2fs_sb_info *, block_t);
1230void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1306void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1231void clear_prefree_segments(struct f2fs_sb_info *); 1307void clear_prefree_segments(struct f2fs_sb_info *);
1308void release_discard_addrs(struct f2fs_sb_info *);
1232void discard_next_dnode(struct f2fs_sb_info *, block_t); 1309void discard_next_dnode(struct f2fs_sb_info *, block_t);
1233int npages_for_summary_flush(struct f2fs_sb_info *); 1310int npages_for_summary_flush(struct f2fs_sb_info *);
1234void allocate_new_segments(struct f2fs_sb_info *); 1311void allocate_new_segments(struct f2fs_sb_info *);
1312int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
1235struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1313struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
1236void write_meta_page(struct f2fs_sb_info *, struct page *); 1314void write_meta_page(struct f2fs_sb_info *, struct page *);
1237void write_node_page(struct f2fs_sb_info *, struct page *, 1315void write_node_page(struct f2fs_sb_info *, struct page *,
@@ -1248,7 +1326,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t);
1248void write_node_summaries(struct f2fs_sb_info *, block_t); 1326void write_node_summaries(struct f2fs_sb_info *, block_t);
1249int lookup_journal_in_cursum(struct f2fs_summary_block *, 1327int lookup_journal_in_cursum(struct f2fs_summary_block *,
1250 int, unsigned int, int); 1328 int, unsigned int, int);
1251void flush_sit_entries(struct f2fs_sb_info *); 1329void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
1252int build_segment_manager(struct f2fs_sb_info *); 1330int build_segment_manager(struct f2fs_sb_info *);
1253void destroy_segment_manager(struct f2fs_sb_info *); 1331void destroy_segment_manager(struct f2fs_sb_info *);
1254int __init create_segment_manager_caches(void); 1332int __init create_segment_manager_caches(void);
@@ -1259,7 +1337,8 @@ void destroy_segment_manager_caches(void);
1259 */ 1337 */
1260struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1338struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1261struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1339struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1262int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1340struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t);
1341int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
1263long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1342long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1264void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1343void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1344void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1271,11 +1350,11 @@ void add_orphan_inode(struct f2fs_sb_info *, nid_t);
1271void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1350void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
1272void recover_orphan_inodes(struct f2fs_sb_info *); 1351void recover_orphan_inodes(struct f2fs_sb_info *);
1273int get_valid_checkpoint(struct f2fs_sb_info *); 1352int get_valid_checkpoint(struct f2fs_sb_info *);
1274void set_dirty_dir_page(struct inode *, struct page *); 1353void update_dirty_page(struct inode *, struct page *);
1275void add_dirty_dir_inode(struct inode *); 1354void add_dirty_dir_inode(struct inode *);
1276void remove_dirty_dir_inode(struct inode *); 1355void remove_dirty_dir_inode(struct inode *);
1277void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1356void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1278void write_checkpoint(struct f2fs_sb_info *, bool); 1357void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
1279void init_ino_entry_info(struct f2fs_sb_info *); 1358void init_ino_entry_info(struct f2fs_sb_info *);
1280int __init create_checkpoint_caches(void); 1359int __init create_checkpoint_caches(void);
1281void destroy_checkpoint_caches(void); 1360void destroy_checkpoint_caches(void);
@@ -1359,12 +1438,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1359#define stat_inc_inline_inode(inode) \ 1438#define stat_inc_inline_inode(inode) \
1360 do { \ 1439 do { \
1361 if (f2fs_has_inline_data(inode)) \ 1440 if (f2fs_has_inline_data(inode)) \
1362 ((F2FS_SB(inode->i_sb))->inline_inode++); \ 1441 ((F2FS_I_SB(inode))->inline_inode++); \
1363 } while (0) 1442 } while (0)
1364#define stat_dec_inline_inode(inode) \ 1443#define stat_dec_inline_inode(inode) \
1365 do { \ 1444 do { \
1366 if (f2fs_has_inline_data(inode)) \ 1445 if (f2fs_has_inline_data(inode)) \
1367 ((F2FS_SB(inode->i_sb))->inline_inode--); \ 1446 ((F2FS_I_SB(inode))->inline_inode--); \
1368 } while (0) 1447 } while (0)
1369 1448
1370#define stat_inc_seg_type(sbi, curseg) \ 1449#define stat_inc_seg_type(sbi, curseg) \
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 060aee65aee8..8e68bb64f835 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,7 +33,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
33{ 33{
34 struct page *page = vmf->page; 34 struct page *page = vmf->page;
35 struct inode *inode = file_inode(vma->vm_file); 35 struct inode *inode = file_inode(vma->vm_file);
36 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 36 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
37 struct dnode_of_data dn; 37 struct dnode_of_data dn;
38 int err; 38 int err;
39 39
@@ -117,7 +117,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
117 117
118static inline bool need_do_checkpoint(struct inode *inode) 118static inline bool need_do_checkpoint(struct inode *inode)
119{ 119{
120 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 120 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
121 bool need_cp = false; 121 bool need_cp = false;
122 122
123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
@@ -138,7 +138,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
138{ 138{
139 struct inode *inode = file->f_mapping->host; 139 struct inode *inode = file->f_mapping->host;
140 struct f2fs_inode_info *fi = F2FS_I(inode); 140 struct f2fs_inode_info *fi = F2FS_I(inode);
141 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 141 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
142 nid_t ino = inode->i_ino;
142 int ret = 0; 143 int ret = 0;
143 bool need_cp = false; 144 bool need_cp = false;
144 struct writeback_control wbc = { 145 struct writeback_control wbc = {
@@ -153,12 +154,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
153 trace_f2fs_sync_file_enter(inode); 154 trace_f2fs_sync_file_enter(inode);
154 155
155 /* if fdatasync is triggered, let's do in-place-update */ 156 /* if fdatasync is triggered, let's do in-place-update */
156 if (datasync) 157 if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
157 set_inode_flag(fi, FI_NEED_IPU); 158 set_inode_flag(fi, FI_NEED_IPU);
158
159 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 159 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
160 if (datasync) 160 clear_inode_flag(fi, FI_NEED_IPU);
161 clear_inode_flag(fi, FI_NEED_IPU); 161
162 if (ret) { 162 if (ret) {
163 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 163 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
164 return ret; 164 return ret;
@@ -168,13 +168,22 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
168 * if there is no written data, don't waste time to write recovery info. 168 * if there is no written data, don't waste time to write recovery info.
169 */ 169 */
170 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && 170 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
171 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { 171 !exist_written_data(sbi, ino, APPEND_INO)) {
172 struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
173
174 /* But we need to avoid that there are some inode updates */
175 if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) {
176 f2fs_put_page(i, 0);
177 goto go_write;
178 }
179 f2fs_put_page(i, 0);
180
172 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || 181 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
173 exist_written_data(sbi, inode->i_ino, UPDATE_INO)) 182 exist_written_data(sbi, ino, UPDATE_INO))
174 goto flush_out; 183 goto flush_out;
175 goto out; 184 goto out;
176 } 185 }
177 186go_write:
178 /* guarantee free sections for fsync */ 187 /* guarantee free sections for fsync */
179 f2fs_balance_fs(sbi); 188 f2fs_balance_fs(sbi);
180 189
@@ -207,26 +216,28 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
207 up_write(&fi->i_sem); 216 up_write(&fi->i_sem);
208 } 217 }
209 } else { 218 } else {
210 /* if there is no written node page, write its inode page */ 219sync_nodes:
211 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 220 sync_node_pages(sbi, ino, &wbc);
212 if (fsync_mark_done(sbi, inode->i_ino)) 221
213 goto out; 222 if (need_inode_block_update(sbi, ino)) {
214 mark_inode_dirty_sync(inode); 223 mark_inode_dirty_sync(inode);
215 ret = f2fs_write_inode(inode, NULL); 224 ret = f2fs_write_inode(inode, NULL);
216 if (ret) 225 if (ret)
217 goto out; 226 goto out;
227 goto sync_nodes;
218 } 228 }
219 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 229
230 ret = wait_on_node_pages_writeback(sbi, ino);
220 if (ret) 231 if (ret)
221 goto out; 232 goto out;
222 233
223 /* once recovery info is written, don't need to track this */ 234 /* once recovery info is written, don't need to track this */
224 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); 235 remove_dirty_inode(sbi, ino, APPEND_INO);
225 clear_inode_flag(fi, FI_APPEND_WRITE); 236 clear_inode_flag(fi, FI_APPEND_WRITE);
226flush_out: 237flush_out:
227 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 238 remove_dirty_inode(sbi, ino, UPDATE_INO);
228 clear_inode_flag(fi, FI_UPDATE_WRITE); 239 clear_inode_flag(fi, FI_UPDATE_WRITE);
229 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 240 ret = f2fs_issue_flush(F2FS_I_SB(inode));
230 } 241 }
231out: 242out:
232 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 243 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -353,6 +364,8 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
353 maxbytes, i_size_read(inode)); 364 maxbytes, i_size_read(inode));
354 case SEEK_DATA: 365 case SEEK_DATA:
355 case SEEK_HOLE: 366 case SEEK_HOLE:
367 if (offset < 0)
368 return -ENXIO;
356 return f2fs_seek_block(file, offset, whence); 369 return f2fs_seek_block(file, offset, whence);
357 } 370 }
358 371
@@ -369,7 +382,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
369int truncate_data_blocks_range(struct dnode_of_data *dn, int count) 382int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
370{ 383{
371 int nr_free = 0, ofs = dn->ofs_in_node; 384 int nr_free = 0, ofs = dn->ofs_in_node;
372 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 385 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
373 struct f2fs_node *raw_node; 386 struct f2fs_node *raw_node;
374 __le32 *addr; 387 __le32 *addr;
375 388
@@ -432,7 +445,7 @@ out:
432 445
433int truncate_blocks(struct inode *inode, u64 from, bool lock) 446int truncate_blocks(struct inode *inode, u64 from, bool lock)
434{ 447{
435 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 448 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
436 unsigned int blocksize = inode->i_sb->s_blocksize; 449 unsigned int blocksize = inode->i_sb->s_blocksize;
437 struct dnode_of_data dn; 450 struct dnode_of_data dn;
438 pgoff_t free_from; 451 pgoff_t free_from;
@@ -463,7 +476,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
463 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 476 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
464 477
465 count -= dn.ofs_in_node; 478 count -= dn.ofs_in_node;
466 f2fs_bug_on(count < 0); 479 f2fs_bug_on(sbi, count < 0);
467 480
468 if (dn.ofs_in_node || IS_INODE(dn.node_page)) { 481 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
469 truncate_data_blocks_range(&dn, count); 482 truncate_data_blocks_range(&dn, count);
@@ -547,15 +560,22 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
547 if (err) 560 if (err)
548 return err; 561 return err;
549 562
550 if ((attr->ia_valid & ATTR_SIZE) && 563 if (attr->ia_valid & ATTR_SIZE) {
551 attr->ia_size != i_size_read(inode)) {
552 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); 564 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
553 if (err) 565 if (err)
554 return err; 566 return err;
555 567
556 truncate_setsize(inode, attr->ia_size); 568 if (attr->ia_size != i_size_read(inode)) {
557 f2fs_truncate(inode); 569 truncate_setsize(inode, attr->ia_size);
558 f2fs_balance_fs(F2FS_SB(inode->i_sb)); 570 f2fs_truncate(inode);
571 f2fs_balance_fs(F2FS_I_SB(inode));
572 } else {
573 /*
574 * giving a chance to truncate blocks past EOF which
575 * are fallocated with FALLOC_FL_KEEP_SIZE.
576 */
577 f2fs_truncate(inode);
578 }
559 } 579 }
560 580
561 __setattr_copy(inode, attr); 581 __setattr_copy(inode, attr);
@@ -589,7 +609,7 @@ const struct inode_operations f2fs_file_inode_operations = {
589static void fill_zero(struct inode *inode, pgoff_t index, 609static void fill_zero(struct inode *inode, pgoff_t index,
590 loff_t start, loff_t len) 610 loff_t start, loff_t len)
591{ 611{
592 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 612 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
593 struct page *page; 613 struct page *page;
594 614
595 if (!len) 615 if (!len)
@@ -638,6 +658,13 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
638 loff_t off_start, off_end; 658 loff_t off_start, off_end;
639 int ret = 0; 659 int ret = 0;
640 660
661 if (!S_ISREG(inode->i_mode))
662 return -EOPNOTSUPP;
663
664 /* skip punching hole beyond i_size */
665 if (offset >= inode->i_size)
666 return ret;
667
641 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); 668 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
642 if (ret) 669 if (ret)
643 return ret; 670 return ret;
@@ -661,7 +688,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
661 if (pg_start < pg_end) { 688 if (pg_start < pg_end) {
662 struct address_space *mapping = inode->i_mapping; 689 struct address_space *mapping = inode->i_mapping;
663 loff_t blk_start, blk_end; 690 loff_t blk_start, blk_end;
664 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 691 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
665 692
666 f2fs_balance_fs(sbi); 693 f2fs_balance_fs(sbi);
667 694
@@ -682,7 +709,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
682static int expand_inode_data(struct inode *inode, loff_t offset, 709static int expand_inode_data(struct inode *inode, loff_t offset,
683 loff_t len, int mode) 710 loff_t len, int mode)
684{ 711{
685 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 712 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
686 pgoff_t index, pg_start, pg_end; 713 pgoff_t index, pg_start, pg_end;
687 loff_t new_size = i_size_read(inode); 714 loff_t new_size = i_size_read(inode);
688 loff_t off_start, off_end; 715 loff_t off_start, off_end;
@@ -778,61 +805,157 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
778 return flags & F2FS_OTHER_FLMASK; 805 return flags & F2FS_OTHER_FLMASK;
779} 806}
780 807
781long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 808static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
809{
810 struct inode *inode = file_inode(filp);
811 struct f2fs_inode_info *fi = F2FS_I(inode);
812 unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
813 return put_user(flags, (int __user *)arg);
814}
815
816static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
782{ 817{
783 struct inode *inode = file_inode(filp); 818 struct inode *inode = file_inode(filp);
784 struct f2fs_inode_info *fi = F2FS_I(inode); 819 struct f2fs_inode_info *fi = F2FS_I(inode);
785 unsigned int flags; 820 unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
821 unsigned int oldflags;
786 int ret; 822 int ret;
787 823
788 switch (cmd) { 824 ret = mnt_want_write_file(filp);
789 case F2FS_IOC_GETFLAGS: 825 if (ret)
790 flags = fi->i_flags & FS_FL_USER_VISIBLE; 826 return ret;
791 return put_user(flags, (int __user *) arg);
792 case F2FS_IOC_SETFLAGS:
793 {
794 unsigned int oldflags;
795 827
796 ret = mnt_want_write_file(filp); 828 if (!inode_owner_or_capable(inode)) {
797 if (ret) 829 ret = -EACCES;
798 return ret; 830 goto out;
831 }
799 832
800 if (!inode_owner_or_capable(inode)) { 833 if (get_user(flags, (int __user *)arg)) {
801 ret = -EACCES; 834 ret = -EFAULT;
802 goto out; 835 goto out;
803 } 836 }
837
838 flags = f2fs_mask_flags(inode->i_mode, flags);
839
840 mutex_lock(&inode->i_mutex);
804 841
805 if (get_user(flags, (int __user *) arg)) { 842 oldflags = fi->i_flags;
806 ret = -EFAULT; 843
844 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
845 if (!capable(CAP_LINUX_IMMUTABLE)) {
846 mutex_unlock(&inode->i_mutex);
847 ret = -EPERM;
807 goto out; 848 goto out;
808 } 849 }
850 }
809 851
810 flags = f2fs_mask_flags(inode->i_mode, flags); 852 flags = flags & FS_FL_USER_MODIFIABLE;
853 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
854 fi->i_flags = flags;
855 mutex_unlock(&inode->i_mutex);
811 856
812 mutex_lock(&inode->i_mutex); 857 f2fs_set_inode_flags(inode);
858 inode->i_ctime = CURRENT_TIME;
859 mark_inode_dirty(inode);
860out:
861 mnt_drop_write_file(filp);
862 return ret;
863}
813 864
814 oldflags = fi->i_flags; 865static int f2fs_ioc_start_atomic_write(struct file *filp)
866{
867 struct inode *inode = file_inode(filp);
868 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
815 869
816 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 870 if (!inode_owner_or_capable(inode))
817 if (!capable(CAP_LINUX_IMMUTABLE)) { 871 return -EACCES;
818 mutex_unlock(&inode->i_mutex);
819 ret = -EPERM;
820 goto out;
821 }
822 }
823 872
824 flags = flags & FS_FL_USER_MODIFIABLE; 873 f2fs_balance_fs(sbi);
825 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
826 fi->i_flags = flags;
827 mutex_unlock(&inode->i_mutex);
828 874
829 f2fs_set_inode_flags(inode); 875 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
830 inode->i_ctime = CURRENT_TIME; 876
831 mark_inode_dirty(inode); 877 return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
832out: 878}
833 mnt_drop_write_file(filp); 879
880static int f2fs_ioc_commit_atomic_write(struct file *filp)
881{
882 struct inode *inode = file_inode(filp);
883 int ret;
884
885 if (!inode_owner_or_capable(inode))
886 return -EACCES;
887
888 if (f2fs_is_volatile_file(inode))
889 return 0;
890
891 ret = mnt_want_write_file(filp);
892 if (ret)
834 return ret; 893 return ret;
835 } 894
895 if (f2fs_is_atomic_file(inode))
896 commit_inmem_pages(inode, false);
897
898 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
899 mnt_drop_write_file(filp);
900 return ret;
901}
902
903static int f2fs_ioc_start_volatile_write(struct file *filp)
904{
905 struct inode *inode = file_inode(filp);
906
907 if (!inode_owner_or_capable(inode))
908 return -EACCES;
909
910 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
911 return 0;
912}
913
914static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
915{
916 struct inode *inode = file_inode(filp);
917 struct super_block *sb = inode->i_sb;
918 struct request_queue *q = bdev_get_queue(sb->s_bdev);
919 struct fstrim_range range;
920 int ret;
921
922 if (!capable(CAP_SYS_ADMIN))
923 return -EPERM;
924
925 if (!blk_queue_discard(q))
926 return -EOPNOTSUPP;
927
928 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
929 sizeof(range)))
930 return -EFAULT;
931
932 range.minlen = max((unsigned int)range.minlen,
933 q->limits.discard_granularity);
934 ret = f2fs_trim_fs(F2FS_SB(sb), &range);
935 if (ret < 0)
936 return ret;
937
938 if (copy_to_user((struct fstrim_range __user *)arg, &range,
939 sizeof(range)))
940 return -EFAULT;
941 return 0;
942}
943
944long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
945{
946 switch (cmd) {
947 case F2FS_IOC_GETFLAGS:
948 return f2fs_ioc_getflags(filp, arg);
949 case F2FS_IOC_SETFLAGS:
950 return f2fs_ioc_setflags(filp, arg);
951 case F2FS_IOC_START_ATOMIC_WRITE:
952 return f2fs_ioc_start_atomic_write(filp);
953 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
954 return f2fs_ioc_commit_atomic_write(filp);
955 case F2FS_IOC_START_VOLATILE_WRITE:
956 return f2fs_ioc_start_volatile_write(filp);
957 case FITRIM:
958 return f2fs_ioc_fitrim(filp, arg);
836 default: 959 default:
837 return -ENOTTY; 960 return -ENOTTY;
838 } 961 }
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 943a31db7cc3..2a8f4acdb86b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,7 +193,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
193 * selected by background GC before. 193 * selected by background GC before.
194 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
195 */ 195 */
196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { 196 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
197 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
198 continue; 198 continue;
199 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
@@ -263,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
263 unsigned int secno, max_cost; 263 unsigned int secno, max_cost;
264 int nsearched = 0; 264 int nsearched = 0;
265 265
266 mutex_lock(&dirty_i->seglist_lock);
267
266 p.alloc_mode = alloc_mode; 268 p.alloc_mode = alloc_mode;
267 select_policy(sbi, gc_type, type, &p); 269 select_policy(sbi, gc_type, type, &p);
268 270
269 p.min_segno = NULL_SEGNO; 271 p.min_segno = NULL_SEGNO;
270 p.min_cost = max_cost = get_max_cost(sbi, &p); 272 p.min_cost = max_cost = get_max_cost(sbi, &p);
271 273
272 mutex_lock(&dirty_i->seglist_lock);
273
274 if (p.alloc_mode == LFS && gc_type == FG_GC) { 274 if (p.alloc_mode == LFS && gc_type == FG_GC) {
275 p.min_segno = check_bg_victims(sbi); 275 p.min_segno = check_bg_victims(sbi);
276 if (p.min_segno != NULL_SEGNO) 276 if (p.min_segno != NULL_SEGNO)
@@ -281,9 +281,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
281 unsigned long cost; 281 unsigned long cost;
282 unsigned int segno; 282 unsigned int segno;
283 283
284 segno = find_next_bit(p.dirty_segmap, 284 segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
285 TOTAL_SEGS(sbi), p.offset); 285 if (segno >= MAIN_SEGS(sbi)) {
286 if (segno >= TOTAL_SEGS(sbi)) {
287 if (sbi->last_victim[p.gc_mode]) { 286 if (sbi->last_victim[p.gc_mode]) {
288 sbi->last_victim[p.gc_mode] = 0; 287 sbi->last_victim[p.gc_mode] = 0;
289 p.offset = 0; 288 p.offset = 0;
@@ -423,6 +422,12 @@ next_step:
423 if (IS_ERR(node_page)) 422 if (IS_ERR(node_page))
424 continue; 423 continue;
425 424
425 /* block may become invalid during get_node_page */
426 if (check_valid_map(sbi, segno, off) == 0) {
427 f2fs_put_page(node_page, 1);
428 continue;
429 }
430
426 /* set page dirty and write it */ 431 /* set page dirty and write it */
427 if (gc_type == FG_GC) { 432 if (gc_type == FG_GC) {
428 f2fs_wait_on_page_writeback(node_page, NODE); 433 f2fs_wait_on_page_writeback(node_page, NODE);
@@ -531,7 +536,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 f2fs_wait_on_page_writeback(page, DATA); 536 f2fs_wait_on_page_writeback(page, DATA);
532 537
533 if (clear_page_dirty_for_io(page)) 538 if (clear_page_dirty_for_io(page))
534 inode_dec_dirty_dents(inode); 539 inode_dec_dirty_pages(inode);
535 set_cold_data(page); 540 set_cold_data(page);
536 do_write_data_page(page, &fio); 541 do_write_data_page(page, &fio);
537 clear_cold_data(page); 542 clear_cold_data(page);
@@ -688,6 +693,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
688 int gc_type = BG_GC; 693 int gc_type = BG_GC;
689 int nfree = 0; 694 int nfree = 0;
690 int ret = -1; 695 int ret = -1;
696 struct cp_control cpc = {
697 .reason = CP_SYNC,
698 };
691 699
692 INIT_LIST_HEAD(&ilist); 700 INIT_LIST_HEAD(&ilist);
693gc_more: 701gc_more:
@@ -698,7 +706,7 @@ gc_more:
698 706
699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 707 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
700 gc_type = FG_GC; 708 gc_type = FG_GC;
701 write_checkpoint(sbi, false); 709 write_checkpoint(sbi, &cpc);
702 } 710 }
703 711
704 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 712 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
@@ -723,7 +731,7 @@ gc_more:
723 goto gc_more; 731 goto gc_more;
724 732
725 if (gc_type == FG_GC) 733 if (gc_type == FG_GC)
726 write_checkpoint(sbi, false); 734 write_checkpoint(sbi, &cpc);
727stop: 735stop:
728 mutex_unlock(&sbi->gc_mutex); 736 mutex_unlock(&sbi->gc_mutex);
729 737
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 3e8ecdf3742b..88036fd75797 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -15,11 +15,13 @@
15 15
16bool f2fs_may_inline(struct inode *inode) 16bool f2fs_may_inline(struct inode *inode)
17{ 17{
18 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
19 block_t nr_blocks; 18 block_t nr_blocks;
20 loff_t i_size; 19 loff_t i_size;
21 20
22 if (!test_opt(sbi, INLINE_DATA)) 21 if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
22 return false;
23
24 if (f2fs_is_atomic_file(inode))
23 return false; 25 return false;
24 26
25 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; 27 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
@@ -35,7 +37,6 @@ bool f2fs_may_inline(struct inode *inode)
35 37
36int f2fs_read_inline_data(struct inode *inode, struct page *page) 38int f2fs_read_inline_data(struct inode *inode, struct page *page)
37{ 39{
38 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
39 struct page *ipage; 40 struct page *ipage;
40 void *src_addr, *dst_addr; 41 void *src_addr, *dst_addr;
41 42
@@ -44,7 +45,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
44 goto out; 45 goto out;
45 } 46 }
46 47
47 ipage = get_node_page(sbi, inode->i_ino); 48 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
48 if (IS_ERR(ipage)) { 49 if (IS_ERR(ipage)) {
49 unlock_page(page); 50 unlock_page(page);
50 return PTR_ERR(ipage); 51 return PTR_ERR(ipage);
@@ -73,7 +74,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
73 struct dnode_of_data dn; 74 struct dnode_of_data dn;
74 void *src_addr, *dst_addr; 75 void *src_addr, *dst_addr;
75 block_t new_blk_addr; 76 block_t new_blk_addr;
76 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 77 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
77 struct f2fs_io_info fio = { 78 struct f2fs_io_info fio = {
78 .type = DATA, 79 .type = DATA,
79 .rw = WRITE_SYNC | REQ_PRIO, 80 .rw = WRITE_SYNC | REQ_PRIO,
@@ -189,13 +190,12 @@ int f2fs_write_inline_data(struct inode *inode,
189 190
190void truncate_inline_data(struct inode *inode, u64 from) 191void truncate_inline_data(struct inode *inode, u64 from)
191{ 192{
192 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
193 struct page *ipage; 193 struct page *ipage;
194 194
195 if (from >= MAX_INLINE_DATA) 195 if (from >= MAX_INLINE_DATA)
196 return; 196 return;
197 197
198 ipage = get_node_page(sbi, inode->i_ino); 198 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
199 if (IS_ERR(ipage)) 199 if (IS_ERR(ipage))
200 return; 200 return;
201 201
@@ -209,7 +209,7 @@ void truncate_inline_data(struct inode *inode, u64 from)
209 209
210bool recover_inline_data(struct inode *inode, struct page *npage) 210bool recover_inline_data(struct inode *inode, struct page *npage)
211{ 211{
212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 212 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
213 struct f2fs_inode *ri = NULL; 213 struct f2fs_inode *ri = NULL;
214 void *src_addr, *dst_addr; 214 void *src_addr, *dst_addr;
215 struct page *ipage; 215 struct page *ipage;
@@ -229,7 +229,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
229 ri && (ri->i_inline & F2FS_INLINE_DATA)) { 229 ri && (ri->i_inline & F2FS_INLINE_DATA)) {
230process_inline: 230process_inline:
231 ipage = get_node_page(sbi, inode->i_ino); 231 ipage = get_node_page(sbi, inode->i_ino);
232 f2fs_bug_on(IS_ERR(ipage)); 232 f2fs_bug_on(sbi, IS_ERR(ipage));
233 233
234 f2fs_wait_on_page_writeback(ipage, NODE); 234 f2fs_wait_on_page_writeback(ipage, NODE);
235 235
@@ -243,7 +243,7 @@ process_inline:
243 243
244 if (f2fs_has_inline_data(inode)) { 244 if (f2fs_has_inline_data(inode)) {
245 ipage = get_node_page(sbi, inode->i_ino); 245 ipage = get_node_page(sbi, inode->i_ino);
246 f2fs_bug_on(IS_ERR(ipage)); 246 f2fs_bug_on(sbi, IS_ERR(ipage));
247 f2fs_wait_on_page_writeback(ipage, NODE); 247 f2fs_wait_on_page_writeback(ipage, NODE);
248 zero_user_segment(ipage, INLINE_DATA_OFFSET, 248 zero_user_segment(ipage, INLINE_DATA_OFFSET,
249 INLINE_DATA_OFFSET + MAX_INLINE_DATA); 249 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2c39999f3868..0deead4505e7 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -69,7 +69,7 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
69 69
70static int do_read_inode(struct inode *inode) 70static int do_read_inode(struct inode *inode)
71{ 71{
72 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 72 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
73 struct f2fs_inode_info *fi = F2FS_I(inode); 73 struct f2fs_inode_info *fi = F2FS_I(inode);
74 struct page *node_page; 74 struct page *node_page;
75 struct f2fs_inode *ri; 75 struct f2fs_inode *ri;
@@ -218,7 +218,7 @@ void update_inode(struct inode *inode, struct page *node_page)
218 218
219void update_inode_page(struct inode *inode) 219void update_inode_page(struct inode *inode)
220{ 220{
221 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 221 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
222 struct page *node_page; 222 struct page *node_page;
223retry: 223retry:
224 node_page = get_node_page(sbi, inode->i_ino); 224 node_page = get_node_page(sbi, inode->i_ino);
@@ -238,7 +238,7 @@ retry:
238 238
239int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 239int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
240{ 240{
241 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 241 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
242 242
243 if (inode->i_ino == F2FS_NODE_INO(sbi) || 243 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
244 inode->i_ino == F2FS_META_INO(sbi)) 244 inode->i_ino == F2FS_META_INO(sbi))
@@ -266,9 +266,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
266 */ 266 */
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
271 271
272 /* some remained atomic pages should discarded */
273 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
274 commit_inmem_pages(inode, true);
275
272 trace_f2fs_evict_inode(inode); 276 trace_f2fs_evict_inode(inode);
273 truncate_inode_pages_final(&inode->i_data); 277 truncate_inode_pages_final(&inode->i_data);
274 278
@@ -276,7 +280,7 @@ void f2fs_evict_inode(struct inode *inode)
276 inode->i_ino == F2FS_META_INO(sbi)) 280 inode->i_ino == F2FS_META_INO(sbi))
277 goto out_clear; 281 goto out_clear;
278 282
279 f2fs_bug_on(get_dirty_dents(inode)); 283 f2fs_bug_on(sbi, get_dirty_pages(inode));
280 remove_dirty_dir_inode(inode); 284 remove_dirty_dir_inode(inode);
281 285
282 if (inode->i_nlink || is_bad_inode(inode)) 286 if (inode->i_nlink || is_bad_inode(inode))
@@ -306,3 +310,26 @@ no_delete:
306out_clear: 310out_clear:
307 clear_inode(inode); 311 clear_inode(inode);
308} 312}
313
314/* caller should call f2fs_lock_op() */
315void handle_failed_inode(struct inode *inode)
316{
317 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
318
319 clear_nlink(inode);
320 make_bad_inode(inode);
321 unlock_new_inode(inode);
322
323 i_size_write(inode, 0);
324 if (F2FS_HAS_BLOCKS(inode))
325 f2fs_truncate(inode);
326
327 remove_inode_page(inode);
328 stat_dec_inline_inode(inode);
329
330 alloc_nid_failed(sbi, inode->i_ino);
331 f2fs_unlock_op(sbi);
332
333 /* iput will drop the inode object */
334 iput(inode);
335}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ee103fd7283c..0d2526e5aa11 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -23,7 +23,7 @@
23 23
24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
25{ 25{
26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 26 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
@@ -102,7 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 105 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
106 struct inode *inode; 106 struct inode *inode;
107 nid_t ino = 0; 107 nid_t ino = 0;
108 int err; 108 int err;
@@ -123,9 +123,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
123 123
124 f2fs_lock_op(sbi); 124 f2fs_lock_op(sbi);
125 err = f2fs_add_link(dentry, inode); 125 err = f2fs_add_link(dentry, inode);
126 f2fs_unlock_op(sbi);
127 if (err) 126 if (err)
128 goto out; 127 goto out;
128 f2fs_unlock_op(sbi);
129 129
130 alloc_nid_done(sbi, ino); 130 alloc_nid_done(sbi, ino);
131 131
@@ -133,9 +133,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
133 unlock_new_inode(inode); 133 unlock_new_inode(inode);
134 return 0; 134 return 0;
135out: 135out:
136 clear_nlink(inode); 136 handle_failed_inode(inode);
137 iget_failed(inode);
138 alloc_nid_failed(sbi, ino);
139 return err; 137 return err;
140} 138}
141 139
@@ -143,7 +141,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
143 struct dentry *dentry) 141 struct dentry *dentry)
144{ 142{
145 struct inode *inode = old_dentry->d_inode; 143 struct inode *inode = old_dentry->d_inode;
146 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 144 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
147 int err; 145 int err;
148 146
149 f2fs_balance_fs(sbi); 147 f2fs_balance_fs(sbi);
@@ -154,15 +152,16 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
154 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 152 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
155 f2fs_lock_op(sbi); 153 f2fs_lock_op(sbi);
156 err = f2fs_add_link(dentry, inode); 154 err = f2fs_add_link(dentry, inode);
157 f2fs_unlock_op(sbi);
158 if (err) 155 if (err)
159 goto out; 156 goto out;
157 f2fs_unlock_op(sbi);
160 158
161 d_instantiate(dentry, inode); 159 d_instantiate(dentry, inode);
162 return 0; 160 return 0;
163out: 161out:
164 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 162 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
165 iput(inode); 163 iput(inode);
164 f2fs_unlock_op(sbi);
166 return err; 165 return err;
167} 166}
168 167
@@ -203,7 +202,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
203 202
204static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 203static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
205{ 204{
206 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 205 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
207 struct inode *inode = dentry->d_inode; 206 struct inode *inode = dentry->d_inode;
208 struct f2fs_dir_entry *de; 207 struct f2fs_dir_entry *de;
209 struct page *page; 208 struct page *page;
@@ -237,7 +236,7 @@ fail:
237static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 236static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
238 const char *symname) 237 const char *symname)
239{ 238{
240 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
241 struct inode *inode; 240 struct inode *inode;
242 size_t symlen = strlen(symname) + 1; 241 size_t symlen = strlen(symname) + 1;
243 int err; 242 int err;
@@ -253,9 +252,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
253 252
254 f2fs_lock_op(sbi); 253 f2fs_lock_op(sbi);
255 err = f2fs_add_link(dentry, inode); 254 err = f2fs_add_link(dentry, inode);
256 f2fs_unlock_op(sbi);
257 if (err) 255 if (err)
258 goto out; 256 goto out;
257 f2fs_unlock_op(sbi);
259 258
260 err = page_symlink(inode, symname, symlen); 259 err = page_symlink(inode, symname, symlen);
261 alloc_nid_done(sbi, inode->i_ino); 260 alloc_nid_done(sbi, inode->i_ino);
@@ -264,15 +263,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
264 unlock_new_inode(inode); 263 unlock_new_inode(inode);
265 return err; 264 return err;
266out: 265out:
267 clear_nlink(inode); 266 handle_failed_inode(inode);
268 iget_failed(inode);
269 alloc_nid_failed(sbi, inode->i_ino);
270 return err; 267 return err;
271} 268}
272 269
273static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 270static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
274{ 271{
275 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 272 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
276 struct inode *inode; 273 struct inode *inode;
277 int err; 274 int err;
278 275
@@ -290,9 +287,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
290 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 287 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
291 f2fs_lock_op(sbi); 288 f2fs_lock_op(sbi);
292 err = f2fs_add_link(dentry, inode); 289 err = f2fs_add_link(dentry, inode);
293 f2fs_unlock_op(sbi);
294 if (err) 290 if (err)
295 goto out_fail; 291 goto out_fail;
292 f2fs_unlock_op(sbi);
296 293
297 alloc_nid_done(sbi, inode->i_ino); 294 alloc_nid_done(sbi, inode->i_ino);
298 295
@@ -303,9 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
303 300
304out_fail: 301out_fail:
305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 302 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
306 clear_nlink(inode); 303 handle_failed_inode(inode);
307 iget_failed(inode);
308 alloc_nid_failed(sbi, inode->i_ino);
309 return err; 304 return err;
310} 305}
311 306
@@ -320,7 +315,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
320static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 315static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
321 umode_t mode, dev_t rdev) 316 umode_t mode, dev_t rdev)
322{ 317{
323 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 318 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
324 struct inode *inode; 319 struct inode *inode;
325 int err = 0; 320 int err = 0;
326 321
@@ -338,25 +333,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
338 333
339 f2fs_lock_op(sbi); 334 f2fs_lock_op(sbi);
340 err = f2fs_add_link(dentry, inode); 335 err = f2fs_add_link(dentry, inode);
341 f2fs_unlock_op(sbi);
342 if (err) 336 if (err)
343 goto out; 337 goto out;
338 f2fs_unlock_op(sbi);
344 339
345 alloc_nid_done(sbi, inode->i_ino); 340 alloc_nid_done(sbi, inode->i_ino);
346 d_instantiate(dentry, inode); 341 d_instantiate(dentry, inode);
347 unlock_new_inode(inode); 342 unlock_new_inode(inode);
348 return 0; 343 return 0;
349out: 344out:
350 clear_nlink(inode); 345 handle_failed_inode(inode);
351 iget_failed(inode);
352 alloc_nid_failed(sbi, inode->i_ino);
353 return err; 346 return err;
354} 347}
355 348
356static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 349static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
357 struct inode *new_dir, struct dentry *new_dentry) 350 struct inode *new_dir, struct dentry *new_dentry)
358{ 351{
359 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); 352 struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
360 struct inode *old_inode = old_dentry->d_inode; 353 struct inode *old_inode = old_dentry->d_inode;
361 struct inode *new_inode = new_dentry->d_inode; 354 struct inode *new_inode = new_dentry->d_inode;
362 struct page *old_dir_page; 355 struct page *old_dir_page;
@@ -480,8 +473,7 @@ out:
480static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, 473static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
481 struct inode *new_dir, struct dentry *new_dentry) 474 struct inode *new_dir, struct dentry *new_dentry)
482{ 475{
483 struct super_block *sb = old_dir->i_sb; 476 struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
484 struct f2fs_sb_info *sbi = F2FS_SB(sb);
485 struct inode *old_inode = old_dentry->d_inode; 477 struct inode *old_inode = old_dentry->d_inode;
486 struct inode *new_inode = new_dentry->d_inode; 478 struct inode *new_inode = new_dentry->d_inode;
487 struct page *old_dir_page, *new_dir_page; 479 struct page *old_dir_page, *new_dir_page;
@@ -642,7 +634,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
642 634
643static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 635static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
644{ 636{
645 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 637 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
646 struct inode *inode; 638 struct inode *inode;
647 int err; 639 int err;
648 640
@@ -678,10 +670,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
678release_out: 670release_out:
679 release_orphan_inode(sbi); 671 release_orphan_inode(sbi);
680out: 672out:
681 f2fs_unlock_op(sbi); 673 handle_failed_inode(inode);
682 clear_nlink(inode);
683 iget_failed(inode);
684 alloc_nid_failed(sbi, inode->i_ino);
685 return err; 674 return err;
686} 675}
687 676
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 45378196e19a..44b8afef43d9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -54,7 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
54static void clear_node_page_dirty(struct page *page) 54static void clear_node_page_dirty(struct page *page)
55{ 55{
56 struct address_space *mapping = page->mapping; 56 struct address_space *mapping = page->mapping;
57 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
58 unsigned int long flags; 57 unsigned int long flags;
59 58
60 if (PageDirty(page)) { 59 if (PageDirty(page)) {
@@ -65,7 +64,7 @@ static void clear_node_page_dirty(struct page *page)
65 spin_unlock_irqrestore(&mapping->tree_lock, flags); 64 spin_unlock_irqrestore(&mapping->tree_lock, flags);
66 65
67 clear_page_dirty_for_io(page); 66 clear_page_dirty_for_io(page);
68 dec_page_count(sbi, F2FS_DIRTY_NODES); 67 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
69 } 68 }
70 ClearPageUptodate(page); 69 ClearPageUptodate(page);
71} 70}
@@ -92,7 +91,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
92 /* get current nat block page with lock */ 91 /* get current nat block page with lock */
93 src_page = get_meta_page(sbi, src_off); 92 src_page = get_meta_page(sbi, src_off);
94 dst_page = grab_meta_page(sbi, dst_off); 93 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page)); 94 f2fs_bug_on(sbi, PageDirty(src_page));
96 95
97 src_addr = page_address(src_page); 96 src_addr = page_address(src_page);
98 dst_addr = page_address(dst_page); 97 dst_addr = page_address(dst_page);
@@ -124,44 +123,99 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
124 kmem_cache_free(nat_entry_slab, e); 123 kmem_cache_free(nat_entry_slab, e);
125} 124}
126 125
127int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 126static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
127 struct nat_entry *ne)
128{
129 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
130 struct nat_entry_set *head;
131
132 if (get_nat_flag(ne, IS_DIRTY))
133 return;
134retry:
135 head = radix_tree_lookup(&nm_i->nat_set_root, set);
136 if (!head) {
137 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
138
139 INIT_LIST_HEAD(&head->entry_list);
140 INIT_LIST_HEAD(&head->set_list);
141 head->set = set;
142 head->entry_cnt = 0;
143
144 if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
145 cond_resched();
146 goto retry;
147 }
148 }
149 list_move_tail(&ne->list, &head->entry_list);
150 nm_i->dirty_nat_cnt++;
151 head->entry_cnt++;
152 set_nat_flag(ne, IS_DIRTY, true);
153}
154
155static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
156 struct nat_entry *ne)
157{
158 nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
159 struct nat_entry_set *head;
160
161 head = radix_tree_lookup(&nm_i->nat_set_root, set);
162 if (head) {
163 list_move_tail(&ne->list, &nm_i->nat_entries);
164 set_nat_flag(ne, IS_DIRTY, false);
165 head->entry_cnt--;
166 nm_i->dirty_nat_cnt--;
167 }
168}
169
170static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
171 nid_t start, unsigned int nr, struct nat_entry_set **ep)
172{
173 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
174 start, nr);
175}
176
177bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
128{ 178{
129 struct f2fs_nm_info *nm_i = NM_I(sbi); 179 struct f2fs_nm_info *nm_i = NM_I(sbi);
130 struct nat_entry *e; 180 struct nat_entry *e;
131 int is_cp = 1; 181 bool is_cp = true;
132 182
133 read_lock(&nm_i->nat_tree_lock); 183 read_lock(&nm_i->nat_tree_lock);
134 e = __lookup_nat_cache(nm_i, nid); 184 e = __lookup_nat_cache(nm_i, nid);
135 if (e && !e->checkpointed) 185 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
136 is_cp = 0; 186 is_cp = false;
137 read_unlock(&nm_i->nat_tree_lock); 187 read_unlock(&nm_i->nat_tree_lock);
138 return is_cp; 188 return is_cp;
139} 189}
140 190
141bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) 191bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
142{ 192{
143 struct f2fs_nm_info *nm_i = NM_I(sbi); 193 struct f2fs_nm_info *nm_i = NM_I(sbi);
144 struct nat_entry *e; 194 struct nat_entry *e;
145 bool fsync_done = false; 195 bool fsynced = false;
146 196
147 read_lock(&nm_i->nat_tree_lock); 197 read_lock(&nm_i->nat_tree_lock);
148 e = __lookup_nat_cache(nm_i, nid); 198 e = __lookup_nat_cache(nm_i, ino);
149 if (e) 199 if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
150 fsync_done = e->fsync_done; 200 fsynced = true;
151 read_unlock(&nm_i->nat_tree_lock); 201 read_unlock(&nm_i->nat_tree_lock);
152 return fsync_done; 202 return fsynced;
153} 203}
154 204
155void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) 205bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
156{ 206{
157 struct f2fs_nm_info *nm_i = NM_I(sbi); 207 struct f2fs_nm_info *nm_i = NM_I(sbi);
158 struct nat_entry *e; 208 struct nat_entry *e;
209 bool need_update = true;
159 210
160 write_lock(&nm_i->nat_tree_lock); 211 read_lock(&nm_i->nat_tree_lock);
161 e = __lookup_nat_cache(nm_i, nid); 212 e = __lookup_nat_cache(nm_i, ino);
162 if (e) 213 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
163 e->fsync_done = false; 214 (get_nat_flag(e, IS_CHECKPOINTED) ||
164 write_unlock(&nm_i->nat_tree_lock); 215 get_nat_flag(e, HAS_FSYNCED_INODE)))
216 need_update = false;
217 read_unlock(&nm_i->nat_tree_lock);
218 return need_update;
165} 219}
166 220
167static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 221static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
@@ -177,7 +231,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
177 } 231 }
178 memset(new, 0, sizeof(struct nat_entry)); 232 memset(new, 0, sizeof(struct nat_entry));
179 nat_set_nid(new, nid); 233 nat_set_nid(new, nid);
180 new->checkpointed = true; 234 nat_reset_flag(new);
181 list_add_tail(&new->list, &nm_i->nat_entries); 235 list_add_tail(&new->list, &nm_i->nat_entries);
182 nm_i->nat_cnt++; 236 nm_i->nat_cnt++;
183 return new; 237 return new;
@@ -216,7 +270,7 @@ retry:
216 goto retry; 270 goto retry;
217 } 271 }
218 e->ni = *ni; 272 e->ni = *ni;
219 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 273 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
220 } else if (new_blkaddr == NEW_ADDR) { 274 } else if (new_blkaddr == NEW_ADDR) {
221 /* 275 /*
222 * when nid is reallocated, 276 * when nid is reallocated,
@@ -224,16 +278,16 @@ retry:
224 * So, reinitialize it with new information. 278 * So, reinitialize it with new information.
225 */ 279 */
226 e->ni = *ni; 280 e->ni = *ni;
227 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 281 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
228 } 282 }
229 283
230 /* sanity check */ 284 /* sanity check */
231 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 285 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
232 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 286 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
233 new_blkaddr == NULL_ADDR); 287 new_blkaddr == NULL_ADDR);
234 f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && 288 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
235 new_blkaddr == NEW_ADDR); 289 new_blkaddr == NEW_ADDR);
236 f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && 290 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
237 nat_get_blkaddr(e) != NULL_ADDR && 291 nat_get_blkaddr(e) != NULL_ADDR &&
238 new_blkaddr == NEW_ADDR); 292 new_blkaddr == NEW_ADDR);
239 293
@@ -245,12 +299,17 @@ retry:
245 299
246 /* change address */ 300 /* change address */
247 nat_set_blkaddr(e, new_blkaddr); 301 nat_set_blkaddr(e, new_blkaddr);
302 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
303 set_nat_flag(e, IS_CHECKPOINTED, false);
248 __set_nat_cache_dirty(nm_i, e); 304 __set_nat_cache_dirty(nm_i, e);
249 305
250 /* update fsync_mark if its inode nat entry is still alive */ 306 /* update fsync_mark if its inode nat entry is still alive */
251 e = __lookup_nat_cache(nm_i, ni->ino); 307 e = __lookup_nat_cache(nm_i, ni->ino);
252 if (e) 308 if (e) {
253 e->fsync_done = fsync_done; 309 if (fsync_done && ni->nid == ni->ino)
310 set_nat_flag(e, HAS_FSYNCED_INODE, true);
311 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
312 }
254 write_unlock(&nm_i->nat_tree_lock); 313 write_unlock(&nm_i->nat_tree_lock);
255} 314}
256 315
@@ -411,7 +470,7 @@ got:
411 */ 470 */
412int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 471int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
413{ 472{
414 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 473 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
415 struct page *npage[4]; 474 struct page *npage[4];
416 struct page *parent; 475 struct page *parent;
417 int offset[4]; 476 int offset[4];
@@ -504,15 +563,15 @@ release_out:
504 563
505static void truncate_node(struct dnode_of_data *dn) 564static void truncate_node(struct dnode_of_data *dn)
506{ 565{
507 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 566 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
508 struct node_info ni; 567 struct node_info ni;
509 568
510 get_node_info(sbi, dn->nid, &ni); 569 get_node_info(sbi, dn->nid, &ni);
511 if (dn->inode->i_blocks == 0) { 570 if (dn->inode->i_blocks == 0) {
512 f2fs_bug_on(ni.blk_addr != NULL_ADDR); 571 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
513 goto invalidate; 572 goto invalidate;
514 } 573 }
515 f2fs_bug_on(ni.blk_addr == NULL_ADDR); 574 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
516 575
517 /* Deallocate node address */ 576 /* Deallocate node address */
518 invalidate_blocks(sbi, ni.blk_addr); 577 invalidate_blocks(sbi, ni.blk_addr);
@@ -540,14 +599,13 @@ invalidate:
540 599
541static int truncate_dnode(struct dnode_of_data *dn) 600static int truncate_dnode(struct dnode_of_data *dn)
542{ 601{
543 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
544 struct page *page; 602 struct page *page;
545 603
546 if (dn->nid == 0) 604 if (dn->nid == 0)
547 return 1; 605 return 1;
548 606
549 /* get direct node */ 607 /* get direct node */
550 page = get_node_page(sbi, dn->nid); 608 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
551 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) 609 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
552 return 1; 610 return 1;
553 else if (IS_ERR(page)) 611 else if (IS_ERR(page))
@@ -564,7 +622,6 @@ static int truncate_dnode(struct dnode_of_data *dn)
564static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 622static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
565 int ofs, int depth) 623 int ofs, int depth)
566{ 624{
567 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
568 struct dnode_of_data rdn = *dn; 625 struct dnode_of_data rdn = *dn;
569 struct page *page; 626 struct page *page;
570 struct f2fs_node *rn; 627 struct f2fs_node *rn;
@@ -578,7 +635,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
578 635
579 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 636 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
580 637
581 page = get_node_page(sbi, dn->nid); 638 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
582 if (IS_ERR(page)) { 639 if (IS_ERR(page)) {
583 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); 640 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
584 return PTR_ERR(page); 641 return PTR_ERR(page);
@@ -636,7 +693,6 @@ out_err:
636static int truncate_partial_nodes(struct dnode_of_data *dn, 693static int truncate_partial_nodes(struct dnode_of_data *dn,
637 struct f2fs_inode *ri, int *offset, int depth) 694 struct f2fs_inode *ri, int *offset, int depth)
638{ 695{
639 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
640 struct page *pages[2]; 696 struct page *pages[2];
641 nid_t nid[3]; 697 nid_t nid[3];
642 nid_t child_nid; 698 nid_t child_nid;
@@ -651,7 +707,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
651 /* get indirect nodes in the path */ 707 /* get indirect nodes in the path */
652 for (i = 0; i < idx + 1; i++) { 708 for (i = 0; i < idx + 1; i++) {
653 /* reference count'll be increased */ 709 /* reference count'll be increased */
654 pages[i] = get_node_page(sbi, nid[i]); 710 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
655 if (IS_ERR(pages[i])) { 711 if (IS_ERR(pages[i])) {
656 err = PTR_ERR(pages[i]); 712 err = PTR_ERR(pages[i]);
657 idx = i - 1; 713 idx = i - 1;
@@ -696,7 +752,7 @@ fail:
696 */ 752 */
697int truncate_inode_blocks(struct inode *inode, pgoff_t from) 753int truncate_inode_blocks(struct inode *inode, pgoff_t from)
698{ 754{
699 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 755 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
700 int err = 0, cont = 1; 756 int err = 0, cont = 1;
701 int level, offset[4], noffset[4]; 757 int level, offset[4], noffset[4];
702 unsigned int nofs = 0; 758 unsigned int nofs = 0;
@@ -792,7 +848,7 @@ fail:
792 848
793int truncate_xattr_node(struct inode *inode, struct page *page) 849int truncate_xattr_node(struct inode *inode, struct page *page)
794{ 850{
795 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 851 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
796 nid_t nid = F2FS_I(inode)->i_xattr_nid; 852 nid_t nid = F2FS_I(inode)->i_xattr_nid;
797 struct dnode_of_data dn; 853 struct dnode_of_data dn;
798 struct page *npage; 854 struct page *npage;
@@ -840,7 +896,8 @@ void remove_inode_page(struct inode *inode)
840 truncate_data_blocks_range(&dn, 1); 896 truncate_data_blocks_range(&dn, 1);
841 897
842 /* 0 is possible, after f2fs_new_inode() has failed */ 898 /* 0 is possible, after f2fs_new_inode() has failed */
843 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 899 f2fs_bug_on(F2FS_I_SB(inode),
900 inode->i_blocks != 0 && inode->i_blocks != 1);
844 901
845 /* will put inode & node pages */ 902 /* will put inode & node pages */
846 truncate_node(&dn); 903 truncate_node(&dn);
@@ -860,7 +917,7 @@ struct page *new_inode_page(struct inode *inode)
860struct page *new_node_page(struct dnode_of_data *dn, 917struct page *new_node_page(struct dnode_of_data *dn,
861 unsigned int ofs, struct page *ipage) 918 unsigned int ofs, struct page *ipage)
862{ 919{
863 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 920 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
864 struct node_info old_ni, new_ni; 921 struct node_info old_ni, new_ni;
865 struct page *page; 922 struct page *page;
866 int err; 923 int err;
@@ -880,7 +937,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
880 get_node_info(sbi, dn->nid, &old_ni); 937 get_node_info(sbi, dn->nid, &old_ni);
881 938
882 /* Reinitialize old_ni with new node page */ 939 /* Reinitialize old_ni with new node page */
883 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 940 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
884 new_ni = old_ni; 941 new_ni = old_ni;
885 new_ni.ino = dn->inode->i_ino; 942 new_ni.ino = dn->inode->i_ino;
886 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 943 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
@@ -918,7 +975,7 @@ fail:
918 */ 975 */
919static int read_node_page(struct page *page, int rw) 976static int read_node_page(struct page *page, int rw)
920{ 977{
921 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 978 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
922 struct node_info ni; 979 struct node_info ni;
923 980
924 get_node_info(sbi, page->index, &ni); 981 get_node_info(sbi, page->index, &ni);
@@ -994,7 +1051,7 @@ got_it:
994 */ 1051 */
995struct page *get_node_page_ra(struct page *parent, int start) 1052struct page *get_node_page_ra(struct page *parent, int start)
996{ 1053{
997 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); 1054 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
998 struct blk_plug plug; 1055 struct blk_plug plug;
999 struct page *page; 1056 struct page *page;
1000 int err, i, end; 1057 int err, i, end;
@@ -1124,10 +1181,14 @@ continue_unlock:
1124 1181
1125 /* called by fsync() */ 1182 /* called by fsync() */
1126 if (ino && IS_DNODE(page)) { 1183 if (ino && IS_DNODE(page)) {
1127 int mark = !is_checkpointed_node(sbi, ino);
1128 set_fsync_mark(page, 1); 1184 set_fsync_mark(page, 1);
1129 if (IS_INODE(page)) 1185 if (IS_INODE(page)) {
1130 set_dentry_mark(page, mark); 1186 if (!is_checkpointed_node(sbi, ino) &&
1187 !has_fsynced_inode(sbi, ino))
1188 set_dentry_mark(page, 1);
1189 else
1190 set_dentry_mark(page, 0);
1191 }
1131 nwritten++; 1192 nwritten++;
1132 } else { 1193 } else {
1133 set_fsync_mark(page, 0); 1194 set_fsync_mark(page, 0);
@@ -1206,7 +1267,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1206static int f2fs_write_node_page(struct page *page, 1267static int f2fs_write_node_page(struct page *page,
1207 struct writeback_control *wbc) 1268 struct writeback_control *wbc)
1208{ 1269{
1209 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1270 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1210 nid_t nid; 1271 nid_t nid;
1211 block_t new_addr; 1272 block_t new_addr;
1212 struct node_info ni; 1273 struct node_info ni;
@@ -1226,7 +1287,7 @@ static int f2fs_write_node_page(struct page *page,
1226 1287
1227 /* get old block addr of this node page */ 1288 /* get old block addr of this node page */
1228 nid = nid_of_node(page); 1289 nid = nid_of_node(page);
1229 f2fs_bug_on(page->index != nid); 1290 f2fs_bug_on(sbi, page->index != nid);
1230 1291
1231 get_node_info(sbi, nid, &ni); 1292 get_node_info(sbi, nid, &ni);
1232 1293
@@ -1257,7 +1318,7 @@ redirty_out:
1257static int f2fs_write_node_pages(struct address_space *mapping, 1318static int f2fs_write_node_pages(struct address_space *mapping,
1258 struct writeback_control *wbc) 1319 struct writeback_control *wbc)
1259{ 1320{
1260 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1321 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
1261 long diff; 1322 long diff;
1262 1323
1263 trace_f2fs_writepages(mapping->host, wbc, NODE); 1324 trace_f2fs_writepages(mapping->host, wbc, NODE);
@@ -1282,15 +1343,12 @@ skip_write:
1282 1343
1283static int f2fs_set_node_page_dirty(struct page *page) 1344static int f2fs_set_node_page_dirty(struct page *page)
1284{ 1345{
1285 struct address_space *mapping = page->mapping;
1286 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1287
1288 trace_f2fs_set_page_dirty(page, NODE); 1346 trace_f2fs_set_page_dirty(page, NODE);
1289 1347
1290 SetPageUptodate(page); 1348 SetPageUptodate(page);
1291 if (!PageDirty(page)) { 1349 if (!PageDirty(page)) {
1292 __set_page_dirty_nobuffers(page); 1350 __set_page_dirty_nobuffers(page);
1293 inc_page_count(sbi, F2FS_DIRTY_NODES); 1351 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1294 SetPagePrivate(page); 1352 SetPagePrivate(page);
1295 return 1; 1353 return 1;
1296 } 1354 }
@@ -1301,9 +1359,8 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1301 unsigned int length) 1359 unsigned int length)
1302{ 1360{
1303 struct inode *inode = page->mapping->host; 1361 struct inode *inode = page->mapping->host;
1304 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1305 if (PageDirty(page)) 1362 if (PageDirty(page))
1306 dec_page_count(sbi, F2FS_DIRTY_NODES); 1363 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES);
1307 ClearPagePrivate(page); 1364 ClearPagePrivate(page);
1308} 1365}
1309 1366
@@ -1356,7 +1413,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1356 read_lock(&nm_i->nat_tree_lock); 1413 read_lock(&nm_i->nat_tree_lock);
1357 ne = __lookup_nat_cache(nm_i, nid); 1414 ne = __lookup_nat_cache(nm_i, nid);
1358 if (ne && 1415 if (ne &&
1359 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) 1416 (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1417 nat_get_blkaddr(ne) != NULL_ADDR))
1360 allocated = true; 1418 allocated = true;
1361 read_unlock(&nm_i->nat_tree_lock); 1419 read_unlock(&nm_i->nat_tree_lock);
1362 if (allocated) 1420 if (allocated)
@@ -1413,7 +1471,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1413 break; 1471 break;
1414 1472
1415 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1473 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1416 f2fs_bug_on(blk_addr == NEW_ADDR); 1474 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1417 if (blk_addr == NULL_ADDR) { 1475 if (blk_addr == NULL_ADDR) {
1418 if (add_free_nid(sbi, start_nid, true) < 0) 1476 if (add_free_nid(sbi, start_nid, true) < 0)
1419 break; 1477 break;
@@ -1483,12 +1541,12 @@ retry:
1483 1541
1484 /* We should not use stale free nids created by build_free_nids */ 1542 /* We should not use stale free nids created by build_free_nids */
1485 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1543 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1486 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1544 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1487 list_for_each_entry(i, &nm_i->free_nid_list, list) 1545 list_for_each_entry(i, &nm_i->free_nid_list, list)
1488 if (i->state == NID_NEW) 1546 if (i->state == NID_NEW)
1489 break; 1547 break;
1490 1548
1491 f2fs_bug_on(i->state != NID_NEW); 1549 f2fs_bug_on(sbi, i->state != NID_NEW);
1492 *nid = i->nid; 1550 *nid = i->nid;
1493 i->state = NID_ALLOC; 1551 i->state = NID_ALLOC;
1494 nm_i->fcnt--; 1552 nm_i->fcnt--;
@@ -1514,7 +1572,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1514 1572
1515 spin_lock(&nm_i->free_nid_list_lock); 1573 spin_lock(&nm_i->free_nid_list_lock);
1516 i = __lookup_free_nid_list(nm_i, nid); 1574 i = __lookup_free_nid_list(nm_i, nid);
1517 f2fs_bug_on(!i || i->state != NID_ALLOC); 1575 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1518 __del_from_free_nid_list(nm_i, i); 1576 __del_from_free_nid_list(nm_i, i);
1519 spin_unlock(&nm_i->free_nid_list_lock); 1577 spin_unlock(&nm_i->free_nid_list_lock);
1520 1578
@@ -1535,7 +1593,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1535 1593
1536 spin_lock(&nm_i->free_nid_list_lock); 1594 spin_lock(&nm_i->free_nid_list_lock);
1537 i = __lookup_free_nid_list(nm_i, nid); 1595 i = __lookup_free_nid_list(nm_i, nid);
1538 f2fs_bug_on(!i || i->state != NID_ALLOC); 1596 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1539 if (!available_free_memory(sbi, FREE_NIDS)) { 1597 if (!available_free_memory(sbi, FREE_NIDS)) {
1540 __del_from_free_nid_list(nm_i, i); 1598 __del_from_free_nid_list(nm_i, i);
1541 need_free = true; 1599 need_free = true;
@@ -1551,14 +1609,13 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1551 1609
1552void recover_inline_xattr(struct inode *inode, struct page *page) 1610void recover_inline_xattr(struct inode *inode, struct page *page)
1553{ 1611{
1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1555 void *src_addr, *dst_addr; 1612 void *src_addr, *dst_addr;
1556 size_t inline_size; 1613 size_t inline_size;
1557 struct page *ipage; 1614 struct page *ipage;
1558 struct f2fs_inode *ri; 1615 struct f2fs_inode *ri;
1559 1616
1560 ipage = get_node_page(sbi, inode->i_ino); 1617 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
1561 f2fs_bug_on(IS_ERR(ipage)); 1618 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
1562 1619
1563 ri = F2FS_INODE(page); 1620 ri = F2FS_INODE(page);
1564 if (!(ri->i_inline & F2FS_INLINE_XATTR)) { 1621 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
@@ -1579,7 +1636,7 @@ update_inode:
1579 1636
1580void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1637void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1581{ 1638{
1582 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1639 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1640 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1584 nid_t new_xnid = nid_of_node(page); 1641 nid_t new_xnid = nid_of_node(page);
1585 struct node_info ni; 1642 struct node_info ni;
@@ -1590,7 +1647,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1590 1647
1591 /* Deallocate node address */ 1648 /* Deallocate node address */
1592 get_node_info(sbi, prev_xnid, &ni); 1649 get_node_info(sbi, prev_xnid, &ni);
1593 f2fs_bug_on(ni.blk_addr == NULL_ADDR); 1650 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
1594 invalidate_blocks(sbi, ni.blk_addr); 1651 invalidate_blocks(sbi, ni.blk_addr);
1595 dec_valid_node_count(sbi, inode); 1652 dec_valid_node_count(sbi, inode);
1596 set_node_addr(sbi, &ni, NULL_ADDR, false); 1653 set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -1598,7 +1655,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1598recover_xnid: 1655recover_xnid:
1599 /* 2: allocate new xattr nid */ 1656 /* 2: allocate new xattr nid */
1600 if (unlikely(!inc_valid_node_count(sbi, inode))) 1657 if (unlikely(!inc_valid_node_count(sbi, inode)))
1601 f2fs_bug_on(1); 1658 f2fs_bug_on(sbi, 1);
1602 1659
1603 remove_free_nid(NM_I(sbi), new_xnid); 1660 remove_free_nid(NM_I(sbi), new_xnid);
1604 get_node_info(sbi, new_xnid, &ni); 1661 get_node_info(sbi, new_xnid, &ni);
@@ -1691,7 +1748,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1691 struct f2fs_summary *sum_entry; 1748 struct f2fs_summary *sum_entry;
1692 struct inode *inode = sbi->sb->s_bdev->bd_inode; 1749 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1693 block_t addr; 1750 block_t addr;
1694 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1751 int bio_blocks = MAX_BIO_BLOCKS(sbi);
1695 struct page *pages[bio_blocks]; 1752 struct page *pages[bio_blocks];
1696 int i, idx, last_offset, nrpages, err = 0; 1753 int i, idx, last_offset, nrpages, err = 0;
1697 1754
@@ -1733,89 +1790,6 @@ skip:
1733 return err; 1790 return err;
1734} 1791}
1735 1792
1736static struct nat_entry_set *grab_nat_entry_set(void)
1737{
1738 struct nat_entry_set *nes =
1739 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1740
1741 nes->entry_cnt = 0;
1742 INIT_LIST_HEAD(&nes->set_list);
1743 INIT_LIST_HEAD(&nes->entry_list);
1744 return nes;
1745}
1746
1747static void release_nat_entry_set(struct nat_entry_set *nes,
1748 struct f2fs_nm_info *nm_i)
1749{
1750 f2fs_bug_on(!list_empty(&nes->entry_list));
1751
1752 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1753 list_del(&nes->set_list);
1754 kmem_cache_free(nat_entry_set_slab, nes);
1755}
1756
1757static void adjust_nat_entry_set(struct nat_entry_set *nes,
1758 struct list_head *head)
1759{
1760 struct nat_entry_set *next = nes;
1761
1762 if (list_is_last(&nes->set_list, head))
1763 return;
1764
1765 list_for_each_entry_continue(next, head, set_list)
1766 if (nes->entry_cnt <= next->entry_cnt)
1767 break;
1768
1769 list_move_tail(&nes->set_list, &next->set_list);
1770}
1771
1772static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1773{
1774 struct nat_entry_set *nes;
1775 nid_t start_nid = START_NID(ne->ni.nid);
1776
1777 list_for_each_entry(nes, head, set_list) {
1778 if (nes->start_nid == start_nid) {
1779 list_move_tail(&ne->list, &nes->entry_list);
1780 nes->entry_cnt++;
1781 adjust_nat_entry_set(nes, head);
1782 return;
1783 }
1784 }
1785
1786 nes = grab_nat_entry_set();
1787
1788 nes->start_nid = start_nid;
1789 list_move_tail(&ne->list, &nes->entry_list);
1790 nes->entry_cnt++;
1791 list_add(&nes->set_list, head);
1792}
1793
1794static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1795{
1796 struct f2fs_nm_info *nm_i = NM_I(sbi);
1797 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1798 struct list_head *set_list = &nm_i->nat_entry_set;
1799 struct nat_entry *ne, *tmp;
1800
1801 write_lock(&nm_i->nat_tree_lock);
1802 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1803 if (nat_get_blkaddr(ne) == NEW_ADDR)
1804 continue;
1805 add_nat_entry(ne, set_list);
1806 nm_i->dirty_nat_cnt++;
1807 }
1808 write_unlock(&nm_i->nat_tree_lock);
1809}
1810
1811static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1812{
1813 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1814 return true;
1815 else
1816 return false;
1817}
1818
1819static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1793static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1820{ 1794{
1821 struct f2fs_nm_info *nm_i = NM_I(sbi); 1795 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1850,99 +1824,130 @@ found:
1850 mutex_unlock(&curseg->curseg_mutex); 1824 mutex_unlock(&curseg->curseg_mutex);
1851} 1825}
1852 1826
1853/* 1827static void __adjust_nat_entry_set(struct nat_entry_set *nes,
1854 * This function is called during the checkpointing process. 1828 struct list_head *head, int max)
1855 */
1856void flush_nat_entries(struct f2fs_sb_info *sbi)
1857{ 1829{
1858 struct f2fs_nm_info *nm_i = NM_I(sbi); 1830 struct nat_entry_set *cur;
1859 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1860 struct f2fs_summary_block *sum = curseg->sum_blk;
1861 struct nat_entry_set *nes, *tmp;
1862 struct list_head *head = &nm_i->nat_entry_set;
1863 bool to_journal = true;
1864 1831
1865 /* merge nat entries of dirty list to nat entry set temporarily */ 1832 if (nes->entry_cnt >= max)
1866 merge_nats_in_set(sbi); 1833 goto add_out;
1867 1834
1868 /* 1835 list_for_each_entry(cur, head, set_list) {
1869 * if there are no enough space in journal to store dirty nat 1836 if (cur->entry_cnt >= nes->entry_cnt) {
1870 * entries, remove all entries from journal and merge them 1837 list_add(&nes->set_list, cur->set_list.prev);
1871 * into nat entry set. 1838 return;
1872 */ 1839 }
1873 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1874 remove_nats_in_journal(sbi);
1875
1876 /*
1877 * merge nat entries of dirty list to nat entry set temporarily
1878 */
1879 merge_nats_in_set(sbi);
1880 } 1840 }
1841add_out:
1842 list_add_tail(&nes->set_list, head);
1843}
1881 1844
1882 if (!nm_i->dirty_nat_cnt) 1845static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1883 return; 1846 struct nat_entry_set *set)
1847{
1848 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1849 struct f2fs_summary_block *sum = curseg->sum_blk;
1850 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
1851 bool to_journal = true;
1852 struct f2fs_nat_block *nat_blk;
1853 struct nat_entry *ne, *cur;
1854 struct page *page = NULL;
1884 1855
1885 /* 1856 /*
1886 * there are two steps to flush nat entries: 1857 * there are two steps to flush nat entries:
1887 * #1, flush nat entries to journal in current hot data summary block. 1858 * #1, flush nat entries to journal in current hot data summary block.
1888 * #2, flush nat entries to nat page. 1859 * #2, flush nat entries to nat page.
1889 */ 1860 */
1890 list_for_each_entry_safe(nes, tmp, head, set_list) { 1861 if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
1891 struct f2fs_nat_block *nat_blk; 1862 to_journal = false;
1892 struct nat_entry *ne, *cur;
1893 struct page *page;
1894 nid_t start_nid = nes->start_nid;
1895 1863
1896 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) 1864 if (to_journal) {
1897 to_journal = false; 1865 mutex_lock(&curseg->curseg_mutex);
1866 } else {
1867 page = get_next_nat_page(sbi, start_nid);
1868 nat_blk = page_address(page);
1869 f2fs_bug_on(sbi, !nat_blk);
1870 }
1871
1872 /* flush dirty nats in nat entry set */
1873 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
1874 struct f2fs_nat_entry *raw_ne;
1875 nid_t nid = nat_get_nid(ne);
1876 int offset;
1877
1878 if (nat_get_blkaddr(ne) == NEW_ADDR)
1879 continue;
1898 1880
1899 if (to_journal) { 1881 if (to_journal) {
1900 mutex_lock(&curseg->curseg_mutex); 1882 offset = lookup_journal_in_cursum(sum,
1883 NAT_JOURNAL, nid, 1);
1884 f2fs_bug_on(sbi, offset < 0);
1885 raw_ne = &nat_in_journal(sum, offset);
1886 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1901 } else { 1887 } else {
1902 page = get_next_nat_page(sbi, start_nid); 1888 raw_ne = &nat_blk->entries[nid - start_nid];
1903 nat_blk = page_address(page);
1904 f2fs_bug_on(!nat_blk);
1905 } 1889 }
1890 raw_nat_from_node_info(raw_ne, &ne->ni);
1906 1891
1907 /* flush dirty nats in nat entry set */ 1892 write_lock(&NM_I(sbi)->nat_tree_lock);
1908 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { 1893 nat_reset_flag(ne);
1909 struct f2fs_nat_entry *raw_ne; 1894 __clear_nat_cache_dirty(NM_I(sbi), ne);
1910 nid_t nid = nat_get_nid(ne); 1895 write_unlock(&NM_I(sbi)->nat_tree_lock);
1911 int offset;
1912 1896
1913 if (to_journal) { 1897 if (nat_get_blkaddr(ne) == NULL_ADDR)
1914 offset = lookup_journal_in_cursum(sum, 1898 add_free_nid(sbi, nid, false);
1915 NAT_JOURNAL, nid, 1); 1899 }
1916 f2fs_bug_on(offset < 0);
1917 raw_ne = &nat_in_journal(sum, offset);
1918 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1919 } else {
1920 raw_ne = &nat_blk->entries[nid - start_nid];
1921 }
1922 raw_nat_from_node_info(raw_ne, &ne->ni);
1923 1900
1924 if (nat_get_blkaddr(ne) == NULL_ADDR && 1901 if (to_journal)
1925 add_free_nid(sbi, nid, false) <= 0) { 1902 mutex_unlock(&curseg->curseg_mutex);
1926 write_lock(&nm_i->nat_tree_lock); 1903 else
1927 __del_from_nat_cache(nm_i, ne); 1904 f2fs_put_page(page, 1);
1928 write_unlock(&nm_i->nat_tree_lock);
1929 } else {
1930 write_lock(&nm_i->nat_tree_lock);
1931 __clear_nat_cache_dirty(nm_i, ne);
1932 write_unlock(&nm_i->nat_tree_lock);
1933 }
1934 }
1935 1905
1936 if (to_journal) 1906 if (!set->entry_cnt) {
1937 mutex_unlock(&curseg->curseg_mutex); 1907 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1938 else 1908 kmem_cache_free(nat_entry_set_slab, set);
1939 f2fs_put_page(page, 1); 1909 }
1910}
1911
1912/*
1913 * This function is called during the checkpointing process.
1914 */
1915void flush_nat_entries(struct f2fs_sb_info *sbi)
1916{
1917 struct f2fs_nm_info *nm_i = NM_I(sbi);
1918 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1919 struct f2fs_summary_block *sum = curseg->sum_blk;
1920 struct nat_entry_set *setvec[NATVEC_SIZE];
1921 struct nat_entry_set *set, *tmp;
1922 unsigned int found;
1923 nid_t set_idx = 0;
1924 LIST_HEAD(sets);
1925
1926 /*
1927 * if there are no enough space in journal to store dirty nat
1928 * entries, remove all entries from journal and merge them
1929 * into nat entry set.
1930 */
1931 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1932 remove_nats_in_journal(sbi);
1940 1933
1941 release_nat_entry_set(nes, nm_i); 1934 if (!nm_i->dirty_nat_cnt)
1935 return;
1936
1937 while ((found = __gang_lookup_nat_set(nm_i,
1938 set_idx, NATVEC_SIZE, setvec))) {
1939 unsigned idx;
1940 set_idx = setvec[found - 1]->set + 1;
1941 for (idx = 0; idx < found; idx++)
1942 __adjust_nat_entry_set(setvec[idx], &sets,
1943 MAX_NAT_JENTRIES(sum));
1942 } 1944 }
1943 1945
1944 f2fs_bug_on(!list_empty(head)); 1946 /* flush dirty nats in nat entry set */
1945 f2fs_bug_on(nm_i->dirty_nat_cnt); 1947 list_for_each_entry_safe(set, tmp, &sets, set_list)
1948 __flush_nat_entry_set(sbi, set);
1949
1950 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
1946} 1951}
1947 1952
1948static int init_node_manager(struct f2fs_sb_info *sbi) 1953static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1969,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1969 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 1974 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1970 INIT_LIST_HEAD(&nm_i->free_nid_list); 1975 INIT_LIST_HEAD(&nm_i->free_nid_list);
1971 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1976 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1977 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
1972 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1973 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1974 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1975 1979
1976 mutex_init(&nm_i->build_lock); 1980 mutex_init(&nm_i->build_lock);
1977 spin_lock_init(&nm_i->free_nid_list_lock); 1981 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -2020,14 +2024,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2020 /* destroy free nid list */ 2024 /* destroy free nid list */
2021 spin_lock(&nm_i->free_nid_list_lock); 2025 spin_lock(&nm_i->free_nid_list_lock);
2022 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2026 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
2023 f2fs_bug_on(i->state == NID_ALLOC); 2027 f2fs_bug_on(sbi, i->state == NID_ALLOC);
2024 __del_from_free_nid_list(nm_i, i); 2028 __del_from_free_nid_list(nm_i, i);
2025 nm_i->fcnt--; 2029 nm_i->fcnt--;
2026 spin_unlock(&nm_i->free_nid_list_lock); 2030 spin_unlock(&nm_i->free_nid_list_lock);
2027 kmem_cache_free(free_nid_slab, i); 2031 kmem_cache_free(free_nid_slab, i);
2028 spin_lock(&nm_i->free_nid_list_lock); 2032 spin_lock(&nm_i->free_nid_list_lock);
2029 } 2033 }
2030 f2fs_bug_on(nm_i->fcnt); 2034 f2fs_bug_on(sbi, nm_i->fcnt);
2031 spin_unlock(&nm_i->free_nid_list_lock); 2035 spin_unlock(&nm_i->free_nid_list_lock);
2032 2036
2033 /* destroy nat cache */ 2037 /* destroy nat cache */
@@ -2039,7 +2043,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2039 for (idx = 0; idx < found; idx++) 2043 for (idx = 0; idx < found; idx++)
2040 __del_from_nat_cache(nm_i, natvec[idx]); 2044 __del_from_nat_cache(nm_i, natvec[idx]);
2041 } 2045 }
2042 f2fs_bug_on(nm_i->nat_cnt); 2046 f2fs_bug_on(sbi, nm_i->nat_cnt);
2043 write_unlock(&nm_i->nat_tree_lock); 2047 write_unlock(&nm_i->nat_tree_lock);
2044 2048
2045 kfree(nm_i->nat_bitmap); 2049 kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 8a116a407599..8d5e6e0dd840 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -39,10 +39,16 @@ struct node_info {
39 unsigned char version; /* version of the node */ 39 unsigned char version; /* version of the node */
40}; 40};
41 41
42enum {
43 IS_CHECKPOINTED, /* is it checkpointed before? */
44 HAS_FSYNCED_INODE, /* is the inode fsynced before? */
45 HAS_LAST_FSYNC, /* has the latest node fsync mark? */
46 IS_DIRTY, /* this nat entry is dirty? */
47};
48
42struct nat_entry { 49struct nat_entry {
43 struct list_head list; /* for clean or dirty nat list */ 50 struct list_head list; /* for clean or dirty nat list */
44 bool checkpointed; /* whether it is checkpointed or not */ 51 unsigned char flag; /* for node information bits */
45 bool fsync_done; /* whether the latest node has fsync mark */
46 struct node_info ni; /* in-memory node information */ 52 struct node_info ni; /* in-memory node information */
47}; 53};
48 54
@@ -55,18 +61,32 @@ struct nat_entry {
55#define nat_get_version(nat) (nat->ni.version) 61#define nat_get_version(nat) (nat->ni.version)
56#define nat_set_version(nat, v) (nat->ni.version = v) 62#define nat_set_version(nat, v) (nat->ni.version = v)
57 63
58#define __set_nat_cache_dirty(nm_i, ne) \
59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0)
63#define __clear_nat_cache_dirty(nm_i, ne) \
64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0)
68#define inc_node_version(version) (++version) 64#define inc_node_version(version) (++version)
69 65
66static inline void set_nat_flag(struct nat_entry *ne,
67 unsigned int type, bool set)
68{
69 unsigned char mask = 0x01 << type;
70 if (set)
71 ne->flag |= mask;
72 else
73 ne->flag &= ~mask;
74}
75
76static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
77{
78 unsigned char mask = 0x01 << type;
79 return ne->flag & mask;
80}
81
82static inline void nat_reset_flag(struct nat_entry *ne)
83{
84 /* these states can be set only after checkpoint was done */
85 set_nat_flag(ne, IS_CHECKPOINTED, true);
86 set_nat_flag(ne, HAS_FSYNCED_INODE, false);
87 set_nat_flag(ne, HAS_LAST_FSYNC, true);
88}
89
70static inline void node_info_from_raw_nat(struct node_info *ni, 90static inline void node_info_from_raw_nat(struct node_info *ni,
71 struct f2fs_nat_entry *raw_ne) 91 struct f2fs_nat_entry *raw_ne)
72{ 92{
@@ -90,9 +110,9 @@ enum mem_type {
90}; 110};
91 111
92struct nat_entry_set { 112struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */ 113 struct list_head set_list; /* link with other nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */ 114 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */ 115 nid_t set; /* set number*/
96 unsigned int entry_cnt; /* the # of nat entries in set */ 116 unsigned int entry_cnt; /* the # of nat entries in set */
97}; 117};
98 118
@@ -110,18 +130,19 @@ struct free_nid {
110 int state; /* in use or not: NID_NEW or NID_ALLOC */ 130 int state; /* in use or not: NID_NEW or NID_ALLOC */
111}; 131};
112 132
113static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) 133static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
114{ 134{
115 struct f2fs_nm_info *nm_i = NM_I(sbi); 135 struct f2fs_nm_info *nm_i = NM_I(sbi);
116 struct free_nid *fnid; 136 struct free_nid *fnid;
117 137
118 if (nm_i->fcnt <= 0)
119 return -1;
120 spin_lock(&nm_i->free_nid_list_lock); 138 spin_lock(&nm_i->free_nid_list_lock);
139 if (nm_i->fcnt <= 0) {
140 spin_unlock(&nm_i->free_nid_list_lock);
141 return;
142 }
121 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); 143 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
122 *nid = fnid->nid; 144 *nid = fnid->nid;
123 spin_unlock(&nm_i->free_nid_list_lock); 145 spin_unlock(&nm_i->free_nid_list_lock);
124 return 0;
125} 146}
126 147
127/* 148/*
@@ -197,8 +218,7 @@ static inline void copy_node_footer(struct page *dst, struct page *src)
197 218
198static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) 219static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
199{ 220{
200 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 221 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
201 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
202 struct f2fs_node *rn = F2FS_NODE(page); 222 struct f2fs_node *rn = F2FS_NODE(page);
203 223
204 rn->footer.cp_ver = ckpt->checkpoint_ver; 224 rn->footer.cp_ver = ckpt->checkpoint_ver;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 756c41cd2582..ebd013225788 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -14,6 +14,37 @@
14#include "node.h" 14#include "node.h"
15#include "segment.h" 15#include "segment.h"
16 16
17/*
18 * Roll forward recovery scenarios.
19 *
20 * [Term] F: fsync_mark, D: dentry_mark
21 *
22 * 1. inode(x) | CP | inode(x) | dnode(F)
23 * -> Update the latest inode(x).
24 *
25 * 2. inode(x) | CP | inode(F) | dnode(F)
26 * -> No problem.
27 *
28 * 3. inode(x) | CP | dnode(F) | inode(x)
29 * -> Recover to the latest dnode(F), and drop the last inode(x)
30 *
31 * 4. inode(x) | CP | dnode(F) | inode(F)
32 * -> No problem.
33 *
34 * 5. CP | inode(x) | dnode(F)
35 * -> The inode(DF) was missing. Should drop this dnode(F).
36 *
37 * 6. CP | inode(DF) | dnode(F)
38 * -> No problem.
39 *
40 * 7. CP | dnode(F) | inode(DF)
41 * -> If f2fs_iget fails, then goto next to find inode(DF).
42 *
43 * 8. CP | dnode(F) | inode(x)
44 * -> If f2fs_iget fails, then goto next to find inode(DF).
45 * But it will fail due to no inode(DF).
46 */
47
17static struct kmem_cache *fsync_entry_slab; 48static struct kmem_cache *fsync_entry_slab;
18 49
19bool space_for_roll_forward(struct f2fs_sb_info *sbi) 50bool space_for_roll_forward(struct f2fs_sb_info *sbi)
@@ -36,7 +67,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
36 return NULL; 67 return NULL;
37} 68}
38 69
39static int recover_dentry(struct page *ipage, struct inode *inode) 70static int recover_dentry(struct inode *inode, struct page *ipage)
40{ 71{
41 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 72 struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
42 nid_t pino = le32_to_cpu(raw_inode->i_pino); 73 nid_t pino = le32_to_cpu(raw_inode->i_pino);
@@ -75,7 +106,7 @@ retry:
75 err = -EEXIST; 106 err = -EEXIST;
76 goto out_unmap_put; 107 goto out_unmap_put;
77 } 108 }
78 err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); 109 err = acquire_orphan_inode(F2FS_I_SB(inode));
79 if (err) { 110 if (err) {
80 iput(einode); 111 iput(einode);
81 goto out_unmap_put; 112 goto out_unmap_put;
@@ -110,35 +141,28 @@ out:
110 return err; 141 return err;
111} 142}
112 143
113static int recover_inode(struct inode *inode, struct page *node_page) 144static void recover_inode(struct inode *inode, struct page *page)
114{ 145{
115 struct f2fs_inode *raw_inode = F2FS_INODE(node_page); 146 struct f2fs_inode *raw = F2FS_INODE(page);
116
117 if (!IS_INODE(node_page))
118 return 0;
119 147
120 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 148 inode->i_mode = le16_to_cpu(raw->i_mode);
121 i_size_write(inode, le64_to_cpu(raw_inode->i_size)); 149 i_size_write(inode, le64_to_cpu(raw->i_size));
122 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 150 inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
123 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 151 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
124 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 152 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
125 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 153 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
126 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); 154 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
127 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 155 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
128
129 if (is_dent_dnode(node_page))
130 return recover_dentry(node_page, inode);
131 156
132 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", 157 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
133 ino_of_node(node_page), raw_inode->i_name); 158 ino_of_node(page), F2FS_INODE(page)->i_name);
134 return 0;
135} 159}
136 160
137static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 161static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
138{ 162{
139 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 163 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
140 struct curseg_info *curseg; 164 struct curseg_info *curseg;
141 struct page *page; 165 struct page *page = NULL;
142 block_t blkaddr; 166 block_t blkaddr;
143 int err = 0; 167 int err = 0;
144 168
@@ -146,20 +170,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
146 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 170 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
147 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 171 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
148 172
149 /* read node page */
150 page = alloc_page(GFP_F2FS_ZERO);
151 if (!page)
152 return -ENOMEM;
153 lock_page(page);
154
155 while (1) { 173 while (1) {
156 struct fsync_inode_entry *entry; 174 struct fsync_inode_entry *entry;
157 175
158 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 176 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
159 if (err) 177 return 0;
160 return err;
161 178
162 lock_page(page); 179 page = get_meta_page_ra(sbi, blkaddr);
163 180
164 if (cp_ver != cpver_of_node(page)) 181 if (cp_ver != cpver_of_node(page))
165 break; 182 break;
@@ -180,33 +197,38 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
180 } 197 }
181 198
182 /* add this fsync inode to the list */ 199 /* add this fsync inode to the list */
183 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); 200 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
184 if (!entry) { 201 if (!entry) {
185 err = -ENOMEM; 202 err = -ENOMEM;
186 break; 203 break;
187 } 204 }
188 205 /*
206 * CP | dnode(F) | inode(DF)
207 * For this case, we should not give up now.
208 */
189 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 209 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
190 if (IS_ERR(entry->inode)) { 210 if (IS_ERR(entry->inode)) {
191 err = PTR_ERR(entry->inode); 211 err = PTR_ERR(entry->inode);
192 kmem_cache_free(fsync_entry_slab, entry); 212 kmem_cache_free(fsync_entry_slab, entry);
213 if (err == -ENOENT)
214 goto next;
193 break; 215 break;
194 } 216 }
195 list_add_tail(&entry->list, head); 217 list_add_tail(&entry->list, head);
196 } 218 }
197 entry->blkaddr = blkaddr; 219 entry->blkaddr = blkaddr;
198 220
199 err = recover_inode(entry->inode, page); 221 if (IS_INODE(page)) {
200 if (err && err != -ENOENT) 222 entry->last_inode = blkaddr;
201 break; 223 if (is_dent_dnode(page))
224 entry->last_dentry = blkaddr;
225 }
202next: 226next:
203 /* check next segment */ 227 /* check next segment */
204 blkaddr = next_blkaddr_of_node(page); 228 blkaddr = next_blkaddr_of_node(page);
229 f2fs_put_page(page, 1);
205 } 230 }
206 231 f2fs_put_page(page, 1);
207 unlock_page(page);
208 __free_pages(page, 0);
209
210 return err; 232 return err;
211} 233}
212 234
@@ -279,16 +301,30 @@ got_it:
279 ino = ino_of_node(node_page); 301 ino = ino_of_node(node_page);
280 f2fs_put_page(node_page, 1); 302 f2fs_put_page(node_page, 1);
281 303
282 /* Deallocate previous index in the node page */ 304 if (ino != dn->inode->i_ino) {
283 inode = f2fs_iget(sbi->sb, ino); 305 /* Deallocate previous index in the node page */
284 if (IS_ERR(inode)) 306 inode = f2fs_iget(sbi->sb, ino);
285 return PTR_ERR(inode); 307 if (IS_ERR(inode))
308 return PTR_ERR(inode);
309 } else {
310 inode = dn->inode;
311 }
286 312
287 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
288 le16_to_cpu(sum.ofs_in_node); 314 le16_to_cpu(sum.ofs_in_node);
289 315
290 truncate_hole(inode, bidx, bidx + 1); 316 if (ino != dn->inode->i_ino) {
291 iput(inode); 317 truncate_hole(inode, bidx, bidx + 1);
318 iput(inode);
319 } else {
320 struct dnode_of_data tdn;
321 set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
322 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
323 return 0;
324 if (tdn.data_blkaddr != NULL_ADDR)
325 truncate_data_blocks_range(&tdn, 1);
326 f2fs_put_page(tdn.node_page, 1);
327 }
292 return 0; 328 return 0;
293} 329}
294 330
@@ -331,8 +367,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
331 f2fs_wait_on_page_writeback(dn.node_page, NODE); 367 f2fs_wait_on_page_writeback(dn.node_page, NODE);
332 368
333 get_node_info(sbi, dn.nid, &ni); 369 get_node_info(sbi, dn.nid, &ni);
334 f2fs_bug_on(ni.ino != ino_of_node(page)); 370 f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
335 f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); 371 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
336 372
337 for (; start < end; start++) { 373 for (; start < end; start++) {
338 block_t src, dest; 374 block_t src, dest;
@@ -344,7 +380,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
344 if (src == NULL_ADDR) { 380 if (src == NULL_ADDR) {
345 err = reserve_new_block(&dn); 381 err = reserve_new_block(&dn);
346 /* We should not get -ENOSPC */ 382 /* We should not get -ENOSPC */
347 f2fs_bug_on(err); 383 f2fs_bug_on(sbi, err);
348 } 384 }
349 385
350 /* Check the previous node page having this index */ 386 /* Check the previous node page having this index */
@@ -386,7 +422,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
386{ 422{
387 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 423 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
388 struct curseg_info *curseg; 424 struct curseg_info *curseg;
389 struct page *page; 425 struct page *page = NULL;
390 int err = 0; 426 int err = 0;
391 block_t blkaddr; 427 block_t blkaddr;
392 428
@@ -394,32 +430,41 @@ static int recover_data(struct f2fs_sb_info *sbi,
394 curseg = CURSEG_I(sbi, type); 430 curseg = CURSEG_I(sbi, type);
395 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 431 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
396 432
397 /* read node page */
398 page = alloc_page(GFP_F2FS_ZERO);
399 if (!page)
400 return -ENOMEM;
401
402 lock_page(page);
403
404 while (1) { 433 while (1) {
405 struct fsync_inode_entry *entry; 434 struct fsync_inode_entry *entry;
406 435
407 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 436 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
408 if (err) 437 break;
409 return err;
410 438
411 lock_page(page); 439 page = get_meta_page_ra(sbi, blkaddr);
412 440
413 if (cp_ver != cpver_of_node(page)) 441 if (cp_ver != cpver_of_node(page)) {
442 f2fs_put_page(page, 1);
414 break; 443 break;
444 }
415 445
416 entry = get_fsync_inode(head, ino_of_node(page)); 446 entry = get_fsync_inode(head, ino_of_node(page));
417 if (!entry) 447 if (!entry)
418 goto next; 448 goto next;
419 449 /*
450 * inode(x) | CP | inode(x) | dnode(F)
451 * In this case, we can lose the latest inode(x).
452 * So, call recover_inode for the inode update.
453 */
454 if (entry->last_inode == blkaddr)
455 recover_inode(entry->inode, page);
456 if (entry->last_dentry == blkaddr) {
457 err = recover_dentry(entry->inode, page);
458 if (err) {
459 f2fs_put_page(page, 1);
460 break;
461 }
462 }
420 err = do_recover_data(sbi, entry->inode, page, blkaddr); 463 err = do_recover_data(sbi, entry->inode, page, blkaddr);
421 if (err) 464 if (err) {
465 f2fs_put_page(page, 1);
422 break; 466 break;
467 }
423 468
424 if (entry->blkaddr == blkaddr) { 469 if (entry->blkaddr == blkaddr) {
425 iput(entry->inode); 470 iput(entry->inode);
@@ -429,11 +474,8 @@ static int recover_data(struct f2fs_sb_info *sbi,
429next: 474next:
430 /* check next segment */ 475 /* check next segment */
431 blkaddr = next_blkaddr_of_node(page); 476 blkaddr = next_blkaddr_of_node(page);
477 f2fs_put_page(page, 1);
432 } 478 }
433
434 unlock_page(page);
435 __free_pages(page, 0);
436
437 if (!err) 479 if (!err)
438 allocate_new_segments(sbi); 480 allocate_new_segments(sbi);
439 return err; 481 return err;
@@ -474,11 +516,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
474 /* step #2: recover data */ 516 /* step #2: recover data */
475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 517 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
476 if (!err) 518 if (!err)
477 f2fs_bug_on(!list_empty(&inode_list)); 519 f2fs_bug_on(sbi, !list_empty(&inode_list));
478out: 520out:
479 destroy_fsync_dnodes(&inode_list); 521 destroy_fsync_dnodes(&inode_list);
480 kmem_cache_destroy(fsync_entry_slab); 522 kmem_cache_destroy(fsync_entry_slab);
481 523
524 /* truncate meta pages to be used by the recovery */
525 truncate_inode_pages_range(META_MAPPING(sbi),
526 MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
527
482 if (err) { 528 if (err) {
483 truncate_inode_pages_final(NODE_MAPPING(sbi)); 529 truncate_inode_pages_final(NODE_MAPPING(sbi));
484 truncate_inode_pages_final(META_MAPPING(sbi)); 530 truncate_inode_pages_final(META_MAPPING(sbi));
@@ -494,8 +540,11 @@ out:
494 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 540 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
495 mutex_unlock(&sbi->cp_mutex); 541 mutex_unlock(&sbi->cp_mutex);
496 } else if (need_writecp) { 542 } else if (need_writecp) {
543 struct cp_control cpc = {
544 .reason = CP_SYNC,
545 };
497 mutex_unlock(&sbi->cp_mutex); 546 mutex_unlock(&sbi->cp_mutex);
498 write_checkpoint(sbi, false); 547 write_checkpoint(sbi, &cpc);
499 } else { 548 } else {
500 mutex_unlock(&sbi->cp_mutex); 549 mutex_unlock(&sbi->cp_mutex);
501 } 550 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0aa337cd5bba..923cb76fdc46 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -25,6 +25,8 @@
25#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
26 26
27static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *sit_entry_set_slab;
29static struct kmem_cache *inmem_entry_slab;
28 30
29/* 31/*
30 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 32 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -172,6 +174,60 @@ found_middle:
172 return result + __reverse_ffz(tmp); 174 return result + __reverse_ffz(tmp);
173} 175}
174 176
177void register_inmem_page(struct inode *inode, struct page *page)
178{
179 struct f2fs_inode_info *fi = F2FS_I(inode);
180 struct inmem_pages *new;
181
182 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
183
184 /* add atomic page indices to the list */
185 new->page = page;
186 INIT_LIST_HEAD(&new->list);
187
188 /* increase reference count with clean state */
189 mutex_lock(&fi->inmem_lock);
190 get_page(page);
191 list_add_tail(&new->list, &fi->inmem_pages);
192 mutex_unlock(&fi->inmem_lock);
193}
194
195void commit_inmem_pages(struct inode *inode, bool abort)
196{
197 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
198 struct f2fs_inode_info *fi = F2FS_I(inode);
199 struct inmem_pages *cur, *tmp;
200 bool submit_bio = false;
201 struct f2fs_io_info fio = {
202 .type = DATA,
203 .rw = WRITE_SYNC,
204 };
205
206 f2fs_balance_fs(sbi);
207 f2fs_lock_op(sbi);
208
209 mutex_lock(&fi->inmem_lock);
210 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
211 lock_page(cur->page);
212 if (!abort && cur->page->mapping == inode->i_mapping) {
213 f2fs_wait_on_page_writeback(cur->page, DATA);
214 if (clear_page_dirty_for_io(cur->page))
215 inode_dec_dirty_pages(inode);
216 do_write_data_page(cur->page, &fio);
217 submit_bio = true;
218 }
219 f2fs_put_page(cur->page, 1);
220 list_del(&cur->list);
221 kmem_cache_free(inmem_entry_slab, cur);
222 }
223 if (submit_bio)
224 f2fs_submit_merged_bio(sbi, DATA, WRITE);
225 mutex_unlock(&fi->inmem_lock);
226
227 filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
228 f2fs_unlock_op(sbi);
229}
230
175/* 231/*
176 * This function balances dirty node and dentry pages. 232 * This function balances dirty node and dentry pages.
177 * In addition, it controls garbage collection. 233 * In addition, it controls garbage collection.
@@ -205,24 +261,20 @@ repeat:
205 if (kthread_should_stop()) 261 if (kthread_should_stop())
206 return 0; 262 return 0;
207 263
208 spin_lock(&fcc->issue_lock); 264 if (!llist_empty(&fcc->issue_list)) {
209 if (fcc->issue_list) {
210 fcc->dispatch_list = fcc->issue_list;
211 fcc->issue_list = fcc->issue_tail = NULL;
212 }
213 spin_unlock(&fcc->issue_lock);
214
215 if (fcc->dispatch_list) {
216 struct bio *bio = bio_alloc(GFP_NOIO, 0); 265 struct bio *bio = bio_alloc(GFP_NOIO, 0);
217 struct flush_cmd *cmd, *next; 266 struct flush_cmd *cmd, *next;
218 int ret; 267 int ret;
219 268
269 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
270 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
271
220 bio->bi_bdev = sbi->sb->s_bdev; 272 bio->bi_bdev = sbi->sb->s_bdev;
221 ret = submit_bio_wait(WRITE_FLUSH, bio); 273 ret = submit_bio_wait(WRITE_FLUSH, bio);
222 274
223 for (cmd = fcc->dispatch_list; cmd; cmd = next) { 275 llist_for_each_entry_safe(cmd, next,
276 fcc->dispatch_list, llnode) {
224 cmd->ret = ret; 277 cmd->ret = ret;
225 next = cmd->next;
226 complete(&cmd->wait); 278 complete(&cmd->wait);
227 } 279 }
228 bio_put(bio); 280 bio_put(bio);
@@ -230,7 +282,7 @@ repeat:
230 } 282 }
231 283
232 wait_event_interruptible(*q, 284 wait_event_interruptible(*q,
233 kthread_should_stop() || fcc->issue_list); 285 kthread_should_stop() || !llist_empty(&fcc->issue_list));
234 goto repeat; 286 goto repeat;
235} 287}
236 288
@@ -249,15 +301,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 301 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
250 302
251 init_completion(&cmd.wait); 303 init_completion(&cmd.wait);
252 cmd.next = NULL;
253 304
254 spin_lock(&fcc->issue_lock); 305 llist_add(&cmd.llnode, &fcc->issue_list);
255 if (fcc->issue_list)
256 fcc->issue_tail->next = &cmd;
257 else
258 fcc->issue_list = &cmd;
259 fcc->issue_tail = &cmd;
260 spin_unlock(&fcc->issue_lock);
261 306
262 if (!fcc->dispatch_list) 307 if (!fcc->dispatch_list)
263 wake_up(&fcc->flush_wait_queue); 308 wake_up(&fcc->flush_wait_queue);
@@ -276,8 +321,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
276 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); 321 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
277 if (!fcc) 322 if (!fcc)
278 return -ENOMEM; 323 return -ENOMEM;
279 spin_lock_init(&fcc->issue_lock);
280 init_waitqueue_head(&fcc->flush_wait_queue); 324 init_waitqueue_head(&fcc->flush_wait_queue);
325 init_llist_head(&fcc->issue_list);
281 SM_I(sbi)->cmd_control_info = fcc; 326 SM_I(sbi)->cmd_control_info = fcc;
282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 327 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 328 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
@@ -317,6 +362,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
317 struct seg_entry *sentry = get_seg_entry(sbi, segno); 362 struct seg_entry *sentry = get_seg_entry(sbi, segno);
318 enum dirty_type t = sentry->type; 363 enum dirty_type t = sentry->type;
319 364
365 if (unlikely(t >= DIRTY)) {
366 f2fs_bug_on(sbi, 1);
367 return;
368 }
320 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) 369 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
321 dirty_i->nr_dirty[t]++; 370 dirty_i->nr_dirty[t]++;
322 } 371 }
@@ -376,8 +425,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
376static int f2fs_issue_discard(struct f2fs_sb_info *sbi, 425static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 block_t blkstart, block_t blklen) 426 block_t blkstart, block_t blklen)
378{ 427{
379 sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); 428 sector_t start = SECTOR_FROM_BLOCK(blkstart);
380 sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); 429 sector_t len = SECTOR_FROM_BLOCK(blklen);
381 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 430 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 431 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
383} 432}
@@ -392,22 +441,48 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
392 } 441 }
393} 442}
394 443
395static void add_discard_addrs(struct f2fs_sb_info *sbi, 444static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
396 unsigned int segno, struct seg_entry *se)
397{ 445{
398 struct list_head *head = &SM_I(sbi)->discard_list; 446 struct list_head *head = &SM_I(sbi)->discard_list;
399 struct discard_entry *new; 447 struct discard_entry *new;
400 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 448 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
401 int max_blocks = sbi->blocks_per_seg; 449 int max_blocks = sbi->blocks_per_seg;
450 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
402 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 451 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
403 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 452 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
404 unsigned long dmap[entries]; 453 unsigned long dmap[entries];
405 unsigned int start = 0, end = -1; 454 unsigned int start = 0, end = -1;
455 bool force = (cpc->reason == CP_DISCARD);
406 int i; 456 int i;
407 457
408 if (!test_opt(sbi, DISCARD)) 458 if (!force && !test_opt(sbi, DISCARD))
409 return; 459 return;
410 460
461 if (force && !se->valid_blocks) {
462 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
463 /*
464 * if this segment is registered in the prefree list, then
465 * we should skip adding a discard candidate, and let the
466 * checkpoint do that later.
467 */
468 mutex_lock(&dirty_i->seglist_lock);
469 if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
470 mutex_unlock(&dirty_i->seglist_lock);
471 cpc->trimmed += sbi->blocks_per_seg;
472 return;
473 }
474 mutex_unlock(&dirty_i->seglist_lock);
475
476 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
477 INIT_LIST_HEAD(&new->list);
478 new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
479 new->len = sbi->blocks_per_seg;
480 list_add_tail(&new->list, head);
481 SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
482 cpc->trimmed += sbi->blocks_per_seg;
483 return;
484 }
485
411 /* zero block will be discarded through the prefree list */ 486 /* zero block will be discarded through the prefree list */
412 if (!se->valid_blocks || se->valid_blocks == max_blocks) 487 if (!se->valid_blocks || se->valid_blocks == max_blocks)
413 return; 488 return;
@@ -416,23 +491,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
416 for (i = 0; i < entries; i++) 491 for (i = 0; i < entries; i++)
417 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 492 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
418 493
419 while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 494 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
420 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 495 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
421 if (start >= max_blocks) 496 if (start >= max_blocks)
422 break; 497 break;
423 498
424 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 499 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
425 500
501 if (end - start < cpc->trim_minlen)
502 continue;
503
426 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); 504 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
427 INIT_LIST_HEAD(&new->list); 505 INIT_LIST_HEAD(&new->list);
428 new->blkaddr = START_BLOCK(sbi, segno) + start; 506 new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
429 new->len = end - start; 507 new->len = end - start;
508 cpc->trimmed += end - start;
430 509
431 list_add_tail(&new->list, head); 510 list_add_tail(&new->list, head);
432 SM_I(sbi)->nr_discards += end - start; 511 SM_I(sbi)->nr_discards += end - start;
433 } 512 }
434} 513}
435 514
515void release_discard_addrs(struct f2fs_sb_info *sbi)
516{
517 struct list_head *head = &(SM_I(sbi)->discard_list);
518 struct discard_entry *entry, *this;
519
520 /* drop caches */
521 list_for_each_entry_safe(entry, this, head, list) {
522 list_del(&entry->list);
523 kmem_cache_free(discard_entry_slab, entry);
524 }
525}
526
436/* 527/*
437 * Should call clear_prefree_segments after checkpoint is done. 528 * Should call clear_prefree_segments after checkpoint is done.
438 */ 529 */
@@ -440,10 +531,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
440{ 531{
441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 532 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
442 unsigned int segno; 533 unsigned int segno;
443 unsigned int total_segs = TOTAL_SEGS(sbi);
444 534
445 mutex_lock(&dirty_i->seglist_lock); 535 mutex_lock(&dirty_i->seglist_lock);
446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) 536 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
447 __set_test_and_free(sbi, segno); 537 __set_test_and_free(sbi, segno);
448 mutex_unlock(&dirty_i->seglist_lock); 538 mutex_unlock(&dirty_i->seglist_lock);
449} 539}
@@ -454,17 +544,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
454 struct discard_entry *entry, *this; 544 struct discard_entry *entry, *this;
455 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 545 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
456 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 546 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
457 unsigned int total_segs = TOTAL_SEGS(sbi);
458 unsigned int start = 0, end = -1; 547 unsigned int start = 0, end = -1;
459 548
460 mutex_lock(&dirty_i->seglist_lock); 549 mutex_lock(&dirty_i->seglist_lock);
461 550
462 while (1) { 551 while (1) {
463 int i; 552 int i;
464 start = find_next_bit(prefree_map, total_segs, end + 1); 553 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
465 if (start >= total_segs) 554 if (start >= MAIN_SEGS(sbi))
466 break; 555 break;
467 end = find_next_zero_bit(prefree_map, total_segs, start + 1); 556 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
557 start + 1);
468 558
469 for (i = start; i < end; i++) 559 for (i = start; i < end; i++)
470 clear_bit(i, prefree_map); 560 clear_bit(i, prefree_map);
@@ -488,11 +578,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
488 } 578 }
489} 579}
490 580
491static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 581static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
492{ 582{
493 struct sit_info *sit_i = SIT_I(sbi); 583 struct sit_info *sit_i = SIT_I(sbi);
494 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) 584
585 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
495 sit_i->dirty_sentries++; 586 sit_i->dirty_sentries++;
587 return false;
588 }
589
590 return true;
496} 591}
497 592
498static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 593static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
@@ -516,7 +611,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
516 new_vblocks = se->valid_blocks + del; 611 new_vblocks = se->valid_blocks + del;
517 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 612 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
518 613
519 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 614 f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
520 (new_vblocks > sbi->blocks_per_seg))); 615 (new_vblocks > sbi->blocks_per_seg)));
521 616
522 se->valid_blocks = new_vblocks; 617 se->valid_blocks = new_vblocks;
@@ -526,10 +621,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
526 /* Update valid block bitmap */ 621 /* Update valid block bitmap */
527 if (del > 0) { 622 if (del > 0) {
528 if (f2fs_set_bit(offset, se->cur_valid_map)) 623 if (f2fs_set_bit(offset, se->cur_valid_map))
529 BUG(); 624 f2fs_bug_on(sbi, 1);
530 } else { 625 } else {
531 if (!f2fs_clear_bit(offset, se->cur_valid_map)) 626 if (!f2fs_clear_bit(offset, se->cur_valid_map))
532 BUG(); 627 f2fs_bug_on(sbi, 1);
533 } 628 }
534 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 629 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
535 se->ckpt_valid_blocks += del; 630 se->ckpt_valid_blocks += del;
@@ -558,7 +653,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
558 unsigned int segno = GET_SEGNO(sbi, addr); 653 unsigned int segno = GET_SEGNO(sbi, addr);
559 struct sit_info *sit_i = SIT_I(sbi); 654 struct sit_info *sit_i = SIT_I(sbi);
560 655
561 f2fs_bug_on(addr == NULL_ADDR); 656 f2fs_bug_on(sbi, addr == NULL_ADDR);
562 if (addr == NEW_ADDR) 657 if (addr == NEW_ADDR)
563 return; 658 return;
564 659
@@ -634,7 +729,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
634 unsigned int segno = curseg->segno + 1; 729 unsigned int segno = curseg->segno + 1;
635 struct free_segmap_info *free_i = FREE_I(sbi); 730 struct free_segmap_info *free_i = FREE_I(sbi);
636 731
637 if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) 732 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
638 return !test_bit(segno, free_i->free_segmap); 733 return !test_bit(segno, free_i->free_segmap);
639 return 0; 734 return 0;
640} 735}
@@ -648,7 +743,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
648{ 743{
649 struct free_segmap_info *free_i = FREE_I(sbi); 744 struct free_segmap_info *free_i = FREE_I(sbi);
650 unsigned int segno, secno, zoneno; 745 unsigned int segno, secno, zoneno;
651 unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; 746 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
652 unsigned int hint = *newseg / sbi->segs_per_sec; 747 unsigned int hint = *newseg / sbi->segs_per_sec;
653 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); 748 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
654 unsigned int left_start = hint; 749 unsigned int left_start = hint;
@@ -660,18 +755,18 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
660 755
661 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 756 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
662 segno = find_next_zero_bit(free_i->free_segmap, 757 segno = find_next_zero_bit(free_i->free_segmap,
663 TOTAL_SEGS(sbi), *newseg + 1); 758 MAIN_SEGS(sbi), *newseg + 1);
664 if (segno - *newseg < sbi->segs_per_sec - 759 if (segno - *newseg < sbi->segs_per_sec -
665 (*newseg % sbi->segs_per_sec)) 760 (*newseg % sbi->segs_per_sec))
666 goto got_it; 761 goto got_it;
667 } 762 }
668find_other_zone: 763find_other_zone:
669 secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); 764 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
670 if (secno >= TOTAL_SECS(sbi)) { 765 if (secno >= MAIN_SECS(sbi)) {
671 if (dir == ALLOC_RIGHT) { 766 if (dir == ALLOC_RIGHT) {
672 secno = find_next_zero_bit(free_i->free_secmap, 767 secno = find_next_zero_bit(free_i->free_secmap,
673 TOTAL_SECS(sbi), 0); 768 MAIN_SECS(sbi), 0);
674 f2fs_bug_on(secno >= TOTAL_SECS(sbi)); 769 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
675 } else { 770 } else {
676 go_left = 1; 771 go_left = 1;
677 left_start = hint - 1; 772 left_start = hint - 1;
@@ -686,8 +781,8 @@ find_other_zone:
686 continue; 781 continue;
687 } 782 }
688 left_start = find_next_zero_bit(free_i->free_secmap, 783 left_start = find_next_zero_bit(free_i->free_secmap,
689 TOTAL_SECS(sbi), 0); 784 MAIN_SECS(sbi), 0);
690 f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); 785 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
691 break; 786 break;
692 } 787 }
693 secno = left_start; 788 secno = left_start;
@@ -726,7 +821,7 @@ skip_left:
726 } 821 }
727got_it: 822got_it:
728 /* set it as dirty segment in free segmap */ 823 /* set it as dirty segment in free segmap */
729 f2fs_bug_on(test_bit(segno, free_i->free_segmap)); 824 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
730 __set_inuse(sbi, segno); 825 __set_inuse(sbi, segno);
731 *newseg = segno; 826 *newseg = segno;
732 write_unlock(&free_i->segmap_lock); 827 write_unlock(&free_i->segmap_lock);
@@ -898,6 +993,37 @@ static const struct segment_allocation default_salloc_ops = {
898 .allocate_segment = allocate_segment_by_default, 993 .allocate_segment = allocate_segment_by_default,
899}; 994};
900 995
996int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
997{
998 __u64 start = range->start >> sbi->log_blocksize;
999 __u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1000 unsigned int start_segno, end_segno;
1001 struct cp_control cpc;
1002
1003 if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1004 range->len < sbi->blocksize)
1005 return -EINVAL;
1006
1007 if (end <= MAIN_BLKADDR(sbi))
1008 goto out;
1009
1010 /* start/end segment number in main_area */
1011 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1012 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1013 GET_SEGNO(sbi, end);
1014 cpc.reason = CP_DISCARD;
1015 cpc.trim_start = start_segno;
1016 cpc.trim_end = end_segno;
1017 cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1018 cpc.trimmed = 0;
1019
1020 /* do checkpoint to issue discard commands safely */
1021 write_checkpoint(sbi, &cpc);
1022out:
1023 range->len = cpc.trimmed << sbi->log_blocksize;
1024 return 0;
1025}
1026
901static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 1027static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
902{ 1028{
903 struct curseg_info *curseg = CURSEG_I(sbi, type); 1029 struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -953,15 +1079,15 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
953 1079
954static int __get_segment_type(struct page *page, enum page_type p_type) 1080static int __get_segment_type(struct page *page, enum page_type p_type)
955{ 1081{
956 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1082 switch (F2FS_P_SB(page)->active_logs) {
957 switch (sbi->active_logs) {
958 case 2: 1083 case 2:
959 return __get_segment_type_2(page, p_type); 1084 return __get_segment_type_2(page, p_type);
960 case 4: 1085 case 4:
961 return __get_segment_type_4(page, p_type); 1086 return __get_segment_type_4(page, p_type);
962 } 1087 }
963 /* NR_CURSEG_TYPE(6) logs by default */ 1088 /* NR_CURSEG_TYPE(6) logs by default */
964 f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); 1089 f2fs_bug_on(F2FS_P_SB(page),
1090 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
965 return __get_segment_type_6(page, p_type); 1091 return __get_segment_type_6(page, p_type);
966} 1092}
967 1093
@@ -1041,11 +1167,11 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1041void write_data_page(struct page *page, struct dnode_of_data *dn, 1167void write_data_page(struct page *page, struct dnode_of_data *dn,
1042 block_t *new_blkaddr, struct f2fs_io_info *fio) 1168 block_t *new_blkaddr, struct f2fs_io_info *fio)
1043{ 1169{
1044 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 1170 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1045 struct f2fs_summary sum; 1171 struct f2fs_summary sum;
1046 struct node_info ni; 1172 struct node_info ni;
1047 1173
1048 f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); 1174 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1049 get_node_info(sbi, dn->nid, &ni); 1175 get_node_info(sbi, dn->nid, &ni);
1050 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1176 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1051 1177
@@ -1055,9 +1181,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn,
1055void rewrite_data_page(struct page *page, block_t old_blkaddr, 1181void rewrite_data_page(struct page *page, block_t old_blkaddr,
1056 struct f2fs_io_info *fio) 1182 struct f2fs_io_info *fio)
1057{ 1183{
1058 struct inode *inode = page->mapping->host; 1184 f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1059 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1060 f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
1061} 1185}
1062 1186
1063void recover_data_page(struct f2fs_sb_info *sbi, 1187void recover_data_page(struct f2fs_sb_info *sbi,
@@ -1130,8 +1254,9 @@ out:
1130void f2fs_wait_on_page_writeback(struct page *page, 1254void f2fs_wait_on_page_writeback(struct page *page,
1131 enum page_type type) 1255 enum page_type type)
1132{ 1256{
1133 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1134 if (PageWriteback(page)) { 1257 if (PageWriteback(page)) {
1258 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1259
1135 if (is_merged_page(sbi, page, type)) 1260 if (is_merged_page(sbi, page, type))
1136 f2fs_submit_merged_bio(sbi, type, WRITE); 1261 f2fs_submit_merged_bio(sbi, type, WRITE);
1137 wait_on_page_writeback(page); 1262 wait_on_page_writeback(page);
@@ -1400,7 +1525,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1400 unsigned int segno) 1525 unsigned int segno)
1401{ 1526{
1402 struct sit_info *sit_i = SIT_I(sbi); 1527 struct sit_info *sit_i = SIT_I(sbi);
1403 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); 1528 unsigned int offset = SIT_BLOCK_OFFSET(segno);
1404 block_t blk_addr = sit_i->sit_base_addr + offset; 1529 block_t blk_addr = sit_i->sit_base_addr + offset;
1405 1530
1406 check_seg_range(sbi, segno); 1531 check_seg_range(sbi, segno);
@@ -1426,7 +1551,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1426 /* get current sit block page without lock */ 1551 /* get current sit block page without lock */
1427 src_page = get_meta_page(sbi, src_off); 1552 src_page = get_meta_page(sbi, src_off);
1428 dst_page = grab_meta_page(sbi, dst_off); 1553 dst_page = grab_meta_page(sbi, dst_off);
1429 f2fs_bug_on(PageDirty(src_page)); 1554 f2fs_bug_on(sbi, PageDirty(src_page));
1430 1555
1431 src_addr = page_address(src_page); 1556 src_addr = page_address(src_page);
1432 dst_addr = page_address(dst_page); 1557 dst_addr = page_address(dst_page);
@@ -1440,101 +1565,192 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1440 return dst_page; 1565 return dst_page;
1441} 1566}
1442 1567
1443static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) 1568static struct sit_entry_set *grab_sit_entry_set(void)
1569{
1570 struct sit_entry_set *ses =
1571 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1572
1573 ses->entry_cnt = 0;
1574 INIT_LIST_HEAD(&ses->set_list);
1575 return ses;
1576}
1577
1578static void release_sit_entry_set(struct sit_entry_set *ses)
1579{
1580 list_del(&ses->set_list);
1581 kmem_cache_free(sit_entry_set_slab, ses);
1582}
1583
1584static void adjust_sit_entry_set(struct sit_entry_set *ses,
1585 struct list_head *head)
1586{
1587 struct sit_entry_set *next = ses;
1588
1589 if (list_is_last(&ses->set_list, head))
1590 return;
1591
1592 list_for_each_entry_continue(next, head, set_list)
1593 if (ses->entry_cnt <= next->entry_cnt)
1594 break;
1595
1596 list_move_tail(&ses->set_list, &next->set_list);
1597}
1598
1599static void add_sit_entry(unsigned int segno, struct list_head *head)
1600{
1601 struct sit_entry_set *ses;
1602 unsigned int start_segno = START_SEGNO(segno);
1603
1604 list_for_each_entry(ses, head, set_list) {
1605 if (ses->start_segno == start_segno) {
1606 ses->entry_cnt++;
1607 adjust_sit_entry_set(ses, head);
1608 return;
1609 }
1610 }
1611
1612 ses = grab_sit_entry_set();
1613
1614 ses->start_segno = start_segno;
1615 ses->entry_cnt++;
1616 list_add(&ses->set_list, head);
1617}
1618
1619static void add_sits_in_set(struct f2fs_sb_info *sbi)
1620{
1621 struct f2fs_sm_info *sm_info = SM_I(sbi);
1622 struct list_head *set_list = &sm_info->sit_entry_set;
1623 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1624 unsigned int segno;
1625
1626 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1627 add_sit_entry(segno, set_list);
1628}
1629
1630static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1444{ 1631{
1445 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1632 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1446 struct f2fs_summary_block *sum = curseg->sum_blk; 1633 struct f2fs_summary_block *sum = curseg->sum_blk;
1447 int i; 1634 int i;
1448 1635
1449 /* 1636 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1450 * If the journal area in the current summary is full of sit entries, 1637 unsigned int segno;
1451 * all the sit entries will be flushed. Otherwise the sit entries 1638 bool dirtied;
1452 * are not able to replace with newly hot sit entries. 1639
1453 */ 1640 segno = le32_to_cpu(segno_in_journal(sum, i));
1454 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { 1641 dirtied = __mark_sit_entry_dirty(sbi, segno);
1455 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { 1642
1456 unsigned int segno; 1643 if (!dirtied)
1457 segno = le32_to_cpu(segno_in_journal(sum, i)); 1644 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1458 __mark_sit_entry_dirty(sbi, segno);
1459 }
1460 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1461 return true;
1462 } 1645 }
1463 return false; 1646 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1464} 1647}
1465 1648
1466/* 1649/*
1467 * CP calls this function, which flushes SIT entries including sit_journal, 1650 * CP calls this function, which flushes SIT entries including sit_journal,
1468 * and moves prefree segs to free segs. 1651 * and moves prefree segs to free segs.
1469 */ 1652 */
1470void flush_sit_entries(struct f2fs_sb_info *sbi) 1653void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1471{ 1654{
1472 struct sit_info *sit_i = SIT_I(sbi); 1655 struct sit_info *sit_i = SIT_I(sbi);
1473 unsigned long *bitmap = sit_i->dirty_sentries_bitmap; 1656 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1474 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1657 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1475 struct f2fs_summary_block *sum = curseg->sum_blk; 1658 struct f2fs_summary_block *sum = curseg->sum_blk;
1476 unsigned long nsegs = TOTAL_SEGS(sbi); 1659 struct sit_entry_set *ses, *tmp;
1477 struct page *page = NULL; 1660 struct list_head *head = &SM_I(sbi)->sit_entry_set;
1478 struct f2fs_sit_block *raw_sit = NULL; 1661 bool to_journal = true;
1479 unsigned int start = 0, end = 0; 1662 struct seg_entry *se;
1480 unsigned int segno;
1481 bool flushed;
1482 1663
1483 mutex_lock(&curseg->curseg_mutex); 1664 mutex_lock(&curseg->curseg_mutex);
1484 mutex_lock(&sit_i->sentry_lock); 1665 mutex_lock(&sit_i->sentry_lock);
1485 1666
1486 /* 1667 /*
1487 * "flushed" indicates whether sit entries in journal are flushed 1668 * add and account sit entries of dirty bitmap in sit entry
1488 * to the SIT area or not. 1669 * set temporarily
1489 */ 1670 */
1490 flushed = flush_sits_in_journal(sbi); 1671 add_sits_in_set(sbi);
1491 1672
1492 for_each_set_bit(segno, bitmap, nsegs) { 1673 /*
1493 struct seg_entry *se = get_seg_entry(sbi, segno); 1674 * if there are no enough space in journal to store dirty sit
1494 int sit_offset, offset; 1675 * entries, remove all entries from journal and add and account
1676 * them in sit entry set.
1677 */
1678 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1679 remove_sits_in_journal(sbi);
1495 1680
1496 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1681 if (!sit_i->dirty_sentries)
1682 goto out;
1497 1683
1498 /* add discard candidates */ 1684 /*
1499 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) 1685 * there are two steps to flush sit entries:
1500 add_discard_addrs(sbi, segno, se); 1686 * #1, flush sit entries to journal in current cold data summary block.
1687 * #2, flush sit entries to sit page.
1688 */
1689 list_for_each_entry_safe(ses, tmp, head, set_list) {
1690 struct page *page;
1691 struct f2fs_sit_block *raw_sit = NULL;
1692 unsigned int start_segno = ses->start_segno;
1693 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1694 (unsigned long)MAIN_SEGS(sbi));
1695 unsigned int segno = start_segno;
1696
1697 if (to_journal &&
1698 !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1699 to_journal = false;
1700
1701 if (!to_journal) {
1702 page = get_next_sit_page(sbi, start_segno);
1703 raw_sit = page_address(page);
1704 }
1501 1705
1502 if (flushed) 1706 /* flush dirty sit entries in region of current sit set */
1503 goto to_sit_page; 1707 for_each_set_bit_from(segno, bitmap, end) {
1708 int offset, sit_offset;
1504 1709
1505 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); 1710 se = get_seg_entry(sbi, segno);
1506 if (offset >= 0) { 1711
1507 segno_in_journal(sum, offset) = cpu_to_le32(segno); 1712 /* add discard candidates */
1508 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); 1713 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1509 goto flush_done; 1714 cpc->trim_start = segno;
1510 } 1715 add_discard_addrs(sbi, cpc);
1511to_sit_page:
1512 if (!page || (start > segno) || (segno > end)) {
1513 if (page) {
1514 f2fs_put_page(page, 1);
1515 page = NULL;
1516 } 1716 }
1517 1717
1518 start = START_SEGNO(sit_i, segno); 1718 if (to_journal) {
1519 end = start + SIT_ENTRY_PER_BLOCK - 1; 1719 offset = lookup_journal_in_cursum(sum,
1720 SIT_JOURNAL, segno, 1);
1721 f2fs_bug_on(sbi, offset < 0);
1722 segno_in_journal(sum, offset) =
1723 cpu_to_le32(segno);
1724 seg_info_to_raw_sit(se,
1725 &sit_in_journal(sum, offset));
1726 } else {
1727 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1728 seg_info_to_raw_sit(se,
1729 &raw_sit->entries[sit_offset]);
1730 }
1520 1731
1521 /* read sit block that will be updated */ 1732 __clear_bit(segno, bitmap);
1522 page = get_next_sit_page(sbi, start); 1733 sit_i->dirty_sentries--;
1523 raw_sit = page_address(page); 1734 ses->entry_cnt--;
1524 } 1735 }
1525 1736
1526 /* udpate entry in SIT block */ 1737 if (!to_journal)
1527 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); 1738 f2fs_put_page(page, 1);
1528flush_done: 1739
1529 __clear_bit(segno, bitmap); 1740 f2fs_bug_on(sbi, ses->entry_cnt);
1530 sit_i->dirty_sentries--; 1741 release_sit_entry_set(ses);
1742 }
1743
1744 f2fs_bug_on(sbi, !list_empty(head));
1745 f2fs_bug_on(sbi, sit_i->dirty_sentries);
1746out:
1747 if (cpc->reason == CP_DISCARD) {
1748 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1749 add_discard_addrs(sbi, cpc);
1531 } 1750 }
1532 mutex_unlock(&sit_i->sentry_lock); 1751 mutex_unlock(&sit_i->sentry_lock);
1533 mutex_unlock(&curseg->curseg_mutex); 1752 mutex_unlock(&curseg->curseg_mutex);
1534 1753
1535 /* writeout last modified SIT block */
1536 f2fs_put_page(page, 1);
1537
1538 set_prefree_as_free_segments(sbi); 1754 set_prefree_as_free_segments(sbi);
1539} 1755}
1540 1756
@@ -1554,16 +1770,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1554 1770
1555 SM_I(sbi)->sit_info = sit_i; 1771 SM_I(sbi)->sit_info = sit_i;
1556 1772
1557 sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); 1773 sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1558 if (!sit_i->sentries) 1774 if (!sit_i->sentries)
1559 return -ENOMEM; 1775 return -ENOMEM;
1560 1776
1561 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1777 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1562 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); 1778 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1563 if (!sit_i->dirty_sentries_bitmap) 1779 if (!sit_i->dirty_sentries_bitmap)
1564 return -ENOMEM; 1780 return -ENOMEM;
1565 1781
1566 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1782 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1567 sit_i->sentries[start].cur_valid_map 1783 sit_i->sentries[start].cur_valid_map
1568 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 1784 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1569 sit_i->sentries[start].ckpt_valid_map 1785 sit_i->sentries[start].ckpt_valid_map
@@ -1574,7 +1790,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1574 } 1790 }
1575 1791
1576 if (sbi->segs_per_sec > 1) { 1792 if (sbi->segs_per_sec > 1) {
1577 sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * 1793 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1578 sizeof(struct sec_entry)); 1794 sizeof(struct sec_entry));
1579 if (!sit_i->sec_entries) 1795 if (!sit_i->sec_entries)
1580 return -ENOMEM; 1796 return -ENOMEM;
@@ -1609,7 +1825,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1609 1825
1610static int build_free_segmap(struct f2fs_sb_info *sbi) 1826static int build_free_segmap(struct f2fs_sb_info *sbi)
1611{ 1827{
1612 struct f2fs_sm_info *sm_info = SM_I(sbi);
1613 struct free_segmap_info *free_i; 1828 struct free_segmap_info *free_i;
1614 unsigned int bitmap_size, sec_bitmap_size; 1829 unsigned int bitmap_size, sec_bitmap_size;
1615 1830
@@ -1620,12 +1835,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1620 1835
1621 SM_I(sbi)->free_info = free_i; 1836 SM_I(sbi)->free_info = free_i;
1622 1837
1623 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1838 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1624 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); 1839 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1625 if (!free_i->free_segmap) 1840 if (!free_i->free_segmap)
1626 return -ENOMEM; 1841 return -ENOMEM;
1627 1842
1628 sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1843 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1629 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); 1844 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1630 if (!free_i->free_secmap) 1845 if (!free_i->free_secmap)
1631 return -ENOMEM; 1846 return -ENOMEM;
@@ -1635,8 +1850,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1635 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 1850 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1636 1851
1637 /* init free segmap information */ 1852 /* init free segmap information */
1638 free_i->start_segno = 1853 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1639 (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1640 free_i->free_segments = 0; 1854 free_i->free_segments = 0;
1641 free_i->free_sections = 0; 1855 free_i->free_sections = 0;
1642 rwlock_init(&free_i->segmap_lock); 1856 rwlock_init(&free_i->segmap_lock);
@@ -1673,7 +1887,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1673 int sit_blk_cnt = SIT_BLK_CNT(sbi); 1887 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1674 unsigned int i, start, end; 1888 unsigned int i, start, end;
1675 unsigned int readed, start_blk = 0; 1889 unsigned int readed, start_blk = 0;
1676 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1890 int nrpages = MAX_BIO_BLOCKS(sbi);
1677 1891
1678 do { 1892 do {
1679 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); 1893 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
@@ -1681,7 +1895,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1681 start = start_blk * sit_i->sents_per_block; 1895 start = start_blk * sit_i->sents_per_block;
1682 end = (start_blk + readed) * sit_i->sents_per_block; 1896 end = (start_blk + readed) * sit_i->sents_per_block;
1683 1897
1684 for (; start < end && start < TOTAL_SEGS(sbi); start++) { 1898 for (; start < end && start < MAIN_SEGS(sbi); start++) {
1685 struct seg_entry *se = &sit_i->sentries[start]; 1899 struct seg_entry *se = &sit_i->sentries[start];
1686 struct f2fs_sit_block *sit_blk; 1900 struct f2fs_sit_block *sit_blk;
1687 struct f2fs_sit_entry sit; 1901 struct f2fs_sit_entry sit;
@@ -1719,7 +1933,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
1719 unsigned int start; 1933 unsigned int start;
1720 int type; 1934 int type;
1721 1935
1722 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1936 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1723 struct seg_entry *sentry = get_seg_entry(sbi, start); 1937 struct seg_entry *sentry = get_seg_entry(sbi, start);
1724 if (!sentry->valid_blocks) 1938 if (!sentry->valid_blocks)
1725 __set_free(sbi, start); 1939 __set_free(sbi, start);
@@ -1736,18 +1950,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1736{ 1950{
1737 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1951 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1738 struct free_segmap_info *free_i = FREE_I(sbi); 1952 struct free_segmap_info *free_i = FREE_I(sbi);
1739 unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); 1953 unsigned int segno = 0, offset = 0;
1740 unsigned short valid_blocks; 1954 unsigned short valid_blocks;
1741 1955
1742 while (1) { 1956 while (1) {
1743 /* find dirty segment based on free segmap */ 1957 /* find dirty segment based on free segmap */
1744 segno = find_next_inuse(free_i, total_segs, offset); 1958 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1745 if (segno >= total_segs) 1959 if (segno >= MAIN_SEGS(sbi))
1746 break; 1960 break;
1747 offset = segno + 1; 1961 offset = segno + 1;
1748 valid_blocks = get_valid_blocks(sbi, segno, 0); 1962 valid_blocks = get_valid_blocks(sbi, segno, 0);
1749 if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) 1963 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
1964 continue;
1965 if (valid_blocks > sbi->blocks_per_seg) {
1966 f2fs_bug_on(sbi, 1);
1750 continue; 1967 continue;
1968 }
1751 mutex_lock(&dirty_i->seglist_lock); 1969 mutex_lock(&dirty_i->seglist_lock);
1752 __locate_dirty_segment(sbi, segno, DIRTY); 1970 __locate_dirty_segment(sbi, segno, DIRTY);
1753 mutex_unlock(&dirty_i->seglist_lock); 1971 mutex_unlock(&dirty_i->seglist_lock);
@@ -1757,7 +1975,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1757static int init_victim_secmap(struct f2fs_sb_info *sbi) 1975static int init_victim_secmap(struct f2fs_sb_info *sbi)
1758{ 1976{
1759 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1977 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1760 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1978 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1761 1979
1762 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); 1980 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
1763 if (!dirty_i->victim_secmap) 1981 if (!dirty_i->victim_secmap)
@@ -1778,7 +1996,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1778 SM_I(sbi)->dirty_info = dirty_i; 1996 SM_I(sbi)->dirty_info = dirty_i;
1779 mutex_init(&dirty_i->seglist_lock); 1997 mutex_init(&dirty_i->seglist_lock);
1780 1998
1781 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1999 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1782 2000
1783 for (i = 0; i < NR_DIRTY_TYPE; i++) { 2001 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1784 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 2002 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
@@ -1802,7 +2020,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1802 2020
1803 sit_i->min_mtime = LLONG_MAX; 2021 sit_i->min_mtime = LLONG_MAX;
1804 2022
1805 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 2023 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
1806 unsigned int i; 2024 unsigned int i;
1807 unsigned long long mtime = 0; 2025 unsigned long long mtime = 0;
1808 2026
@@ -1840,13 +2058,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1840 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 2058 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1841 sm_info->rec_prefree_segments = sm_info->main_segments * 2059 sm_info->rec_prefree_segments = sm_info->main_segments *
1842 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 2060 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1843 sm_info->ipu_policy = F2FS_IPU_DISABLE; 2061 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
1844 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 2062 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2063 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
1845 2064
1846 INIT_LIST_HEAD(&sm_info->discard_list); 2065 INIT_LIST_HEAD(&sm_info->discard_list);
1847 sm_info->nr_discards = 0; 2066 sm_info->nr_discards = 0;
1848 sm_info->max_discards = 0; 2067 sm_info->max_discards = 0;
1849 2068
2069 INIT_LIST_HEAD(&sm_info->sit_entry_set);
2070
1850 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 2071 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
1851 err = create_flush_cmd_control(sbi); 2072 err = create_flush_cmd_control(sbi);
1852 if (err) 2073 if (err)
@@ -1942,7 +2163,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
1942 return; 2163 return;
1943 2164
1944 if (sit_i->sentries) { 2165 if (sit_i->sentries) {
1945 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 2166 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1946 kfree(sit_i->sentries[start].cur_valid_map); 2167 kfree(sit_i->sentries[start].cur_valid_map);
1947 kfree(sit_i->sentries[start].ckpt_valid_map); 2168 kfree(sit_i->sentries[start].ckpt_valid_map);
1948 } 2169 }
@@ -1976,11 +2197,30 @@ int __init create_segment_manager_caches(void)
1976 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 2197 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1977 sizeof(struct discard_entry)); 2198 sizeof(struct discard_entry));
1978 if (!discard_entry_slab) 2199 if (!discard_entry_slab)
1979 return -ENOMEM; 2200 goto fail;
2201
2202 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2203 sizeof(struct nat_entry_set));
2204 if (!sit_entry_set_slab)
2205 goto destory_discard_entry;
2206
2207 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2208 sizeof(struct inmem_pages));
2209 if (!inmem_entry_slab)
2210 goto destroy_sit_entry_set;
1980 return 0; 2211 return 0;
2212
2213destroy_sit_entry_set:
2214 kmem_cache_destroy(sit_entry_set_slab);
2215destory_discard_entry:
2216 kmem_cache_destroy(discard_entry_slab);
2217fail:
2218 return -ENOMEM;
1981} 2219}
1982 2220
1983void destroy_segment_manager_caches(void) 2221void destroy_segment_manager_caches(void)
1984{ 2222{
2223 kmem_cache_destroy(sit_entry_set_slab);
1985 kmem_cache_destroy(discard_entry_slab); 2224 kmem_cache_destroy(discard_entry_slab);
2225 kmem_cache_destroy(inmem_entry_slab);
1986} 2226}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ff483257283b..2495bec1c621 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -45,16 +45,26 @@
45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ 45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
46 sbi->segs_per_sec)) \ 46 sbi->segs_per_sec)) \
47 47
48#define START_BLOCK(sbi, segno) \ 48#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
49 (SM_I(sbi)->seg0_blkaddr + \ 49#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
50
51#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
52#define MAIN_SECS(sbi) (sbi->total_sections)
53
54#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
55#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
56
57#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
58#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
59 sbi->log_blocks_per_seg))
60
61#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
50 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) 62 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
63
51#define NEXT_FREE_BLKADDR(sbi, curseg) \ 64#define NEXT_FREE_BLKADDR(sbi, curseg) \
52 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) 65 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
53 66
54#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) 67#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
55
56#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 68#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 69 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ 70#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
@@ -77,23 +87,21 @@
77 87
78#define SIT_ENTRY_OFFSET(sit_i, segno) \ 88#define SIT_ENTRY_OFFSET(sit_i, segno) \
79 (segno % sit_i->sents_per_block) 89 (segno % sit_i->sents_per_block)
80#define SIT_BLOCK_OFFSET(sit_i, segno) \ 90#define SIT_BLOCK_OFFSET(segno) \
81 (segno / SIT_ENTRY_PER_BLOCK) 91 (segno / SIT_ENTRY_PER_BLOCK)
82#define START_SEGNO(sit_i, segno) \ 92#define START_SEGNO(segno) \
83 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) 93 (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
84#define SIT_BLK_CNT(sbi) \ 94#define SIT_BLK_CNT(sbi) \
85 ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) 95 ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
86#define f2fs_bitmap_size(nr) \ 96#define f2fs_bitmap_size(nr) \
87 (BITS_TO_LONGS(nr) * sizeof(unsigned long)) 97 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
88#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
89#define TOTAL_SECS(sbi) (sbi->total_sections)
90 98
91#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 99#define SECTOR_FROM_BLOCK(blk_addr) \
92 (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) 100 (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
93#define SECTOR_TO_BLOCK(sbi, sectors) \ 101#define SECTOR_TO_BLOCK(sectors) \
94 (sectors >> (sbi)->log_sectors_per_block) 102 (sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
95#define MAX_BIO_BLOCKS(max_hw_blocks) \ 103#define MAX_BIO_BLOCKS(sbi) \
96 (min((int)max_hw_blocks, BIO_MAX_PAGES)) 104 ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
97 105
98/* 106/*
99 * indicate a block allocation direction: RIGHT and LEFT. 107 * indicate a block allocation direction: RIGHT and LEFT.
@@ -167,6 +175,11 @@ struct segment_allocation {
167 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 175 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
168}; 176};
169 177
178struct inmem_pages {
179 struct list_head list;
180 struct page *page;
181};
182
170struct sit_info { 183struct sit_info {
171 const struct segment_allocation *s_ops; 184 const struct segment_allocation *s_ops;
172 185
@@ -237,6 +250,12 @@ struct curseg_info {
237 unsigned int next_segno; /* preallocated segment */ 250 unsigned int next_segno; /* preallocated segment */
238}; 251};
239 252
253struct sit_entry_set {
254 struct list_head set_list; /* link with all sit sets */
255 unsigned int start_segno; /* start segno of sits in set */
256 unsigned int entry_cnt; /* the # of sit entries in set */
257};
258
240/* 259/*
241 * inline functions 260 * inline functions
242 */ 261 */
@@ -316,7 +335,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
316 clear_bit(segno, free_i->free_segmap); 335 clear_bit(segno, free_i->free_segmap);
317 free_i->free_segments++; 336 free_i->free_segments++;
318 337
319 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); 338 next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno);
320 if (next >= start_segno + sbi->segs_per_sec) { 339 if (next >= start_segno + sbi->segs_per_sec) {
321 clear_bit(secno, free_i->free_secmap); 340 clear_bit(secno, free_i->free_secmap);
322 free_i->free_sections++; 341 free_i->free_sections++;
@@ -430,8 +449,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
430 449
431static inline bool need_SSR(struct f2fs_sb_info *sbi) 450static inline bool need_SSR(struct f2fs_sb_info *sbi)
432{ 451{
433 return (prefree_segments(sbi) / sbi->segs_per_sec) 452 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
434 + free_sections(sbi) < overprovision_sections(sbi); 453 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
454 return free_sections(sbi) <= (node_secs + 2 * dent_secs +
455 reserved_sections(sbi) + 1);
435} 456}
436 457
437static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) 458static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -466,48 +487,47 @@ static inline int utilization(struct f2fs_sb_info *sbi)
466 * F2FS_IPU_UTIL - if FS utilization is over threshold, 487 * F2FS_IPU_UTIL - if FS utilization is over threshold,
467 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over 488 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
468 * threshold, 489 * threshold,
490 * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash
491 * storages. IPU will be triggered only if the # of dirty
492 * pages over min_fsync_blocks.
469 * F2FS_IPU_DISABLE - disable IPU. (=default option) 493 * F2FS_IPU_DISABLE - disable IPU. (=default option)
470 */ 494 */
471#define DEF_MIN_IPU_UTIL 70 495#define DEF_MIN_IPU_UTIL 70
496#define DEF_MIN_FSYNC_BLOCKS 8
472 497
473enum { 498enum {
474 F2FS_IPU_FORCE, 499 F2FS_IPU_FORCE,
475 F2FS_IPU_SSR, 500 F2FS_IPU_SSR,
476 F2FS_IPU_UTIL, 501 F2FS_IPU_UTIL,
477 F2FS_IPU_SSR_UTIL, 502 F2FS_IPU_SSR_UTIL,
478 F2FS_IPU_DISABLE, 503 F2FS_IPU_FSYNC,
479}; 504};
480 505
481static inline bool need_inplace_update(struct inode *inode) 506static inline bool need_inplace_update(struct inode *inode)
482{ 507{
483 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 508 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
509 unsigned int policy = SM_I(sbi)->ipu_policy;
484 510
485 /* IPU can be done only for the user data */ 511 /* IPU can be done only for the user data */
486 if (S_ISDIR(inode->i_mode)) 512 if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
487 return false; 513 return false;
488 514
489 /* this is only set during fdatasync */ 515 if (policy & (0x1 << F2FS_IPU_FORCE))
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) 516 return true;
517 if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
518 return true;
519 if (policy & (0x1 << F2FS_IPU_UTIL) &&
520 utilization(sbi) > SM_I(sbi)->min_ipu_util)
521 return true;
522 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
523 utilization(sbi) > SM_I(sbi)->min_ipu_util)
491 return true; 524 return true;
492 525
493 switch (SM_I(sbi)->ipu_policy) { 526 /* this is only set during fdatasync */
494 case F2FS_IPU_FORCE: 527 if (policy & (0x1 << F2FS_IPU_FSYNC) &&
528 is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
495 return true; 529 return true;
496 case F2FS_IPU_SSR: 530
497 if (need_SSR(sbi))
498 return true;
499 break;
500 case F2FS_IPU_UTIL:
501 if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
502 return true;
503 break;
504 case F2FS_IPU_SSR_UTIL:
505 if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
506 return true;
507 break;
508 case F2FS_IPU_DISABLE:
509 break;
510 }
511 return false; 531 return false;
512} 532}
513 533
@@ -534,18 +554,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
534#ifdef CONFIG_F2FS_CHECK_FS 554#ifdef CONFIG_F2FS_CHECK_FS
535static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 555static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
536{ 556{
537 unsigned int end_segno = SM_I(sbi)->segment_count - 1; 557 BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
538 BUG_ON(segno > end_segno);
539} 558}
540 559
541static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 560static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
542{ 561{
543 struct f2fs_sm_info *sm_info = SM_I(sbi); 562 BUG_ON(blk_addr < SEG0_BLKADDR(sbi));
544 block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; 563 BUG_ON(blk_addr >= MAX_BLKADDR(sbi));
545 block_t start_addr = sm_info->seg0_blkaddr;
546 block_t end_addr = start_addr + total_blks - 1;
547 BUG_ON(blk_addr < start_addr);
548 BUG_ON(blk_addr > end_addr);
549} 564}
550 565
551/* 566/*
@@ -554,8 +569,6 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
554static inline void check_block_count(struct f2fs_sb_info *sbi, 569static inline void check_block_count(struct f2fs_sb_info *sbi,
555 int segno, struct f2fs_sit_entry *raw_sit) 570 int segno, struct f2fs_sit_entry *raw_sit)
556{ 571{
557 struct f2fs_sm_info *sm_info = SM_I(sbi);
558 unsigned int end_segno = sm_info->segment_count - 1;
559 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; 572 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
560 int valid_blocks = 0; 573 int valid_blocks = 0;
561 int cur_pos = 0, next_pos; 574 int cur_pos = 0, next_pos;
@@ -564,7 +577,7 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
564 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); 577 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
565 578
566 /* check boundary of a given segment number */ 579 /* check boundary of a given segment number */
567 BUG_ON(segno > end_segno); 580 BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
568 581
569 /* check bitmap with valid block count */ 582 /* check bitmap with valid block count */
570 do { 583 do {
@@ -583,16 +596,39 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
583 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 596 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
584} 597}
585#else 598#else
586#define check_seg_range(sbi, segno) 599static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
587#define verify_block_addr(sbi, blk_addr) 600{
588#define check_block_count(sbi, segno, raw_sit) 601 if (segno > TOTAL_SEGS(sbi) - 1)
602 sbi->need_fsck = true;
603}
604
605static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
606{
607 if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
608 sbi->need_fsck = true;
609}
610
611/*
612 * Summary block is always treated as an invalid block
613 */
614static inline void check_block_count(struct f2fs_sb_info *sbi,
615 int segno, struct f2fs_sit_entry *raw_sit)
616{
617 /* check segment usage */
618 if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
619 sbi->need_fsck = true;
620
621 /* check boundary of a given segment number */
622 if (segno > TOTAL_SEGS(sbi) - 1)
623 sbi->need_fsck = true;
624}
589#endif 625#endif
590 626
591static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 627static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
592 unsigned int start) 628 unsigned int start)
593{ 629{
594 struct sit_info *sit_i = SIT_I(sbi); 630 struct sit_info *sit_i = SIT_I(sbi);
595 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); 631 unsigned int offset = SIT_BLOCK_OFFSET(start);
596 block_t blk_addr = sit_i->sit_base_addr + offset; 632 block_t blk_addr = sit_i->sit_base_addr + offset;
597 633
598 check_seg_range(sbi, start); 634 check_seg_range(sbi, start);
@@ -619,7 +655,7 @@ static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
619 655
620static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) 656static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
621{ 657{
622 unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); 658 unsigned int block_off = SIT_BLOCK_OFFSET(start);
623 659
624 if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) 660 if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
625 f2fs_clear_bit(block_off, sit_i->sit_bitmap); 661 f2fs_clear_bit(block_off, sit_i->sit_bitmap);
@@ -666,7 +702,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
666{ 702{
667 struct block_device *bdev = sbi->sb->s_bdev; 703 struct block_device *bdev = sbi->sb->s_bdev;
668 struct request_queue *q = bdev_get_queue(bdev); 704 struct request_queue *q = bdev_get_queue(bdev);
669 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 705 return SECTOR_TO_BLOCK(queue_max_sectors(q));
670} 706}
671 707
672/* 708/*
@@ -683,7 +719,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
683 else if (type == NODE) 719 else if (type == NODE)
684 return 3 * sbi->blocks_per_seg; 720 return 3 * sbi->blocks_per_seg;
685 else if (type == META) 721 else if (type == META)
686 return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 722 return MAX_BIO_BLOCKS(sbi);
687 else 723 else
688 return 0; 724 return 0;
689} 725}
@@ -706,7 +742,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
706 else if (type == NODE) 742 else if (type == NODE)
707 desired = 3 * max_hw_blocks(sbi); 743 desired = 3 * max_hw_blocks(sbi);
708 else 744 else
709 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 745 desired = MAX_BIO_BLOCKS(sbi);
710 746
711 wbc->nr_to_write = desired; 747 wbc->nr_to_write = desired;
712 return desired - nr_to_write; 748 return desired - nr_to_write;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41bdf511003d..41d6f700f4ee 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -190,6 +190,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
191F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 191F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
192F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 192F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
193F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
193F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 194F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
194F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 195F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
195F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 196F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
@@ -204,6 +205,7 @@ static struct attribute *f2fs_attrs[] = {
204 ATTR_LIST(max_small_discards), 205 ATTR_LIST(max_small_discards),
205 ATTR_LIST(ipu_policy), 206 ATTR_LIST(ipu_policy),
206 ATTR_LIST(min_ipu_util), 207 ATTR_LIST(min_ipu_util),
208 ATTR_LIST(min_fsync_blocks),
207 ATTR_LIST(max_victim_search), 209 ATTR_LIST(max_victim_search),
208 ATTR_LIST(dir_level), 210 ATTR_LIST(dir_level),
209 ATTR_LIST(ram_thresh), 211 ATTR_LIST(ram_thresh),
@@ -366,11 +368,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
366 368
367 /* Initialize f2fs-specific inode info */ 369 /* Initialize f2fs-specific inode info */
368 fi->vfs_inode.i_version = 1; 370 fi->vfs_inode.i_version = 1;
369 atomic_set(&fi->dirty_dents, 0); 371 atomic_set(&fi->dirty_pages, 0);
370 fi->i_current_depth = 1; 372 fi->i_current_depth = 1;
371 fi->i_advise = 0; 373 fi->i_advise = 0;
372 rwlock_init(&fi->ext.ext_lock); 374 rwlock_init(&fi->ext.ext_lock);
373 init_rwsem(&fi->i_sem); 375 init_rwsem(&fi->i_sem);
376 INIT_LIST_HEAD(&fi->inmem_pages);
377 mutex_init(&fi->inmem_lock);
374 378
375 set_inode_flag(fi, FI_NEW_INODE); 379 set_inode_flag(fi, FI_NEW_INODE);
376 380
@@ -432,14 +436,19 @@ static void f2fs_put_super(struct super_block *sb)
432 stop_gc_thread(sbi); 436 stop_gc_thread(sbi);
433 437
434 /* We don't need to do checkpoint when it's clean */ 438 /* We don't need to do checkpoint when it's clean */
435 if (sbi->s_dirty) 439 if (sbi->s_dirty) {
436 write_checkpoint(sbi, true); 440 struct cp_control cpc = {
441 .reason = CP_UMOUNT,
442 };
443 write_checkpoint(sbi, &cpc);
444 }
437 445
438 /* 446 /*
439 * normally superblock is clean, so we need to release this. 447 * normally superblock is clean, so we need to release this.
440 * In addition, EIO will skip do checkpoint, we need this as well. 448 * In addition, EIO will skip do checkpoint, we need this as well.
441 */ 449 */
442 release_dirty_inode(sbi); 450 release_dirty_inode(sbi);
451 release_discard_addrs(sbi);
443 452
444 iput(sbi->node_inode); 453 iput(sbi->node_inode);
445 iput(sbi->meta_inode); 454 iput(sbi->meta_inode);
@@ -464,8 +473,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
464 trace_f2fs_sync_fs(sb, sync); 473 trace_f2fs_sync_fs(sb, sync);
465 474
466 if (sync) { 475 if (sync) {
476 struct cp_control cpc = {
477 .reason = CP_SYNC,
478 };
467 mutex_lock(&sbi->gc_mutex); 479 mutex_lock(&sbi->gc_mutex);
468 write_checkpoint(sbi, false); 480 write_checkpoint(sbi, &cpc);
469 mutex_unlock(&sbi->gc_mutex); 481 mutex_unlock(&sbi->gc_mutex);
470 } else { 482 } else {
471 f2fs_balance_fs(sbi); 483 f2fs_balance_fs(sbi);
@@ -616,6 +628,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
616 org_mount_opt = sbi->mount_opt; 628 org_mount_opt = sbi->mount_opt;
617 active_logs = sbi->active_logs; 629 active_logs = sbi->active_logs;
618 630
631 sbi->mount_opt.opt = 0;
632 sbi->active_logs = NR_CURSEG_TYPE;
633
619 /* parse mount options */ 634 /* parse mount options */
620 err = parse_options(sb, data); 635 err = parse_options(sb, data);
621 if (err) 636 if (err)
@@ -786,14 +801,22 @@ static int sanity_check_raw_super(struct super_block *sb,
786 return 1; 801 return 1;
787 } 802 }
788 803
789 if (le32_to_cpu(raw_super->log_sectorsize) != 804 /* Currently, support 512/1024/2048/4096 bytes sector size */
790 F2FS_LOG_SECTOR_SIZE) { 805 if (le32_to_cpu(raw_super->log_sectorsize) >
791 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); 806 F2FS_MAX_LOG_SECTOR_SIZE ||
807 le32_to_cpu(raw_super->log_sectorsize) <
808 F2FS_MIN_LOG_SECTOR_SIZE) {
809 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
810 le32_to_cpu(raw_super->log_sectorsize));
792 return 1; 811 return 1;
793 } 812 }
794 if (le32_to_cpu(raw_super->log_sectors_per_block) != 813 if (le32_to_cpu(raw_super->log_sectors_per_block) +
795 F2FS_LOG_SECTORS_PER_BLOCK) { 814 le32_to_cpu(raw_super->log_sectorsize) !=
796 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); 815 F2FS_MAX_LOG_SECTOR_SIZE) {
816 f2fs_msg(sb, KERN_INFO,
817 "Invalid log sectors per block(%u) log sectorsize(%u)",
818 le32_to_cpu(raw_super->log_sectors_per_block),
819 le32_to_cpu(raw_super->log_sectorsize));
797 return 1; 820 return 1;
798 } 821 }
799 return 0; 822 return 0;
@@ -849,6 +872,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
849 atomic_set(&sbi->nr_pages[i], 0); 872 atomic_set(&sbi->nr_pages[i], 0);
850 873
851 sbi->dir_level = DEF_DIR_LEVEL; 874 sbi->dir_level = DEF_DIR_LEVEL;
875 sbi->need_fsck = false;
852} 876}
853 877
854/* 878/*
@@ -1082,6 +1106,9 @@ try_onemore:
1082 if (err) 1106 if (err)
1083 goto free_proc; 1107 goto free_proc;
1084 1108
1109 if (!retry)
1110 sbi->need_fsck = true;
1111
1085 /* recover fsynced data */ 1112 /* recover fsynced data */
1086 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1113 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1087 err = recover_fsync_data(sbi); 1114 err = recover_fsync_data(sbi);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 728a5dc3dc16..deca8728117b 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -266,7 +266,7 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
266 266
267static void *read_all_xattrs(struct inode *inode, struct page *ipage) 267static void *read_all_xattrs(struct inode *inode, struct page *ipage)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 struct f2fs_xattr_header *header; 270 struct f2fs_xattr_header *header;
271 size_t size = PAGE_SIZE, inline_size = 0; 271 size_t size = PAGE_SIZE, inline_size = 0;
272 void *txattr_addr; 272 void *txattr_addr;
@@ -325,7 +325,7 @@ fail:
325static inline int write_all_xattrs(struct inode *inode, __u32 hsize, 325static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
326 void *txattr_addr, struct page *ipage) 326 void *txattr_addr, struct page *ipage)
327{ 327{
328 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 328 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
329 size_t inline_size = 0; 329 size_t inline_size = 0;
330 void *xattr_addr; 330 void *xattr_addr;
331 struct page *xpage; 331 struct page *xpage;
@@ -373,7 +373,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
373 alloc_nid_failed(sbi, new_nid); 373 alloc_nid_failed(sbi, new_nid);
374 return PTR_ERR(xpage); 374 return PTR_ERR(xpage);
375 } 375 }
376 f2fs_bug_on(new_nid); 376 f2fs_bug_on(sbi, new_nid);
377 f2fs_wait_on_page_writeback(xpage, NODE); 377 f2fs_wait_on_page_writeback(xpage, NODE);
378 } else { 378 } else {
379 struct dnode_of_data dn; 379 struct dnode_of_data dn;
@@ -596,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
596 const void *value, size_t size, 596 const void *value, size_t size,
597 struct page *ipage, int flags) 597 struct page *ipage, int flags)
598{ 598{
599 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 599 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
600 int err; 600 int err;
601 601
602 /* this case is only from init_inode_metadata */ 602 /* this case is only from init_inode_metadata */