aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 12:53:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 12:53:15 -0400
commitda01e61428aa2b5c424fddc11178498462d8c77f (patch)
treef22a693c3292658c665b77df5990b4caceed846d
parent6dea0737bc5e160efc77f4c39d393b94fd2746dc (diff)
parent02a1335f25a386db9afc68f8315162f862aac93f (diff)
Merge tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This patch-set introduces a couple of new features such as large sector size, FITRIM, and atomic/volatile writes. Several patches enhance power-off recovery and checkpoint routines. The fsck.f2fs starts to support fixing corrupted partitions with recovery hints provided by this patch-set. Summary: - retain some recovery information for fsck.f2fs - enhance checkpoint speed - enhance flush command management - bug fix for lseek - tune in-place-update policies - enhance roll-forward speed - revisit all the roll-forward and fsync rules - support large sector size - support FITRIM - support atomic and volatile writes And several clean-ups and bug fixes are included" * tag 'f2fs-for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (42 commits) f2fs: support volatile operations for transient data f2fs: support atomic writes f2fs: remove unused return value f2fs: clean up f2fs_ioctl functions f2fs: potential shift wrapping bug in f2fs_trim_fs() f2fs: call f2fs_unlock_op after error was handled f2fs: check the use of macros on block counts and addresses f2fs: refactor flush_nat_entries to remove costly reorganizing ops f2fs: introduce FITRIM in f2fs_ioctl f2fs: introduce cp_control structure f2fs: use more free segments until SSR is activated f2fs: change the ipu_policy option to enable combinations f2fs: fix to search whole dirty segmap when get_victim f2fs: fix to clean previous mount option when remount_fs f2fs: skip punching hole in special condition f2fs: support large sector size f2fs: fix to truncate blocks past EOF in ->setattr f2fs: update i_size when __allocate_data_block f2fs: use MAX_BIO_BLOCKS(sbi) f2fs: remove redundant operation during roll-forward recovery ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs7
-rw-r--r--Documentation/filesystems/f2fs.txt13
-rw-r--r--fs/f2fs/checkpoint.c97
-rw-r--r--fs/f2fs/data.c69
-rw-r--r--fs/f2fs/debug.c20
-rw-r--r--fs/f2fs/dir.c19
-rw-r--r--fs/f2fs/f2fs.h163
-rw-r--r--fs/f2fs/file.c257
-rw-r--r--fs/f2fs/gc.c26
-rw-r--r--fs/f2fs/inline.c20
-rw-r--r--fs/f2fs/inode.c37
-rw-r--r--fs/f2fs/namei.c53
-rw-r--r--fs/f2fs/node.c460
-rw-r--r--fs/f2fs/node.h60
-rw-r--r--fs/f2fs/recovery.c191
-rw-r--r--fs/f2fs/segment.c520
-rw-r--r--fs/f2fs/segment.h160
-rw-r--r--fs/f2fs/super.c47
-rw-r--r--fs/f2fs/xattr.c8
-rw-r--r--include/linux/f2fs_fs.h6
-rw-r--r--include/trace/events/f2fs.h16
21 files changed, 1453 insertions, 796 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 62dd72522d6e..6f9157f16725 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -44,6 +44,13 @@ Description:
44 Controls the FS utilization condition for the in-place-update 44 Controls the FS utilization condition for the in-place-update
45 policies. 45 policies.
46 46
47What: /sys/fs/f2fs/<disk>/min_fsync_blocks
48Date: September 2014
49Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
50Description:
51 Controls the dirty page count condition for the in-place-update
52 policies.
53
47What: /sys/fs/f2fs/<disk>/max_small_discards 54What: /sys/fs/f2fs/<disk>/max_small_discards
48Date: November 2013 55Date: November 2013
49Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> 56Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index a2046a7d0a9d..2cca5a25ef89 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -192,15 +192,22 @@ Files in /sys/fs/f2fs/<devname>
192 192
193 ipu_policy This parameter controls the policy of in-place 193 ipu_policy This parameter controls the policy of in-place
194 updates in f2fs. There are five policies: 194 updates in f2fs. There are five policies:
195 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR, 195 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
196 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL, 196 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL,
197 4: F2FS_IPU_DISABLE. 197 0x10: F2FS_IPU_FSYNC.
198 198
199 min_ipu_util This parameter controls the threshold to trigger 199 min_ipu_util This parameter controls the threshold to trigger
200 in-place-updates. The number indicates percentage 200 in-place-updates. The number indicates percentage
201 of the filesystem utilization, and used by 201 of the filesystem utilization, and used by
202 F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. 202 F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
203 203
204 min_fsync_blocks This parameter controls the threshold to trigger
205 in-place-updates when F2FS_IPU_FSYNC mode is set.
206 The number indicates the number of dirty pages
207 when fsync needs to flush on its call path. If
208 the number is less than this value, it triggers
209 in-place-updates.
210
204 max_victim_search This parameter controls the number of trials to 211 max_victim_search This parameter controls the number of trials to
205 find a victim segment when conducting SSR and 212 find a victim segment when conducting SSR and
206 cleaning operations. The default value is 4096 213 cleaning operations. The default value is 4096
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ec3b7a5381fa..dd10a031c052 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,7 +72,22 @@ out:
72 return page; 72 return page;
73} 73}
74 74
75static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) 75struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
76{
77 bool readahead = false;
78 struct page *page;
79
80 page = find_get_page(META_MAPPING(sbi), index);
81 if (!page || (page && !PageUptodate(page)))
82 readahead = true;
83 f2fs_put_page(page, 0);
84
85 if (readahead)
86 ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
87 return get_meta_page(sbi, index);
88}
89
90static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
76{ 91{
77 switch (type) { 92 switch (type) {
78 case META_NAT: 93 case META_NAT:
@@ -82,6 +97,8 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
82 case META_SSA: 97 case META_SSA:
83 case META_CP: 98 case META_CP:
84 return 0; 99 return 0;
100 case META_POR:
101 return MAX_BLKADDR(sbi);
85 default: 102 default:
86 BUG(); 103 BUG();
87 } 104 }
@@ -90,12 +107,12 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
90/* 107/*
91 * Readahead CP/NAT/SIT/SSA pages 108 * Readahead CP/NAT/SIT/SSA pages
92 */ 109 */
93int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) 110int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
94{ 111{
95 block_t prev_blk_addr = 0; 112 block_t prev_blk_addr = 0;
96 struct page *page; 113 struct page *page;
97 int blkno = start; 114 block_t blkno = start;
98 int max_blks = get_max_meta_blks(sbi, type); 115 block_t max_blks = get_max_meta_blks(sbi, type);
99 116
100 struct f2fs_io_info fio = { 117 struct f2fs_io_info fio = {
101 .type = META, 118 .type = META,
@@ -125,7 +142,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
125 break; 142 break;
126 case META_SSA: 143 case META_SSA:
127 case META_CP: 144 case META_CP:
128 /* get ssa/cp block addr */ 145 case META_POR:
146 if (unlikely(blkno >= max_blks))
147 goto out;
148 if (unlikely(blkno < SEG0_BLKADDR(sbi)))
149 goto out;
129 blk_addr = blkno; 150 blk_addr = blkno;
130 break; 151 break;
131 default: 152 default:
@@ -151,8 +172,7 @@ out:
151static int f2fs_write_meta_page(struct page *page, 172static int f2fs_write_meta_page(struct page *page,
152 struct writeback_control *wbc) 173 struct writeback_control *wbc)
153{ 174{
154 struct inode *inode = page->mapping->host; 175 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
155 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
156 176
157 trace_f2fs_writepage(page, META); 177 trace_f2fs_writepage(page, META);
158 178
@@ -177,7 +197,7 @@ redirty_out:
177static int f2fs_write_meta_pages(struct address_space *mapping, 197static int f2fs_write_meta_pages(struct address_space *mapping,
178 struct writeback_control *wbc) 198 struct writeback_control *wbc)
179{ 199{
180 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 200 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
181 long diff, written; 201 long diff, written;
182 202
183 trace_f2fs_writepages(mapping->host, wbc, META); 203 trace_f2fs_writepages(mapping->host, wbc, META);
@@ -259,15 +279,12 @@ continue_unlock:
259 279
260static int f2fs_set_meta_page_dirty(struct page *page) 280static int f2fs_set_meta_page_dirty(struct page *page)
261{ 281{
262 struct address_space *mapping = page->mapping;
263 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
264
265 trace_f2fs_set_page_dirty(page, META); 282 trace_f2fs_set_page_dirty(page, META);
266 283
267 SetPageUptodate(page); 284 SetPageUptodate(page);
268 if (!PageDirty(page)) { 285 if (!PageDirty(page)) {
269 __set_page_dirty_nobuffers(page); 286 __set_page_dirty_nobuffers(page);
270 inc_page_count(sbi, F2FS_DIRTY_META); 287 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
271 return 1; 288 return 1;
272 } 289 }
273 return 0; 290 return 0;
@@ -378,7 +395,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
378void release_orphan_inode(struct f2fs_sb_info *sbi) 395void release_orphan_inode(struct f2fs_sb_info *sbi)
379{ 396{
380 spin_lock(&sbi->ino_lock[ORPHAN_INO]); 397 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
381 f2fs_bug_on(sbi->n_orphans == 0); 398 f2fs_bug_on(sbi, sbi->n_orphans == 0);
382 sbi->n_orphans--; 399 sbi->n_orphans--;
383 spin_unlock(&sbi->ino_lock[ORPHAN_INO]); 400 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
384} 401}
@@ -398,7 +415,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
398static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 415static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
399{ 416{
400 struct inode *inode = f2fs_iget(sbi->sb, ino); 417 struct inode *inode = f2fs_iget(sbi->sb, ino);
401 f2fs_bug_on(IS_ERR(inode)); 418 f2fs_bug_on(sbi, IS_ERR(inode));
402 clear_nlink(inode); 419 clear_nlink(inode);
403 420
404 /* truncate all the data during iput */ 421 /* truncate all the data during iput */
@@ -459,7 +476,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
459 list_for_each_entry(orphan, head, list) { 476 list_for_each_entry(orphan, head, list) {
460 if (!page) { 477 if (!page) {
461 page = find_get_page(META_MAPPING(sbi), start_blk++); 478 page = find_get_page(META_MAPPING(sbi), start_blk++);
462 f2fs_bug_on(!page); 479 f2fs_bug_on(sbi, !page);
463 orphan_blk = 480 orphan_blk =
464 (struct f2fs_orphan_block *)page_address(page); 481 (struct f2fs_orphan_block *)page_address(page);
465 memset(orphan_blk, 0, sizeof(*orphan_blk)); 482 memset(orphan_blk, 0, sizeof(*orphan_blk));
@@ -619,7 +636,7 @@ fail_no_cp:
619 636
620static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) 637static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
621{ 638{
622 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 639 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
623 640
624 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) 641 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
625 return -EEXIST; 642 return -EEXIST;
@@ -631,32 +648,38 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
631 return 0; 648 return 0;
632} 649}
633 650
634void set_dirty_dir_page(struct inode *inode, struct page *page) 651void update_dirty_page(struct inode *inode, struct page *page)
635{ 652{
636 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 653 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
637 struct dir_inode_entry *new; 654 struct dir_inode_entry *new;
638 int ret = 0; 655 int ret = 0;
639 656
640 if (!S_ISDIR(inode->i_mode)) 657 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
641 return; 658 return;
642 659
660 if (!S_ISDIR(inode->i_mode)) {
661 inode_inc_dirty_pages(inode);
662 goto out;
663 }
664
643 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 665 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
644 new->inode = inode; 666 new->inode = inode;
645 INIT_LIST_HEAD(&new->list); 667 INIT_LIST_HEAD(&new->list);
646 668
647 spin_lock(&sbi->dir_inode_lock); 669 spin_lock(&sbi->dir_inode_lock);
648 ret = __add_dirty_inode(inode, new); 670 ret = __add_dirty_inode(inode, new);
649 inode_inc_dirty_dents(inode); 671 inode_inc_dirty_pages(inode);
650 SetPagePrivate(page);
651 spin_unlock(&sbi->dir_inode_lock); 672 spin_unlock(&sbi->dir_inode_lock);
652 673
653 if (ret) 674 if (ret)
654 kmem_cache_free(inode_entry_slab, new); 675 kmem_cache_free(inode_entry_slab, new);
676out:
677 SetPagePrivate(page);
655} 678}
656 679
657void add_dirty_dir_inode(struct inode *inode) 680void add_dirty_dir_inode(struct inode *inode)
658{ 681{
659 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 682 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
660 struct dir_inode_entry *new = 683 struct dir_inode_entry *new =
661 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 684 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
662 int ret = 0; 685 int ret = 0;
@@ -674,14 +697,14 @@ void add_dirty_dir_inode(struct inode *inode)
674 697
675void remove_dirty_dir_inode(struct inode *inode) 698void remove_dirty_dir_inode(struct inode *inode)
676{ 699{
677 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 700 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
678 struct dir_inode_entry *entry; 701 struct dir_inode_entry *entry;
679 702
680 if (!S_ISDIR(inode->i_mode)) 703 if (!S_ISDIR(inode->i_mode))
681 return; 704 return;
682 705
683 spin_lock(&sbi->dir_inode_lock); 706 spin_lock(&sbi->dir_inode_lock);
684 if (get_dirty_dents(inode) || 707 if (get_dirty_pages(inode) ||
685 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { 708 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
686 spin_unlock(&sbi->dir_inode_lock); 709 spin_unlock(&sbi->dir_inode_lock);
687 return; 710 return;
@@ -802,11 +825,12 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
802 finish_wait(&sbi->cp_wait, &wait); 825 finish_wait(&sbi->cp_wait, &wait);
803} 826}
804 827
805static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 828static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
806{ 829{
807 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 830 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
808 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 831 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
809 nid_t last_nid = 0; 832 struct f2fs_nm_info *nm_i = NM_I(sbi);
833 nid_t last_nid = nm_i->next_scan_nid;
810 block_t start_blk; 834 block_t start_blk;
811 struct page *cp_page; 835 struct page *cp_page;
812 unsigned int data_sum_blocks, orphan_blocks; 836 unsigned int data_sum_blocks, orphan_blocks;
@@ -869,7 +893,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 893 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
870 orphan_blocks); 894 orphan_blocks);
871 895
872 if (is_umount) { 896 if (cpc->reason == CP_UMOUNT) {
873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 897 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
874 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ 898 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
875 cp_payload_blks + data_sum_blocks + 899 cp_payload_blks + data_sum_blocks +
@@ -886,6 +910,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
886 else 910 else
887 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 911 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
888 912
913 if (sbi->need_fsck)
914 set_ckpt_flags(ckpt, CP_FSCK_FLAG);
915
889 /* update SIT/NAT bitmap */ 916 /* update SIT/NAT bitmap */
890 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); 917 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
891 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); 918 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
@@ -920,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
920 947
921 write_data_summaries(sbi, start_blk); 948 write_data_summaries(sbi, start_blk);
922 start_blk += data_sum_blocks; 949 start_blk += data_sum_blocks;
923 if (is_umount) { 950 if (cpc->reason == CP_UMOUNT) {
924 write_node_summaries(sbi, start_blk); 951 write_node_summaries(sbi, start_blk);
925 start_blk += NR_CURSEG_NODE_TYPE; 952 start_blk += NR_CURSEG_NODE_TYPE;
926 } 953 }
@@ -960,23 +987,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
960/* 987/*
961 * We guarantee that this checkpoint procedure will not fail. 988 * We guarantee that this checkpoint procedure will not fail.
962 */ 989 */
963void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 990void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
964{ 991{
965 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 992 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
966 unsigned long long ckpt_ver; 993 unsigned long long ckpt_ver;
967 994
968 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); 995 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
969 996
970 mutex_lock(&sbi->cp_mutex); 997 mutex_lock(&sbi->cp_mutex);
971 998
972 if (!sbi->s_dirty) 999 if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
973 goto out; 1000 goto out;
974 if (unlikely(f2fs_cp_error(sbi))) 1001 if (unlikely(f2fs_cp_error(sbi)))
975 goto out; 1002 goto out;
976 if (block_operations(sbi)) 1003 if (block_operations(sbi))
977 goto out; 1004 goto out;
978 1005
979 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); 1006 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
980 1007
981 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1008 f2fs_submit_merged_bio(sbi, DATA, WRITE);
982 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1009 f2fs_submit_merged_bio(sbi, NODE, WRITE);
@@ -992,16 +1019,16 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
992 1019
993 /* write cached NAT/SIT entries to NAT/SIT area */ 1020 /* write cached NAT/SIT entries to NAT/SIT area */
994 flush_nat_entries(sbi); 1021 flush_nat_entries(sbi);
995 flush_sit_entries(sbi); 1022 flush_sit_entries(sbi, cpc);
996 1023
997 /* unlock all the fs_lock[] in do_checkpoint() */ 1024 /* unlock all the fs_lock[] in do_checkpoint() */
998 do_checkpoint(sbi, is_umount); 1025 do_checkpoint(sbi, cpc);
999 1026
1000 unblock_operations(sbi); 1027 unblock_operations(sbi);
1001 stat_inc_cp_count(sbi->stat_info); 1028 stat_inc_cp_count(sbi->stat_info);
1002out: 1029out:
1003 mutex_unlock(&sbi->cp_mutex); 1030 mutex_unlock(&sbi->cp_mutex);
1004 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 1031 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1005} 1032}
1006 1033
1007void init_ino_entry_info(struct f2fs_sb_info *sbi) 1034void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 76de83e25a89..8e58c4cc2cb9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
85 bio = bio_alloc(GFP_NOIO, npages); 85 bio = bio_alloc(GFP_NOIO, npages);
86 86
87 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi; 90 bio->bi_private = sbi;
91 91
@@ -193,7 +193,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
193 __submit_merged_bio(io); 193 __submit_merged_bio(io);
194alloc_new: 194alloc_new:
195 if (io->bio == NULL) { 195 if (io->bio == NULL) {
196 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 196 int bio_blocks = MAX_BIO_BLOCKS(sbi);
197 197
198 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); 198 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
199 io->fio = *fio; 199 io->fio = *fio;
@@ -236,7 +236,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
236 236
237int reserve_new_block(struct dnode_of_data *dn) 237int reserve_new_block(struct dnode_of_data *dn)
238{ 238{
239 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
240 240
241 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 241 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
242 return -EPERM; 242 return -EPERM;
@@ -258,7 +258,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
258 int err; 258 int err;
259 259
260 /* if inode_page exists, index should be zero */ 260 /* if inode_page exists, index should be zero */
261 f2fs_bug_on(!need_put && index); 261 f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
262 262
263 err = get_dnode_of_data(dn, index, ALLOC_NODE); 263 err = get_dnode_of_data(dn, index, ALLOC_NODE);
264 if (err) 264 if (err)
@@ -321,7 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
321 block_t start_blkaddr, end_blkaddr; 321 block_t start_blkaddr, end_blkaddr;
322 int need_update = true; 322 int need_update = true;
323 323
324 f2fs_bug_on(blk_addr == NEW_ADDR); 324 f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR);
325 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 325 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
326 dn->ofs_in_node; 326 dn->ofs_in_node;
327 327
@@ -396,7 +396,6 @@ end_update:
396 396
397struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 397struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
398{ 398{
399 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
400 struct address_space *mapping = inode->i_mapping; 399 struct address_space *mapping = inode->i_mapping;
401 struct dnode_of_data dn; 400 struct dnode_of_data dn;
402 struct page *page; 401 struct page *page;
@@ -429,7 +428,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
429 return page; 428 return page;
430 } 429 }
431 430
432 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 431 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr,
433 sync ? READ_SYNC : READA); 432 sync ? READ_SYNC : READA);
434 if (err) 433 if (err)
435 return ERR_PTR(err); 434 return ERR_PTR(err);
@@ -451,7 +450,6 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
451 */ 450 */
452struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 451struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
453{ 452{
454 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
455 struct address_space *mapping = inode->i_mapping; 453 struct address_space *mapping = inode->i_mapping;
456 struct dnode_of_data dn; 454 struct dnode_of_data dn;
457 struct page *page; 455 struct page *page;
@@ -490,7 +488,8 @@ repeat:
490 return page; 488 return page;
491 } 489 }
492 490
493 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); 491 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
492 dn.data_blkaddr, READ_SYNC);
494 if (err) 493 if (err)
495 return ERR_PTR(err); 494 return ERR_PTR(err);
496 495
@@ -517,7 +516,6 @@ repeat:
517struct page *get_new_data_page(struct inode *inode, 516struct page *get_new_data_page(struct inode *inode,
518 struct page *ipage, pgoff_t index, bool new_i_size) 517 struct page *ipage, pgoff_t index, bool new_i_size)
519{ 518{
520 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
521 struct address_space *mapping = inode->i_mapping; 519 struct address_space *mapping = inode->i_mapping;
522 struct page *page; 520 struct page *page;
523 struct dnode_of_data dn; 521 struct dnode_of_data dn;
@@ -541,8 +539,8 @@ repeat:
541 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 539 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
542 SetPageUptodate(page); 540 SetPageUptodate(page);
543 } else { 541 } else {
544 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 542 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
545 READ_SYNC); 543 dn.data_blkaddr, READ_SYNC);
546 if (err) 544 if (err)
547 goto put_err; 545 goto put_err;
548 546
@@ -573,10 +571,12 @@ put_err:
573 571
574static int __allocate_data_block(struct dnode_of_data *dn) 572static int __allocate_data_block(struct dnode_of_data *dn)
575{ 573{
576 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 574 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
575 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
577 struct f2fs_summary sum; 576 struct f2fs_summary sum;
578 block_t new_blkaddr; 577 block_t new_blkaddr;
579 struct node_info ni; 578 struct node_info ni;
579 pgoff_t fofs;
580 int type; 580 int type;
581 581
582 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 582 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
@@ -599,6 +599,12 @@ static int __allocate_data_block(struct dnode_of_data *dn)
599 update_extent_cache(new_blkaddr, dn); 599 update_extent_cache(new_blkaddr, dn);
600 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); 600 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
601 601
602 /* update i_size */
603 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
604 dn->ofs_in_node;
605 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
606 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
607
602 dn->data_blkaddr = new_blkaddr; 608 dn->data_blkaddr = new_blkaddr;
603 return 0; 609 return 0;
604} 610}
@@ -614,7 +620,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
614static int __get_data_block(struct inode *inode, sector_t iblock, 620static int __get_data_block(struct inode *inode, sector_t iblock,
615 struct buffer_head *bh_result, int create, bool fiemap) 621 struct buffer_head *bh_result, int create, bool fiemap)
616{ 622{
617 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
618 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 623 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
619 unsigned maxblocks = bh_result->b_size >> blkbits; 624 unsigned maxblocks = bh_result->b_size >> blkbits;
620 struct dnode_of_data dn; 625 struct dnode_of_data dn;
@@ -630,8 +635,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
630 goto out; 635 goto out;
631 636
632 if (create) { 637 if (create) {
633 f2fs_balance_fs(sbi); 638 f2fs_balance_fs(F2FS_I_SB(inode));
634 f2fs_lock_op(sbi); 639 f2fs_lock_op(F2FS_I_SB(inode));
635 } 640 }
636 641
637 /* When reading holes, we need its node page */ 642 /* When reading holes, we need its node page */
@@ -707,7 +712,7 @@ put_out:
707 f2fs_put_dnode(&dn); 712 f2fs_put_dnode(&dn);
708unlock_out: 713unlock_out:
709 if (create) 714 if (create)
710 f2fs_unlock_op(sbi); 715 f2fs_unlock_op(F2FS_I_SB(inode));
711out: 716out:
712 trace_f2fs_get_data_block(inode, iblock, bh_result, err); 717 trace_f2fs_get_data_block(inode, iblock, bh_result, err);
713 return err; 718 return err;
@@ -804,7 +809,7 @@ static int f2fs_write_data_page(struct page *page,
804 struct writeback_control *wbc) 809 struct writeback_control *wbc)
805{ 810{
806 struct inode *inode = page->mapping->host; 811 struct inode *inode = page->mapping->host;
807 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 812 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
808 loff_t i_size = i_size_read(inode); 813 loff_t i_size = i_size_read(inode);
809 const pgoff_t end_index = ((unsigned long long) i_size) 814 const pgoff_t end_index = ((unsigned long long) i_size)
810 >> PAGE_CACHE_SHIFT; 815 >> PAGE_CACHE_SHIFT;
@@ -846,7 +851,7 @@ write:
846 if (unlikely(f2fs_cp_error(sbi))) { 851 if (unlikely(f2fs_cp_error(sbi))) {
847 SetPageError(page); 852 SetPageError(page);
848 unlock_page(page); 853 unlock_page(page);
849 return 0; 854 goto out;
850 } 855 }
851 856
852 if (!wbc->for_reclaim) 857 if (!wbc->for_reclaim)
@@ -866,7 +871,7 @@ done:
866 871
867 clear_cold_data(page); 872 clear_cold_data(page);
868out: 873out:
869 inode_dec_dirty_dents(inode); 874 inode_dec_dirty_pages(inode);
870 unlock_page(page); 875 unlock_page(page);
871 if (need_balance_fs) 876 if (need_balance_fs)
872 f2fs_balance_fs(sbi); 877 f2fs_balance_fs(sbi);
@@ -892,7 +897,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
892 struct writeback_control *wbc) 897 struct writeback_control *wbc)
893{ 898{
894 struct inode *inode = mapping->host; 899 struct inode *inode = mapping->host;
895 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 900 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
896 bool locked = false; 901 bool locked = false;
897 int ret; 902 int ret;
898 long diff; 903 long diff;
@@ -904,7 +909,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
904 return 0; 909 return 0;
905 910
906 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 911 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
907 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && 912 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
908 available_free_memory(sbi, DIRTY_DENTS)) 913 available_free_memory(sbi, DIRTY_DENTS))
909 goto skip_write; 914 goto skip_write;
910 915
@@ -926,7 +931,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
926 return ret; 931 return ret;
927 932
928skip_write: 933skip_write:
929 wbc->pages_skipped += get_dirty_dents(inode); 934 wbc->pages_skipped += get_dirty_pages(inode);
930 return 0; 935 return 0;
931} 936}
932 937
@@ -945,7 +950,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
945 struct page **pagep, void **fsdata) 950 struct page **pagep, void **fsdata)
946{ 951{
947 struct inode *inode = mapping->host; 952 struct inode *inode = mapping->host;
948 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 953 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
949 struct page *page; 954 struct page *page;
950 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 955 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
951 struct dnode_of_data dn; 956 struct dnode_of_data dn;
@@ -1047,7 +1052,10 @@ static int f2fs_write_end(struct file *file,
1047 1052
1048 trace_f2fs_write_end(inode, pos, len, copied); 1053 trace_f2fs_write_end(inode, pos, len, copied);
1049 1054
1050 set_page_dirty(page); 1055 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
1056 register_inmem_page(inode, page);
1057 else
1058 set_page_dirty(page);
1051 1059
1052 if (pos + copied > i_size_read(inode)) { 1060 if (pos + copied > i_size_read(inode)) {
1053 i_size_write(inode, pos + copied); 1061 i_size_write(inode, pos + copied);
@@ -1092,9 +1100,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1092 if (check_direct_IO(inode, rw, iter, offset)) 1100 if (check_direct_IO(inode, rw, iter, offset))
1093 return 0; 1101 return 0;
1094 1102
1095 /* clear fsync mark to recover these blocks */
1096 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1097
1098 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1103 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1099 1104
1100 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1105 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
@@ -1110,8 +1115,12 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1110 unsigned int length) 1115 unsigned int length)
1111{ 1116{
1112 struct inode *inode = page->mapping->host; 1117 struct inode *inode = page->mapping->host;
1118
1119 if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
1120 return;
1121
1113 if (PageDirty(page)) 1122 if (PageDirty(page))
1114 inode_dec_dirty_dents(inode); 1123 inode_dec_dirty_pages(inode);
1115 ClearPagePrivate(page); 1124 ClearPagePrivate(page);
1116} 1125}
1117 1126
@@ -1133,7 +1142,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
1133 1142
1134 if (!PageDirty(page)) { 1143 if (!PageDirty(page)) {
1135 __set_page_dirty_nobuffers(page); 1144 __set_page_dirty_nobuffers(page);
1136 set_dirty_dir_page(inode, page); 1145 update_dirty_page(inode, page);
1137 return 1; 1146 return 1;
1138 } 1147 }
1139 return 0; 1148 return 0;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fecebdbfd781..0a91ab813a9e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -93,7 +93,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
93 total_vblocks = 0; 93 total_vblocks = 0;
94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
95 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
98 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
99 bimodal += dist * dist; 99 bimodal += dist * dist;
@@ -103,7 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
103 ndirty++; 103 ndirty++;
104 } 104 }
105 } 105 }
106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
107 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
108 if (si->dirty_count) 108 if (si->dirty_count)
109 si->avg_vblocks = total_vblocks / ndirty; 109 si->avg_vblocks = total_vblocks / ndirty;
@@ -131,17 +131,17 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
131 131
132 /* build sit */ 132 /* build sit */
133 si->base_mem += sizeof(struct sit_info); 133 si->base_mem += sizeof(struct sit_info);
134 si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); 134 si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
135 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 135 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
136 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); 136 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
137 if (sbi->segs_per_sec > 1) 137 if (sbi->segs_per_sec > 1)
138 si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); 138 si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
139 si->base_mem += __bitmap_size(sbi, SIT_BITMAP); 139 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
140 140
141 /* build free segmap */ 141 /* build free segmap */
142 si->base_mem += sizeof(struct free_segmap_info); 142 si->base_mem += sizeof(struct free_segmap_info);
143 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 143 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
144 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 144 si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
145 145
146 /* build curseg */ 146 /* build curseg */
147 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; 147 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
@@ -149,8 +149,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
149 149
150 /* build dirty segmap */ 150 /* build dirty segmap */
151 si->base_mem += sizeof(struct dirty_seglist_info); 151 si->base_mem += sizeof(struct dirty_seglist_info);
152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi));
153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 153 si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
154 154
155 /* build nm */ 155 /* build nm */
156 si->base_mem += sizeof(struct f2fs_nm_info); 156 si->base_mem += sizeof(struct f2fs_nm_info);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 155fb056b7f1..b54f87149c09 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -126,7 +126,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
126 * For the most part, it should be a bug when name_len is zero. 126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs has occurred. 127 * We stop here for figuring out where the bugs has occurred.
128 */ 128 */
129 f2fs_bug_on(!de->name_len); 129 f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
130 130
131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
132 } 132 }
@@ -151,7 +151,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
151 bool room = false; 151 bool room = false;
152 int max_slots = 0; 152 int max_slots = 0;
153 153
154 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 154 f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
155 155
156 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); 156 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
157 nblock = bucket_blocks(level); 157 nblock = bucket_blocks(level);
@@ -284,10 +284,9 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
284 284
285int update_dent_inode(struct inode *inode, const struct qstr *name) 285int update_dent_inode(struct inode *inode, const struct qstr *name)
286{ 286{
287 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
288 struct page *page; 287 struct page *page;
289 288
290 page = get_node_page(sbi, inode->i_ino); 289 page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
291 if (IS_ERR(page)) 290 if (IS_ERR(page))
292 return PTR_ERR(page); 291 return PTR_ERR(page);
293 292
@@ -337,7 +336,6 @@ static int make_empty_dir(struct inode *inode,
337static struct page *init_inode_metadata(struct inode *inode, 336static struct page *init_inode_metadata(struct inode *inode,
338 struct inode *dir, const struct qstr *name) 337 struct inode *dir, const struct qstr *name)
339{ 338{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
341 struct page *page; 339 struct page *page;
342 int err; 340 int err;
343 341
@@ -360,7 +358,7 @@ static struct page *init_inode_metadata(struct inode *inode,
360 if (err) 358 if (err)
361 goto put_error; 359 goto put_error;
362 } else { 360 } else {
363 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 361 page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
364 if (IS_ERR(page)) 362 if (IS_ERR(page))
365 return page; 363 return page;
366 364
@@ -381,7 +379,7 @@ static struct page *init_inode_metadata(struct inode *inode,
381 * we should remove this inode from orphan list. 379 * we should remove this inode from orphan list.
382 */ 380 */
383 if (inode->i_nlink == 0) 381 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino); 382 remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
385 inc_nlink(inode); 383 inc_nlink(inode);
386 } 384 }
387 return page; 385 return page;
@@ -571,8 +569,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
571{ 569{
572 struct f2fs_dentry_block *dentry_blk; 570 struct f2fs_dentry_block *dentry_blk;
573 unsigned int bit_pos; 571 unsigned int bit_pos;
574 struct address_space *mapping = page->mapping; 572 struct inode *dir = page->mapping->host;
575 struct inode *dir = mapping->host;
576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 573 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
577 int i; 574 int i;
578 575
@@ -594,7 +591,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
594 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 591 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
595 592
596 if (inode) { 593 if (inode) {
597 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 594 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
598 595
599 down_write(&F2FS_I(inode)->i_sem); 596 down_write(&F2FS_I(inode)->i_sem);
600 597
@@ -621,7 +618,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
621 truncate_hole(dir, page->index, page->index + 1); 618 truncate_hole(dir, page->index, page->index + 1);
622 clear_page_dirty_for_io(page); 619 clear_page_dirty_for_io(page);
623 ClearPageUptodate(page); 620 ClearPageUptodate(page);
624 inode_dec_dirty_dents(dir); 621 inode_dec_dirty_pages(dir);
625 } 622 }
626 f2fs_put_page(page, 1); 623 f2fs_put_page(page, 1);
627} 624}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e921242186f6..8171e80b2ee9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,10 +21,16 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22 22
23#ifdef CONFIG_F2FS_CHECK_FS 23#ifdef CONFIG_F2FS_CHECK_FS
24#define f2fs_bug_on(condition) BUG_ON(condition) 24#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
25#define f2fs_down_write(x, y) down_write_nest_lock(x, y) 25#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
26#else 26#else
27#define f2fs_bug_on(condition) WARN_ON(condition) 27#define f2fs_bug_on(sbi, condition) \
28 do { \
29 if (unlikely(condition)) { \
30 WARN_ON(1); \
31 sbi->need_fsck = true; \
32 } \
33 } while (0)
28#define f2fs_down_write(x, y) down_write(x) 34#define f2fs_down_write(x, y) down_write(x)
29#endif 35#endif
30 36
@@ -90,6 +96,20 @@ enum {
90 SIT_BITMAP 96 SIT_BITMAP
91}; 97};
92 98
99enum {
100 CP_UMOUNT,
101 CP_SYNC,
102 CP_DISCARD,
103};
104
105struct cp_control {
106 int reason;
107 __u64 trim_start;
108 __u64 trim_end;
109 __u64 trim_minlen;
110 __u64 trimmed;
111};
112
93/* 113/*
94 * For CP/NAT/SIT/SSA readahead 114 * For CP/NAT/SIT/SSA readahead
95 */ 115 */
@@ -97,7 +117,8 @@ enum {
97 META_CP, 117 META_CP,
98 META_NAT, 118 META_NAT,
99 META_SIT, 119 META_SIT,
100 META_SSA 120 META_SSA,
121 META_POR,
101}; 122};
102 123
103/* for the list of ino */ 124/* for the list of ino */
@@ -130,7 +151,9 @@ struct discard_entry {
130struct fsync_inode_entry { 151struct fsync_inode_entry {
131 struct list_head list; /* list head */ 152 struct list_head list; /* list head */
132 struct inode *inode; /* vfs inode pointer */ 153 struct inode *inode; /* vfs inode pointer */
133 block_t blkaddr; /* block address locating the last inode */ 154 block_t blkaddr; /* block address locating the last fsync */
155 block_t last_dentry; /* block address locating the last dentry */
156 block_t last_inode; /* block address locating the last inode */
134}; 157};
135 158
136#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) 159#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
@@ -141,6 +164,9 @@ struct fsync_inode_entry {
141#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) 164#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
142#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) 165#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
143 166
167#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
168#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
169
144static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) 170static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
145{ 171{
146 int before = nats_in_cursum(rs); 172 int before = nats_in_cursum(rs);
@@ -155,11 +181,24 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
155 return before; 181 return before;
156} 182}
157 183
184static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
185 int type)
186{
187 if (type == NAT_JOURNAL)
188 return size <= MAX_NAT_JENTRIES(sum);
189 return size <= MAX_SIT_JENTRIES(sum);
190}
191
158/* 192/*
159 * ioctl commands 193 * ioctl commands
160 */ 194 */
161#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 195#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
162#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 196#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
197
198#define F2FS_IOCTL_MAGIC 0xf5
199#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
200#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
201#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
163 202
164#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 203#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
165/* 204/*
@@ -222,13 +261,16 @@ struct f2fs_inode_info {
222 /* Use below internally in f2fs*/ 261 /* Use below internally in f2fs*/
223 unsigned long flags; /* use to pass per-file flags */ 262 unsigned long flags; /* use to pass per-file flags */
224 struct rw_semaphore i_sem; /* protect fi info */ 263 struct rw_semaphore i_sem; /* protect fi info */
225 atomic_t dirty_dents; /* # of dirty dentry pages */ 264 atomic_t dirty_pages; /* # of dirty pages */
226 f2fs_hash_t chash; /* hash value of given file name */ 265 f2fs_hash_t chash; /* hash value of given file name */
227 unsigned int clevel; /* maximum level of given file name */ 266 unsigned int clevel; /* maximum level of given file name */
228 nid_t i_xattr_nid; /* node id that contains xattrs */ 267 nid_t i_xattr_nid; /* node id that contains xattrs */
229 unsigned long long xattr_ver; /* cp version of xattr modification */ 268 unsigned long long xattr_ver; /* cp version of xattr modification */
230 struct extent_info ext; /* in-memory extent cache entry */ 269 struct extent_info ext; /* in-memory extent cache entry */
231 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ 270 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
271
272 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
273 struct mutex inmem_lock; /* lock for inmemory pages */
232}; 274};
233 275
234static inline void get_extent_info(struct extent_info *ext, 276static inline void get_extent_info(struct extent_info *ext,
@@ -260,11 +302,10 @@ struct f2fs_nm_info {
260 302
261 /* NAT cache management */ 303 /* NAT cache management */
262 struct radix_tree_root nat_root;/* root of the nat entry cache */ 304 struct radix_tree_root nat_root;/* root of the nat entry cache */
305 struct radix_tree_root nat_set_root;/* root of the nat set cache */
263 rwlock_t nat_tree_lock; /* protect nat_tree_lock */ 306 rwlock_t nat_tree_lock; /* protect nat_tree_lock */
264 unsigned int nat_cnt; /* the # of cached nat entries */
265 struct list_head nat_entries; /* cached nat entry list (clean) */ 307 struct list_head nat_entries; /* cached nat entry list (clean) */
266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 308 unsigned int nat_cnt; /* the # of cached nat entries */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 309 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
269 310
270 /* free node ids management */ 311 /* free node ids management */
@@ -332,18 +373,16 @@ enum {
332}; 373};
333 374
334struct flush_cmd { 375struct flush_cmd {
335 struct flush_cmd *next;
336 struct completion wait; 376 struct completion wait;
377 struct llist_node llnode;
337 int ret; 378 int ret;
338}; 379};
339 380
340struct flush_cmd_control { 381struct flush_cmd_control {
341 struct task_struct *f2fs_issue_flush; /* flush thread */ 382 struct task_struct *f2fs_issue_flush; /* flush thread */
342 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ 383 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
343 struct flush_cmd *issue_list; /* list for command issue */ 384 struct llist_head issue_list; /* list for command issue */
344 struct flush_cmd *dispatch_list; /* list for command dispatch */ 385 struct llist_node *dispatch_list; /* list for command dispatch */
345 spinlock_t issue_lock; /* for issue list lock */
346 struct flush_cmd *issue_tail; /* list tail of issue list */
347}; 386};
348 387
349struct f2fs_sm_info { 388struct f2fs_sm_info {
@@ -369,8 +408,11 @@ struct f2fs_sm_info {
369 int nr_discards; /* # of discards in the list */ 408 int nr_discards; /* # of discards in the list */
370 int max_discards; /* max. discards to be issued */ 409 int max_discards; /* max. discards to be issued */
371 410
411 struct list_head sit_entry_set; /* sit entry set list */
412
372 unsigned int ipu_policy; /* in-place-update policy */ 413 unsigned int ipu_policy; /* in-place-update policy */
373 unsigned int min_ipu_util; /* in-place-update threshold */ 414 unsigned int min_ipu_util; /* in-place-update threshold */
415 unsigned int min_fsync_blocks; /* threshold for fsync */
374 416
375 /* for flush command control */ 417 /* for flush command control */
376 struct flush_cmd_control *cmd_control_info; 418 struct flush_cmd_control *cmd_control_info;
@@ -434,6 +476,7 @@ struct f2fs_sb_info {
434 struct buffer_head *raw_super_buf; /* buffer head of raw sb */ 476 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
435 struct f2fs_super_block *raw_super; /* raw super block pointer */ 477 struct f2fs_super_block *raw_super; /* raw super block pointer */
436 int s_dirty; /* dirty flag for checkpoint */ 478 int s_dirty; /* dirty flag for checkpoint */
479 bool need_fsck; /* need fsck.f2fs to fix */
437 480
438 /* for node-related operations */ 481 /* for node-related operations */
439 struct f2fs_nm_info *nm_info; /* node manager */ 482 struct f2fs_nm_info *nm_info; /* node manager */
@@ -539,6 +582,21 @@ static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
539 return sb->s_fs_info; 582 return sb->s_fs_info;
540} 583}
541 584
585static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
586{
587 return F2FS_SB(inode->i_sb);
588}
589
590static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
591{
592 return F2FS_I_SB(mapping->host);
593}
594
595static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
596{
597 return F2FS_M_SB(page->mapping);
598}
599
542static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) 600static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
543{ 601{
544 return (struct f2fs_super_block *)(sbi->raw_super); 602 return (struct f2fs_super_block *)(sbi->raw_super);
@@ -703,8 +761,8 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
703 blkcnt_t count) 761 blkcnt_t count)
704{ 762{
705 spin_lock(&sbi->stat_lock); 763 spin_lock(&sbi->stat_lock);
706 f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); 764 f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
707 f2fs_bug_on(inode->i_blocks < count); 765 f2fs_bug_on(sbi, inode->i_blocks < count);
708 inode->i_blocks -= count; 766 inode->i_blocks -= count;
709 sbi->total_valid_block_count -= (block_t)count; 767 sbi->total_valid_block_count -= (block_t)count;
710 spin_unlock(&sbi->stat_lock); 768 spin_unlock(&sbi->stat_lock);
@@ -716,10 +774,11 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
716 F2FS_SET_SB_DIRT(sbi); 774 F2FS_SET_SB_DIRT(sbi);
717} 775}
718 776
719static inline void inode_inc_dirty_dents(struct inode *inode) 777static inline void inode_inc_dirty_pages(struct inode *inode)
720{ 778{
721 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 779 atomic_inc(&F2FS_I(inode)->dirty_pages);
722 atomic_inc(&F2FS_I(inode)->dirty_dents); 780 if (S_ISDIR(inode->i_mode))
781 inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
723} 782}
724 783
725static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 784static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -727,13 +786,15 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
727 atomic_dec(&sbi->nr_pages[count_type]); 786 atomic_dec(&sbi->nr_pages[count_type]);
728} 787}
729 788
730static inline void inode_dec_dirty_dents(struct inode *inode) 789static inline void inode_dec_dirty_pages(struct inode *inode)
731{ 790{
732 if (!S_ISDIR(inode->i_mode)) 791 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
733 return; 792 return;
734 793
735 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); 794 atomic_dec(&F2FS_I(inode)->dirty_pages);
736 atomic_dec(&F2FS_I(inode)->dirty_dents); 795
796 if (S_ISDIR(inode->i_mode))
797 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
737} 798}
738 799
739static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 800static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -741,9 +802,9 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
741 return atomic_read(&sbi->nr_pages[count_type]); 802 return atomic_read(&sbi->nr_pages[count_type]);
742} 803}
743 804
744static inline int get_dirty_dents(struct inode *inode) 805static inline int get_dirty_pages(struct inode *inode)
745{ 806{
746 return atomic_read(&F2FS_I(inode)->dirty_dents); 807 return atomic_read(&F2FS_I(inode)->dirty_pages);
747} 808}
748 809
749static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 810static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
@@ -848,9 +909,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
848{ 909{
849 spin_lock(&sbi->stat_lock); 910 spin_lock(&sbi->stat_lock);
850 911
851 f2fs_bug_on(!sbi->total_valid_block_count); 912 f2fs_bug_on(sbi, !sbi->total_valid_block_count);
852 f2fs_bug_on(!sbi->total_valid_node_count); 913 f2fs_bug_on(sbi, !sbi->total_valid_node_count);
853 f2fs_bug_on(!inode->i_blocks); 914 f2fs_bug_on(sbi, !inode->i_blocks);
854 915
855 inode->i_blocks--; 916 inode->i_blocks--;
856 sbi->total_valid_node_count--; 917 sbi->total_valid_node_count--;
@@ -867,7 +928,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
867static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 928static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
868{ 929{
869 spin_lock(&sbi->stat_lock); 930 spin_lock(&sbi->stat_lock);
870 f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); 931 f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count);
871 sbi->total_valid_inode_count++; 932 sbi->total_valid_inode_count++;
872 spin_unlock(&sbi->stat_lock); 933 spin_unlock(&sbi->stat_lock);
873} 934}
@@ -875,7 +936,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
875static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 936static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
876{ 937{
877 spin_lock(&sbi->stat_lock); 938 spin_lock(&sbi->stat_lock);
878 f2fs_bug_on(!sbi->total_valid_inode_count); 939 f2fs_bug_on(sbi, !sbi->total_valid_inode_count);
879 sbi->total_valid_inode_count--; 940 sbi->total_valid_inode_count--;
880 spin_unlock(&sbi->stat_lock); 941 spin_unlock(&sbi->stat_lock);
881} 942}
@@ -891,7 +952,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
891 return; 952 return;
892 953
893 if (unlock) { 954 if (unlock) {
894 f2fs_bug_on(!PageLocked(page)); 955 f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page));
895 unlock_page(page); 956 unlock_page(page);
896 } 957 }
897 page_cache_release(page); 958 page_cache_release(page);
@@ -998,7 +1059,9 @@ enum {
998 FI_INLINE_DATA, /* used for inline data*/ 1059 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */ 1060 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1061 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used fo ipu for fdatasync */ 1062 FI_NEED_IPU, /* used for ipu per file */
1063 FI_ATOMIC_FILE, /* indicate atomic file */
1064 FI_VOLATILE_FILE, /* indicate volatile file */
1002}; 1065};
1003 1066
1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1067static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1085,6 +1148,16 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1085 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); 1148 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
1086} 1149}
1087 1150
1151static inline bool f2fs_is_atomic_file(struct inode *inode)
1152{
1153 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
1154}
1155
1156static inline bool f2fs_is_volatile_file(struct inode *inode)
1157{
1158 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1159}
1160
1088static inline void *inline_data_addr(struct page *page) 1161static inline void *inline_data_addr(struct page *page)
1089{ 1162{
1090 struct f2fs_inode *ri = F2FS_INODE(page); 1163 struct f2fs_inode *ri = F2FS_INODE(page);
@@ -1141,6 +1214,7 @@ void update_inode(struct inode *, struct page *);
1141void update_inode_page(struct inode *); 1214void update_inode_page(struct inode *);
1142int f2fs_write_inode(struct inode *, struct writeback_control *); 1215int f2fs_write_inode(struct inode *, struct writeback_control *);
1143void f2fs_evict_inode(struct inode *); 1216void f2fs_evict_inode(struct inode *);
1217void handle_failed_inode(struct inode *);
1144 1218
1145/* 1219/*
1146 * namei.c 1220 * namei.c
@@ -1188,9 +1262,9 @@ struct dnode_of_data;
1188struct node_info; 1262struct node_info;
1189 1263
1190bool available_free_memory(struct f2fs_sb_info *, int); 1264bool available_free_memory(struct f2fs_sb_info *, int);
1191int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1265bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1192bool fsync_mark_done(struct f2fs_sb_info *, nid_t); 1266bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
1193void fsync_mark_clear(struct f2fs_sb_info *, nid_t); 1267bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
1194void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1268void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1195int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1269int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1196int truncate_inode_blocks(struct inode *, pgoff_t); 1270int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1221,6 +1295,8 @@ void destroy_node_manager_caches(void);
1221/* 1295/*
1222 * segment.c 1296 * segment.c
1223 */ 1297 */
1298void register_inmem_page(struct inode *, struct page *);
1299void commit_inmem_pages(struct inode *, bool);
1224void f2fs_balance_fs(struct f2fs_sb_info *); 1300void f2fs_balance_fs(struct f2fs_sb_info *);
1225void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1301void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1226int f2fs_issue_flush(struct f2fs_sb_info *); 1302int f2fs_issue_flush(struct f2fs_sb_info *);
@@ -1229,9 +1305,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1229void invalidate_blocks(struct f2fs_sb_info *, block_t); 1305void invalidate_blocks(struct f2fs_sb_info *, block_t);
1230void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1306void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1231void clear_prefree_segments(struct f2fs_sb_info *); 1307void clear_prefree_segments(struct f2fs_sb_info *);
1308void release_discard_addrs(struct f2fs_sb_info *);
1232void discard_next_dnode(struct f2fs_sb_info *, block_t); 1309void discard_next_dnode(struct f2fs_sb_info *, block_t);
1233int npages_for_summary_flush(struct f2fs_sb_info *); 1310int npages_for_summary_flush(struct f2fs_sb_info *);
1234void allocate_new_segments(struct f2fs_sb_info *); 1311void allocate_new_segments(struct f2fs_sb_info *);
1312int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
1235struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1313struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
1236void write_meta_page(struct f2fs_sb_info *, struct page *); 1314void write_meta_page(struct f2fs_sb_info *, struct page *);
1237void write_node_page(struct f2fs_sb_info *, struct page *, 1315void write_node_page(struct f2fs_sb_info *, struct page *,
@@ -1248,7 +1326,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t);
1248void write_node_summaries(struct f2fs_sb_info *, block_t); 1326void write_node_summaries(struct f2fs_sb_info *, block_t);
1249int lookup_journal_in_cursum(struct f2fs_summary_block *, 1327int lookup_journal_in_cursum(struct f2fs_summary_block *,
1250 int, unsigned int, int); 1328 int, unsigned int, int);
1251void flush_sit_entries(struct f2fs_sb_info *); 1329void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
1252int build_segment_manager(struct f2fs_sb_info *); 1330int build_segment_manager(struct f2fs_sb_info *);
1253void destroy_segment_manager(struct f2fs_sb_info *); 1331void destroy_segment_manager(struct f2fs_sb_info *);
1254int __init create_segment_manager_caches(void); 1332int __init create_segment_manager_caches(void);
@@ -1259,7 +1337,8 @@ void destroy_segment_manager_caches(void);
1259 */ 1337 */
1260struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1338struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1261struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1339struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1262int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1340struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t);
1341int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
1263long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1342long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1264void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1343void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1344void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1271,11 +1350,11 @@ void add_orphan_inode(struct f2fs_sb_info *, nid_t);
1271void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1350void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
1272void recover_orphan_inodes(struct f2fs_sb_info *); 1351void recover_orphan_inodes(struct f2fs_sb_info *);
1273int get_valid_checkpoint(struct f2fs_sb_info *); 1352int get_valid_checkpoint(struct f2fs_sb_info *);
1274void set_dirty_dir_page(struct inode *, struct page *); 1353void update_dirty_page(struct inode *, struct page *);
1275void add_dirty_dir_inode(struct inode *); 1354void add_dirty_dir_inode(struct inode *);
1276void remove_dirty_dir_inode(struct inode *); 1355void remove_dirty_dir_inode(struct inode *);
1277void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1356void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1278void write_checkpoint(struct f2fs_sb_info *, bool); 1357void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
1279void init_ino_entry_info(struct f2fs_sb_info *); 1358void init_ino_entry_info(struct f2fs_sb_info *);
1280int __init create_checkpoint_caches(void); 1359int __init create_checkpoint_caches(void);
1281void destroy_checkpoint_caches(void); 1360void destroy_checkpoint_caches(void);
@@ -1359,12 +1438,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1359#define stat_inc_inline_inode(inode) \ 1438#define stat_inc_inline_inode(inode) \
1360 do { \ 1439 do { \
1361 if (f2fs_has_inline_data(inode)) \ 1440 if (f2fs_has_inline_data(inode)) \
1362 ((F2FS_SB(inode->i_sb))->inline_inode++); \ 1441 ((F2FS_I_SB(inode))->inline_inode++); \
1363 } while (0) 1442 } while (0)
1364#define stat_dec_inline_inode(inode) \ 1443#define stat_dec_inline_inode(inode) \
1365 do { \ 1444 do { \
1366 if (f2fs_has_inline_data(inode)) \ 1445 if (f2fs_has_inline_data(inode)) \
1367 ((F2FS_SB(inode->i_sb))->inline_inode--); \ 1446 ((F2FS_I_SB(inode))->inline_inode--); \
1368 } while (0) 1447 } while (0)
1369 1448
1370#define stat_inc_seg_type(sbi, curseg) \ 1449#define stat_inc_seg_type(sbi, curseg) \
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 060aee65aee8..8e68bb64f835 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,7 +33,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
33{ 33{
34 struct page *page = vmf->page; 34 struct page *page = vmf->page;
35 struct inode *inode = file_inode(vma->vm_file); 35 struct inode *inode = file_inode(vma->vm_file);
36 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 36 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
37 struct dnode_of_data dn; 37 struct dnode_of_data dn;
38 int err; 38 int err;
39 39
@@ -117,7 +117,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
117 117
118static inline bool need_do_checkpoint(struct inode *inode) 118static inline bool need_do_checkpoint(struct inode *inode)
119{ 119{
120 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 120 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
121 bool need_cp = false; 121 bool need_cp = false;
122 122
123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
@@ -138,7 +138,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
138{ 138{
139 struct inode *inode = file->f_mapping->host; 139 struct inode *inode = file->f_mapping->host;
140 struct f2fs_inode_info *fi = F2FS_I(inode); 140 struct f2fs_inode_info *fi = F2FS_I(inode);
141 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 141 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
142 nid_t ino = inode->i_ino;
142 int ret = 0; 143 int ret = 0;
143 bool need_cp = false; 144 bool need_cp = false;
144 struct writeback_control wbc = { 145 struct writeback_control wbc = {
@@ -153,12 +154,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
153 trace_f2fs_sync_file_enter(inode); 154 trace_f2fs_sync_file_enter(inode);
154 155
155 /* if fdatasync is triggered, let's do in-place-update */ 156 /* if fdatasync is triggered, let's do in-place-update */
156 if (datasync) 157 if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
157 set_inode_flag(fi, FI_NEED_IPU); 158 set_inode_flag(fi, FI_NEED_IPU);
158
159 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 159 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
160 if (datasync) 160 clear_inode_flag(fi, FI_NEED_IPU);
161 clear_inode_flag(fi, FI_NEED_IPU); 161
162 if (ret) { 162 if (ret) {
163 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 163 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
164 return ret; 164 return ret;
@@ -168,13 +168,22 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
168 * if there is no written data, don't waste time to write recovery info. 168 * if there is no written data, don't waste time to write recovery info.
169 */ 169 */
170 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && 170 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
171 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { 171 !exist_written_data(sbi, ino, APPEND_INO)) {
172 struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
173
174 /* But we need to avoid that there are some inode updates */
175 if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) {
176 f2fs_put_page(i, 0);
177 goto go_write;
178 }
179 f2fs_put_page(i, 0);
180
172 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || 181 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
173 exist_written_data(sbi, inode->i_ino, UPDATE_INO)) 182 exist_written_data(sbi, ino, UPDATE_INO))
174 goto flush_out; 183 goto flush_out;
175 goto out; 184 goto out;
176 } 185 }
177 186go_write:
178 /* guarantee free sections for fsync */ 187 /* guarantee free sections for fsync */
179 f2fs_balance_fs(sbi); 188 f2fs_balance_fs(sbi);
180 189
@@ -207,26 +216,28 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
207 up_write(&fi->i_sem); 216 up_write(&fi->i_sem);
208 } 217 }
209 } else { 218 } else {
210 /* if there is no written node page, write its inode page */ 219sync_nodes:
211 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 220 sync_node_pages(sbi, ino, &wbc);
212 if (fsync_mark_done(sbi, inode->i_ino)) 221
213 goto out; 222 if (need_inode_block_update(sbi, ino)) {
214 mark_inode_dirty_sync(inode); 223 mark_inode_dirty_sync(inode);
215 ret = f2fs_write_inode(inode, NULL); 224 ret = f2fs_write_inode(inode, NULL);
216 if (ret) 225 if (ret)
217 goto out; 226 goto out;
227 goto sync_nodes;
218 } 228 }
219 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 229
230 ret = wait_on_node_pages_writeback(sbi, ino);
220 if (ret) 231 if (ret)
221 goto out; 232 goto out;
222 233
223 /* once recovery info is written, don't need to tack this */ 234 /* once recovery info is written, don't need to tack this */
224 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); 235 remove_dirty_inode(sbi, ino, APPEND_INO);
225 clear_inode_flag(fi, FI_APPEND_WRITE); 236 clear_inode_flag(fi, FI_APPEND_WRITE);
226flush_out: 237flush_out:
227 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 238 remove_dirty_inode(sbi, ino, UPDATE_INO);
228 clear_inode_flag(fi, FI_UPDATE_WRITE); 239 clear_inode_flag(fi, FI_UPDATE_WRITE);
229 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 240 ret = f2fs_issue_flush(F2FS_I_SB(inode));
230 } 241 }
231out: 242out:
232 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 243 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -353,6 +364,8 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
353 maxbytes, i_size_read(inode)); 364 maxbytes, i_size_read(inode));
354 case SEEK_DATA: 365 case SEEK_DATA:
355 case SEEK_HOLE: 366 case SEEK_HOLE:
367 if (offset < 0)
368 return -ENXIO;
356 return f2fs_seek_block(file, offset, whence); 369 return f2fs_seek_block(file, offset, whence);
357 } 370 }
358 371
@@ -369,7 +382,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
369int truncate_data_blocks_range(struct dnode_of_data *dn, int count) 382int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
370{ 383{
371 int nr_free = 0, ofs = dn->ofs_in_node; 384 int nr_free = 0, ofs = dn->ofs_in_node;
372 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 385 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
373 struct f2fs_node *raw_node; 386 struct f2fs_node *raw_node;
374 __le32 *addr; 387 __le32 *addr;
375 388
@@ -432,7 +445,7 @@ out:
432 445
433int truncate_blocks(struct inode *inode, u64 from, bool lock) 446int truncate_blocks(struct inode *inode, u64 from, bool lock)
434{ 447{
435 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 448 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
436 unsigned int blocksize = inode->i_sb->s_blocksize; 449 unsigned int blocksize = inode->i_sb->s_blocksize;
437 struct dnode_of_data dn; 450 struct dnode_of_data dn;
438 pgoff_t free_from; 451 pgoff_t free_from;
@@ -463,7 +476,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
463 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 476 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
464 477
465 count -= dn.ofs_in_node; 478 count -= dn.ofs_in_node;
466 f2fs_bug_on(count < 0); 479 f2fs_bug_on(sbi, count < 0);
467 480
468 if (dn.ofs_in_node || IS_INODE(dn.node_page)) { 481 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
469 truncate_data_blocks_range(&dn, count); 482 truncate_data_blocks_range(&dn, count);
@@ -547,15 +560,22 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
547 if (err) 560 if (err)
548 return err; 561 return err;
549 562
550 if ((attr->ia_valid & ATTR_SIZE) && 563 if (attr->ia_valid & ATTR_SIZE) {
551 attr->ia_size != i_size_read(inode)) {
552 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); 564 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
553 if (err) 565 if (err)
554 return err; 566 return err;
555 567
556 truncate_setsize(inode, attr->ia_size); 568 if (attr->ia_size != i_size_read(inode)) {
557 f2fs_truncate(inode); 569 truncate_setsize(inode, attr->ia_size);
558 f2fs_balance_fs(F2FS_SB(inode->i_sb)); 570 f2fs_truncate(inode);
571 f2fs_balance_fs(F2FS_I_SB(inode));
572 } else {
573 /*
574 * giving a chance to truncate blocks past EOF which
575 * are fallocated with FALLOC_FL_KEEP_SIZE.
576 */
577 f2fs_truncate(inode);
578 }
559 } 579 }
560 580
561 __setattr_copy(inode, attr); 581 __setattr_copy(inode, attr);
@@ -589,7 +609,7 @@ const struct inode_operations f2fs_file_inode_operations = {
589static void fill_zero(struct inode *inode, pgoff_t index, 609static void fill_zero(struct inode *inode, pgoff_t index,
590 loff_t start, loff_t len) 610 loff_t start, loff_t len)
591{ 611{
592 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 612 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
593 struct page *page; 613 struct page *page;
594 614
595 if (!len) 615 if (!len)
@@ -638,6 +658,13 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
638 loff_t off_start, off_end; 658 loff_t off_start, off_end;
639 int ret = 0; 659 int ret = 0;
640 660
661 if (!S_ISREG(inode->i_mode))
662 return -EOPNOTSUPP;
663
664 /* skip punching hole beyond i_size */
665 if (offset >= inode->i_size)
666 return ret;
667
641 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); 668 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
642 if (ret) 669 if (ret)
643 return ret; 670 return ret;
@@ -661,7 +688,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
661 if (pg_start < pg_end) { 688 if (pg_start < pg_end) {
662 struct address_space *mapping = inode->i_mapping; 689 struct address_space *mapping = inode->i_mapping;
663 loff_t blk_start, blk_end; 690 loff_t blk_start, blk_end;
664 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 691 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
665 692
666 f2fs_balance_fs(sbi); 693 f2fs_balance_fs(sbi);
667 694
@@ -682,7 +709,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
682static int expand_inode_data(struct inode *inode, loff_t offset, 709static int expand_inode_data(struct inode *inode, loff_t offset,
683 loff_t len, int mode) 710 loff_t len, int mode)
684{ 711{
685 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 712 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
686 pgoff_t index, pg_start, pg_end; 713 pgoff_t index, pg_start, pg_end;
687 loff_t new_size = i_size_read(inode); 714 loff_t new_size = i_size_read(inode);
688 loff_t off_start, off_end; 715 loff_t off_start, off_end;
@@ -778,61 +805,157 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
778 return flags & F2FS_OTHER_FLMASK; 805 return flags & F2FS_OTHER_FLMASK;
779} 806}
780 807
781long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 808static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
809{
810 struct inode *inode = file_inode(filp);
811 struct f2fs_inode_info *fi = F2FS_I(inode);
812 unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
813 return put_user(flags, (int __user *)arg);
814}
815
816static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
782{ 817{
783 struct inode *inode = file_inode(filp); 818 struct inode *inode = file_inode(filp);
784 struct f2fs_inode_info *fi = F2FS_I(inode); 819 struct f2fs_inode_info *fi = F2FS_I(inode);
785 unsigned int flags; 820 unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
821 unsigned int oldflags;
786 int ret; 822 int ret;
787 823
788 switch (cmd) { 824 ret = mnt_want_write_file(filp);
789 case F2FS_IOC_GETFLAGS: 825 if (ret)
790 flags = fi->i_flags & FS_FL_USER_VISIBLE; 826 return ret;
791 return put_user(flags, (int __user *) arg);
792 case F2FS_IOC_SETFLAGS:
793 {
794 unsigned int oldflags;
795 827
796 ret = mnt_want_write_file(filp); 828 if (!inode_owner_or_capable(inode)) {
797 if (ret) 829 ret = -EACCES;
798 return ret; 830 goto out;
831 }
799 832
800 if (!inode_owner_or_capable(inode)) { 833 if (get_user(flags, (int __user *)arg)) {
801 ret = -EACCES; 834 ret = -EFAULT;
802 goto out; 835 goto out;
803 } 836 }
837
838 flags = f2fs_mask_flags(inode->i_mode, flags);
839
840 mutex_lock(&inode->i_mutex);
804 841
805 if (get_user(flags, (int __user *) arg)) { 842 oldflags = fi->i_flags;
806 ret = -EFAULT; 843
844 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
845 if (!capable(CAP_LINUX_IMMUTABLE)) {
846 mutex_unlock(&inode->i_mutex);
847 ret = -EPERM;
807 goto out; 848 goto out;
808 } 849 }
850 }
809 851
810 flags = f2fs_mask_flags(inode->i_mode, flags); 852 flags = flags & FS_FL_USER_MODIFIABLE;
853 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
854 fi->i_flags = flags;
855 mutex_unlock(&inode->i_mutex);
811 856
812 mutex_lock(&inode->i_mutex); 857 f2fs_set_inode_flags(inode);
858 inode->i_ctime = CURRENT_TIME;
859 mark_inode_dirty(inode);
860out:
861 mnt_drop_write_file(filp);
862 return ret;
863}
813 864
814 oldflags = fi->i_flags; 865static int f2fs_ioc_start_atomic_write(struct file *filp)
866{
867 struct inode *inode = file_inode(filp);
868 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
815 869
816 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 870 if (!inode_owner_or_capable(inode))
817 if (!capable(CAP_LINUX_IMMUTABLE)) { 871 return -EACCES;
818 mutex_unlock(&inode->i_mutex);
819 ret = -EPERM;
820 goto out;
821 }
822 }
823 872
824 flags = flags & FS_FL_USER_MODIFIABLE; 873 f2fs_balance_fs(sbi);
825 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
826 fi->i_flags = flags;
827 mutex_unlock(&inode->i_mutex);
828 874
829 f2fs_set_inode_flags(inode); 875 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
830 inode->i_ctime = CURRENT_TIME; 876
831 mark_inode_dirty(inode); 877 return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
832out: 878}
833 mnt_drop_write_file(filp); 879
880static int f2fs_ioc_commit_atomic_write(struct file *filp)
881{
882 struct inode *inode = file_inode(filp);
883 int ret;
884
885 if (!inode_owner_or_capable(inode))
886 return -EACCES;
887
888 if (f2fs_is_volatile_file(inode))
889 return 0;
890
891 ret = mnt_want_write_file(filp);
892 if (ret)
834 return ret; 893 return ret;
835 } 894
895 if (f2fs_is_atomic_file(inode))
896 commit_inmem_pages(inode, false);
897
898 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
899 mnt_drop_write_file(filp);
900 return ret;
901}
902
903static int f2fs_ioc_start_volatile_write(struct file *filp)
904{
905 struct inode *inode = file_inode(filp);
906
907 if (!inode_owner_or_capable(inode))
908 return -EACCES;
909
910 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
911 return 0;
912}
913
914static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
915{
916 struct inode *inode = file_inode(filp);
917 struct super_block *sb = inode->i_sb;
918 struct request_queue *q = bdev_get_queue(sb->s_bdev);
919 struct fstrim_range range;
920 int ret;
921
922 if (!capable(CAP_SYS_ADMIN))
923 return -EPERM;
924
925 if (!blk_queue_discard(q))
926 return -EOPNOTSUPP;
927
928 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
929 sizeof(range)))
930 return -EFAULT;
931
932 range.minlen = max((unsigned int)range.minlen,
933 q->limits.discard_granularity);
934 ret = f2fs_trim_fs(F2FS_SB(sb), &range);
935 if (ret < 0)
936 return ret;
937
938 if (copy_to_user((struct fstrim_range __user *)arg, &range,
939 sizeof(range)))
940 return -EFAULT;
941 return 0;
942}
943
944long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
945{
946 switch (cmd) {
947 case F2FS_IOC_GETFLAGS:
948 return f2fs_ioc_getflags(filp, arg);
949 case F2FS_IOC_SETFLAGS:
950 return f2fs_ioc_setflags(filp, arg);
951 case F2FS_IOC_START_ATOMIC_WRITE:
952 return f2fs_ioc_start_atomic_write(filp);
953 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
954 return f2fs_ioc_commit_atomic_write(filp);
955 case F2FS_IOC_START_VOLATILE_WRITE:
956 return f2fs_ioc_start_volatile_write(filp);
957 case FITRIM:
958 return f2fs_ioc_fitrim(filp, arg);
836 default: 959 default:
837 return -ENOTTY; 960 return -ENOTTY;
838 } 961 }
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 943a31db7cc3..2a8f4acdb86b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -193,7 +193,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
193 * selected by background GC before. 193 * selected by background GC before.
194 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
195 */ 195 */
196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { 196 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
197 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
198 continue; 198 continue;
199 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
@@ -263,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
263 unsigned int secno, max_cost; 263 unsigned int secno, max_cost;
264 int nsearched = 0; 264 int nsearched = 0;
265 265
266 mutex_lock(&dirty_i->seglist_lock);
267
266 p.alloc_mode = alloc_mode; 268 p.alloc_mode = alloc_mode;
267 select_policy(sbi, gc_type, type, &p); 269 select_policy(sbi, gc_type, type, &p);
268 270
269 p.min_segno = NULL_SEGNO; 271 p.min_segno = NULL_SEGNO;
270 p.min_cost = max_cost = get_max_cost(sbi, &p); 272 p.min_cost = max_cost = get_max_cost(sbi, &p);
271 273
272 mutex_lock(&dirty_i->seglist_lock);
273
274 if (p.alloc_mode == LFS && gc_type == FG_GC) { 274 if (p.alloc_mode == LFS && gc_type == FG_GC) {
275 p.min_segno = check_bg_victims(sbi); 275 p.min_segno = check_bg_victims(sbi);
276 if (p.min_segno != NULL_SEGNO) 276 if (p.min_segno != NULL_SEGNO)
@@ -281,9 +281,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
281 unsigned long cost; 281 unsigned long cost;
282 unsigned int segno; 282 unsigned int segno;
283 283
284 segno = find_next_bit(p.dirty_segmap, 284 segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
285 TOTAL_SEGS(sbi), p.offset); 285 if (segno >= MAIN_SEGS(sbi)) {
286 if (segno >= TOTAL_SEGS(sbi)) {
287 if (sbi->last_victim[p.gc_mode]) { 286 if (sbi->last_victim[p.gc_mode]) {
288 sbi->last_victim[p.gc_mode] = 0; 287 sbi->last_victim[p.gc_mode] = 0;
289 p.offset = 0; 288 p.offset = 0;
@@ -423,6 +422,12 @@ next_step:
423 if (IS_ERR(node_page)) 422 if (IS_ERR(node_page))
424 continue; 423 continue;
425 424
425 /* block may become invalid during get_node_page */
426 if (check_valid_map(sbi, segno, off) == 0) {
427 f2fs_put_page(node_page, 1);
428 continue;
429 }
430
426 /* set page dirty and write it */ 431 /* set page dirty and write it */
427 if (gc_type == FG_GC) { 432 if (gc_type == FG_GC) {
428 f2fs_wait_on_page_writeback(node_page, NODE); 433 f2fs_wait_on_page_writeback(node_page, NODE);
@@ -531,7 +536,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 f2fs_wait_on_page_writeback(page, DATA); 536 f2fs_wait_on_page_writeback(page, DATA);
532 537
533 if (clear_page_dirty_for_io(page)) 538 if (clear_page_dirty_for_io(page))
534 inode_dec_dirty_dents(inode); 539 inode_dec_dirty_pages(inode);
535 set_cold_data(page); 540 set_cold_data(page);
536 do_write_data_page(page, &fio); 541 do_write_data_page(page, &fio);
537 clear_cold_data(page); 542 clear_cold_data(page);
@@ -688,6 +693,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
688 int gc_type = BG_GC; 693 int gc_type = BG_GC;
689 int nfree = 0; 694 int nfree = 0;
690 int ret = -1; 695 int ret = -1;
696 struct cp_control cpc = {
697 .reason = CP_SYNC,
698 };
691 699
692 INIT_LIST_HEAD(&ilist); 700 INIT_LIST_HEAD(&ilist);
693gc_more: 701gc_more:
@@ -698,7 +706,7 @@ gc_more:
698 706
699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 707 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
700 gc_type = FG_GC; 708 gc_type = FG_GC;
701 write_checkpoint(sbi, false); 709 write_checkpoint(sbi, &cpc);
702 } 710 }
703 711
704 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 712 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
@@ -723,7 +731,7 @@ gc_more:
723 goto gc_more; 731 goto gc_more;
724 732
725 if (gc_type == FG_GC) 733 if (gc_type == FG_GC)
726 write_checkpoint(sbi, false); 734 write_checkpoint(sbi, &cpc);
727stop: 735stop:
728 mutex_unlock(&sbi->gc_mutex); 736 mutex_unlock(&sbi->gc_mutex);
729 737
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 3e8ecdf3742b..88036fd75797 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -15,11 +15,13 @@
15 15
16bool f2fs_may_inline(struct inode *inode) 16bool f2fs_may_inline(struct inode *inode)
17{ 17{
18 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
19 block_t nr_blocks; 18 block_t nr_blocks;
20 loff_t i_size; 19 loff_t i_size;
21 20
22 if (!test_opt(sbi, INLINE_DATA)) 21 if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
22 return false;
23
24 if (f2fs_is_atomic_file(inode))
23 return false; 25 return false;
24 26
25 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; 27 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
@@ -35,7 +37,6 @@ bool f2fs_may_inline(struct inode *inode)
35 37
36int f2fs_read_inline_data(struct inode *inode, struct page *page) 38int f2fs_read_inline_data(struct inode *inode, struct page *page)
37{ 39{
38 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
39 struct page *ipage; 40 struct page *ipage;
40 void *src_addr, *dst_addr; 41 void *src_addr, *dst_addr;
41 42
@@ -44,7 +45,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
44 goto out; 45 goto out;
45 } 46 }
46 47
47 ipage = get_node_page(sbi, inode->i_ino); 48 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
48 if (IS_ERR(ipage)) { 49 if (IS_ERR(ipage)) {
49 unlock_page(page); 50 unlock_page(page);
50 return PTR_ERR(ipage); 51 return PTR_ERR(ipage);
@@ -73,7 +74,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
73 struct dnode_of_data dn; 74 struct dnode_of_data dn;
74 void *src_addr, *dst_addr; 75 void *src_addr, *dst_addr;
75 block_t new_blk_addr; 76 block_t new_blk_addr;
76 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 77 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
77 struct f2fs_io_info fio = { 78 struct f2fs_io_info fio = {
78 .type = DATA, 79 .type = DATA,
79 .rw = WRITE_SYNC | REQ_PRIO, 80 .rw = WRITE_SYNC | REQ_PRIO,
@@ -189,13 +190,12 @@ int f2fs_write_inline_data(struct inode *inode,
189 190
190void truncate_inline_data(struct inode *inode, u64 from) 191void truncate_inline_data(struct inode *inode, u64 from)
191{ 192{
192 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
193 struct page *ipage; 193 struct page *ipage;
194 194
195 if (from >= MAX_INLINE_DATA) 195 if (from >= MAX_INLINE_DATA)
196 return; 196 return;
197 197
198 ipage = get_node_page(sbi, inode->i_ino); 198 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
199 if (IS_ERR(ipage)) 199 if (IS_ERR(ipage))
200 return; 200 return;
201 201
@@ -209,7 +209,7 @@ void truncate_inline_data(struct inode *inode, u64 from)
209 209
210bool recover_inline_data(struct inode *inode, struct page *npage) 210bool recover_inline_data(struct inode *inode, struct page *npage)
211{ 211{
212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 212 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
213 struct f2fs_inode *ri = NULL; 213 struct f2fs_inode *ri = NULL;
214 void *src_addr, *dst_addr; 214 void *src_addr, *dst_addr;
215 struct page *ipage; 215 struct page *ipage;
@@ -229,7 +229,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
229 ri && (ri->i_inline & F2FS_INLINE_DATA)) { 229 ri && (ri->i_inline & F2FS_INLINE_DATA)) {
230process_inline: 230process_inline:
231 ipage = get_node_page(sbi, inode->i_ino); 231 ipage = get_node_page(sbi, inode->i_ino);
232 f2fs_bug_on(IS_ERR(ipage)); 232 f2fs_bug_on(sbi, IS_ERR(ipage));
233 233
234 f2fs_wait_on_page_writeback(ipage, NODE); 234 f2fs_wait_on_page_writeback(ipage, NODE);
235 235
@@ -243,7 +243,7 @@ process_inline:
243 243
244 if (f2fs_has_inline_data(inode)) { 244 if (f2fs_has_inline_data(inode)) {
245 ipage = get_node_page(sbi, inode->i_ino); 245 ipage = get_node_page(sbi, inode->i_ino);
246 f2fs_bug_on(IS_ERR(ipage)); 246 f2fs_bug_on(sbi, IS_ERR(ipage));
247 f2fs_wait_on_page_writeback(ipage, NODE); 247 f2fs_wait_on_page_writeback(ipage, NODE);
248 zero_user_segment(ipage, INLINE_DATA_OFFSET, 248 zero_user_segment(ipage, INLINE_DATA_OFFSET,
249 INLINE_DATA_OFFSET + MAX_INLINE_DATA); 249 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2c39999f3868..0deead4505e7 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -69,7 +69,7 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
69 69
70static int do_read_inode(struct inode *inode) 70static int do_read_inode(struct inode *inode)
71{ 71{
72 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 72 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
73 struct f2fs_inode_info *fi = F2FS_I(inode); 73 struct f2fs_inode_info *fi = F2FS_I(inode);
74 struct page *node_page; 74 struct page *node_page;
75 struct f2fs_inode *ri; 75 struct f2fs_inode *ri;
@@ -218,7 +218,7 @@ void update_inode(struct inode *inode, struct page *node_page)
218 218
219void update_inode_page(struct inode *inode) 219void update_inode_page(struct inode *inode)
220{ 220{
221 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 221 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
222 struct page *node_page; 222 struct page *node_page;
223retry: 223retry:
224 node_page = get_node_page(sbi, inode->i_ino); 224 node_page = get_node_page(sbi, inode->i_ino);
@@ -238,7 +238,7 @@ retry:
238 238
239int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 239int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
240{ 240{
241 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 241 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
242 242
243 if (inode->i_ino == F2FS_NODE_INO(sbi) || 243 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
244 inode->i_ino == F2FS_META_INO(sbi)) 244 inode->i_ino == F2FS_META_INO(sbi))
@@ -266,9 +266,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
266 */ 266 */
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
271 271
272 /* some remained atomic pages should discarded */
273 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
274 commit_inmem_pages(inode, true);
275
272 trace_f2fs_evict_inode(inode); 276 trace_f2fs_evict_inode(inode);
273 truncate_inode_pages_final(&inode->i_data); 277 truncate_inode_pages_final(&inode->i_data);
274 278
@@ -276,7 +280,7 @@ void f2fs_evict_inode(struct inode *inode)
276 inode->i_ino == F2FS_META_INO(sbi)) 280 inode->i_ino == F2FS_META_INO(sbi))
277 goto out_clear; 281 goto out_clear;
278 282
279 f2fs_bug_on(get_dirty_dents(inode)); 283 f2fs_bug_on(sbi, get_dirty_pages(inode));
280 remove_dirty_dir_inode(inode); 284 remove_dirty_dir_inode(inode);
281 285
282 if (inode->i_nlink || is_bad_inode(inode)) 286 if (inode->i_nlink || is_bad_inode(inode))
@@ -306,3 +310,26 @@ no_delete:
306out_clear: 310out_clear:
307 clear_inode(inode); 311 clear_inode(inode);
308} 312}
313
314/* caller should call f2fs_lock_op() */
315void handle_failed_inode(struct inode *inode)
316{
317 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
318
319 clear_nlink(inode);
320 make_bad_inode(inode);
321 unlock_new_inode(inode);
322
323 i_size_write(inode, 0);
324 if (F2FS_HAS_BLOCKS(inode))
325 f2fs_truncate(inode);
326
327 remove_inode_page(inode);
328 stat_dec_inline_inode(inode);
329
330 alloc_nid_failed(sbi, inode->i_ino);
331 f2fs_unlock_op(sbi);
332
333 /* iput will drop the inode object */
334 iput(inode);
335}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ee103fd7283c..0d2526e5aa11 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -23,7 +23,7 @@
23 23
24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
25{ 25{
26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 26 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
@@ -102,7 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 105 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
106 struct inode *inode; 106 struct inode *inode;
107 nid_t ino = 0; 107 nid_t ino = 0;
108 int err; 108 int err;
@@ -123,9 +123,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
123 123
124 f2fs_lock_op(sbi); 124 f2fs_lock_op(sbi);
125 err = f2fs_add_link(dentry, inode); 125 err = f2fs_add_link(dentry, inode);
126 f2fs_unlock_op(sbi);
127 if (err) 126 if (err)
128 goto out; 127 goto out;
128 f2fs_unlock_op(sbi);
129 129
130 alloc_nid_done(sbi, ino); 130 alloc_nid_done(sbi, ino);
131 131
@@ -133,9 +133,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
133 unlock_new_inode(inode); 133 unlock_new_inode(inode);
134 return 0; 134 return 0;
135out: 135out:
136 clear_nlink(inode); 136 handle_failed_inode(inode);
137 iget_failed(inode);
138 alloc_nid_failed(sbi, ino);
139 return err; 137 return err;
140} 138}
141 139
@@ -143,7 +141,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
143 struct dentry *dentry) 141 struct dentry *dentry)
144{ 142{
145 struct inode *inode = old_dentry->d_inode; 143 struct inode *inode = old_dentry->d_inode;
146 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 144 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
147 int err; 145 int err;
148 146
149 f2fs_balance_fs(sbi); 147 f2fs_balance_fs(sbi);
@@ -154,15 +152,16 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
154 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 152 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
155 f2fs_lock_op(sbi); 153 f2fs_lock_op(sbi);
156 err = f2fs_add_link(dentry, inode); 154 err = f2fs_add_link(dentry, inode);
157 f2fs_unlock_op(sbi);
158 if (err) 155 if (err)
159 goto out; 156 goto out;
157 f2fs_unlock_op(sbi);
160 158
161 d_instantiate(dentry, inode); 159 d_instantiate(dentry, inode);
162 return 0; 160 return 0;
163out: 161out:
164 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 162 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
165 iput(inode); 163 iput(inode);
164 f2fs_unlock_op(sbi);
166 return err; 165 return err;
167} 166}
168 167
@@ -203,7 +202,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
203 202
204static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 203static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
205{ 204{
206 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 205 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
207 struct inode *inode = dentry->d_inode; 206 struct inode *inode = dentry->d_inode;
208 struct f2fs_dir_entry *de; 207 struct f2fs_dir_entry *de;
209 struct page *page; 208 struct page *page;
@@ -237,7 +236,7 @@ fail:
237static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 236static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
238 const char *symname) 237 const char *symname)
239{ 238{
240 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
241 struct inode *inode; 240 struct inode *inode;
242 size_t symlen = strlen(symname) + 1; 241 size_t symlen = strlen(symname) + 1;
243 int err; 242 int err;
@@ -253,9 +252,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
253 252
254 f2fs_lock_op(sbi); 253 f2fs_lock_op(sbi);
255 err = f2fs_add_link(dentry, inode); 254 err = f2fs_add_link(dentry, inode);
256 f2fs_unlock_op(sbi);
257 if (err) 255 if (err)
258 goto out; 256 goto out;
257 f2fs_unlock_op(sbi);
259 258
260 err = page_symlink(inode, symname, symlen); 259 err = page_symlink(inode, symname, symlen);
261 alloc_nid_done(sbi, inode->i_ino); 260 alloc_nid_done(sbi, inode->i_ino);
@@ -264,15 +263,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
264 unlock_new_inode(inode); 263 unlock_new_inode(inode);
265 return err; 264 return err;
266out: 265out:
267 clear_nlink(inode); 266 handle_failed_inode(inode);
268 iget_failed(inode);
269 alloc_nid_failed(sbi, inode->i_ino);
270 return err; 267 return err;
271} 268}
272 269
273static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 270static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
274{ 271{
275 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 272 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
276 struct inode *inode; 273 struct inode *inode;
277 int err; 274 int err;
278 275
@@ -290,9 +287,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
290 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 287 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
291 f2fs_lock_op(sbi); 288 f2fs_lock_op(sbi);
292 err = f2fs_add_link(dentry, inode); 289 err = f2fs_add_link(dentry, inode);
293 f2fs_unlock_op(sbi);
294 if (err) 290 if (err)
295 goto out_fail; 291 goto out_fail;
292 f2fs_unlock_op(sbi);
296 293
297 alloc_nid_done(sbi, inode->i_ino); 294 alloc_nid_done(sbi, inode->i_ino);
298 295
@@ -303,9 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
303 300
304out_fail: 301out_fail:
305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 302 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
306 clear_nlink(inode); 303 handle_failed_inode(inode);
307 iget_failed(inode);
308 alloc_nid_failed(sbi, inode->i_ino);
309 return err; 304 return err;
310} 305}
311 306
@@ -320,7 +315,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
320static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 315static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
321 umode_t mode, dev_t rdev) 316 umode_t mode, dev_t rdev)
322{ 317{
323 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 318 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
324 struct inode *inode; 319 struct inode *inode;
325 int err = 0; 320 int err = 0;
326 321
@@ -338,25 +333,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
338 333
339 f2fs_lock_op(sbi); 334 f2fs_lock_op(sbi);
340 err = f2fs_add_link(dentry, inode); 335 err = f2fs_add_link(dentry, inode);
341 f2fs_unlock_op(sbi);
342 if (err) 336 if (err)
343 goto out; 337 goto out;
338 f2fs_unlock_op(sbi);
344 339
345 alloc_nid_done(sbi, inode->i_ino); 340 alloc_nid_done(sbi, inode->i_ino);
346 d_instantiate(dentry, inode); 341 d_instantiate(dentry, inode);
347 unlock_new_inode(inode); 342 unlock_new_inode(inode);
348 return 0; 343 return 0;
349out: 344out:
350 clear_nlink(inode); 345 handle_failed_inode(inode);
351 iget_failed(inode);
352 alloc_nid_failed(sbi, inode->i_ino);
353 return err; 346 return err;
354} 347}
355 348
356static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 349static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
357 struct inode *new_dir, struct dentry *new_dentry) 350 struct inode *new_dir, struct dentry *new_dentry)
358{ 351{
359 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); 352 struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
360 struct inode *old_inode = old_dentry->d_inode; 353 struct inode *old_inode = old_dentry->d_inode;
361 struct inode *new_inode = new_dentry->d_inode; 354 struct inode *new_inode = new_dentry->d_inode;
362 struct page *old_dir_page; 355 struct page *old_dir_page;
@@ -480,8 +473,7 @@ out:
480static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, 473static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
481 struct inode *new_dir, struct dentry *new_dentry) 474 struct inode *new_dir, struct dentry *new_dentry)
482{ 475{
483 struct super_block *sb = old_dir->i_sb; 476 struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
484 struct f2fs_sb_info *sbi = F2FS_SB(sb);
485 struct inode *old_inode = old_dentry->d_inode; 477 struct inode *old_inode = old_dentry->d_inode;
486 struct inode *new_inode = new_dentry->d_inode; 478 struct inode *new_inode = new_dentry->d_inode;
487 struct page *old_dir_page, *new_dir_page; 479 struct page *old_dir_page, *new_dir_page;
@@ -642,7 +634,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
642 634
643static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 635static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
644{ 636{
645 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 637 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
646 struct inode *inode; 638 struct inode *inode;
647 int err; 639 int err;
648 640
@@ -678,10 +670,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
678release_out: 670release_out:
679 release_orphan_inode(sbi); 671 release_orphan_inode(sbi);
680out: 672out:
681 f2fs_unlock_op(sbi); 673 handle_failed_inode(inode);
682 clear_nlink(inode);
683 iget_failed(inode);
684 alloc_nid_failed(sbi, inode->i_ino);
685 return err; 674 return err;
686} 675}
687 676
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 45378196e19a..44b8afef43d9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -54,7 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
54static void clear_node_page_dirty(struct page *page) 54static void clear_node_page_dirty(struct page *page)
55{ 55{
56 struct address_space *mapping = page->mapping; 56 struct address_space *mapping = page->mapping;
57 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
58 unsigned int long flags; 57 unsigned int long flags;
59 58
60 if (PageDirty(page)) { 59 if (PageDirty(page)) {
@@ -65,7 +64,7 @@ static void clear_node_page_dirty(struct page *page)
65 spin_unlock_irqrestore(&mapping->tree_lock, flags); 64 spin_unlock_irqrestore(&mapping->tree_lock, flags);
66 65
67 clear_page_dirty_for_io(page); 66 clear_page_dirty_for_io(page);
68 dec_page_count(sbi, F2FS_DIRTY_NODES); 67 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
69 } 68 }
70 ClearPageUptodate(page); 69 ClearPageUptodate(page);
71} 70}
@@ -92,7 +91,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
92 /* get current nat block page with lock */ 91 /* get current nat block page with lock */
93 src_page = get_meta_page(sbi, src_off); 92 src_page = get_meta_page(sbi, src_off);
94 dst_page = grab_meta_page(sbi, dst_off); 93 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page)); 94 f2fs_bug_on(sbi, PageDirty(src_page));
96 95
97 src_addr = page_address(src_page); 96 src_addr = page_address(src_page);
98 dst_addr = page_address(dst_page); 97 dst_addr = page_address(dst_page);
@@ -124,44 +123,99 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
124 kmem_cache_free(nat_entry_slab, e); 123 kmem_cache_free(nat_entry_slab, e);
125} 124}
126 125
127int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 126static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
127 struct nat_entry *ne)
128{
129 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
130 struct nat_entry_set *head;
131
132 if (get_nat_flag(ne, IS_DIRTY))
133 return;
134retry:
135 head = radix_tree_lookup(&nm_i->nat_set_root, set);
136 if (!head) {
137 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
138
139 INIT_LIST_HEAD(&head->entry_list);
140 INIT_LIST_HEAD(&head->set_list);
141 head->set = set;
142 head->entry_cnt = 0;
143
144 if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
145 cond_resched();
146 goto retry;
147 }
148 }
149 list_move_tail(&ne->list, &head->entry_list);
150 nm_i->dirty_nat_cnt++;
151 head->entry_cnt++;
152 set_nat_flag(ne, IS_DIRTY, true);
153}
154
155static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
156 struct nat_entry *ne)
157{
158 nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
159 struct nat_entry_set *head;
160
161 head = radix_tree_lookup(&nm_i->nat_set_root, set);
162 if (head) {
163 list_move_tail(&ne->list, &nm_i->nat_entries);
164 set_nat_flag(ne, IS_DIRTY, false);
165 head->entry_cnt--;
166 nm_i->dirty_nat_cnt--;
167 }
168}
169
170static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
171 nid_t start, unsigned int nr, struct nat_entry_set **ep)
172{
173 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
174 start, nr);
175}
176
177bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
128{ 178{
129 struct f2fs_nm_info *nm_i = NM_I(sbi); 179 struct f2fs_nm_info *nm_i = NM_I(sbi);
130 struct nat_entry *e; 180 struct nat_entry *e;
131 int is_cp = 1; 181 bool is_cp = true;
132 182
133 read_lock(&nm_i->nat_tree_lock); 183 read_lock(&nm_i->nat_tree_lock);
134 e = __lookup_nat_cache(nm_i, nid); 184 e = __lookup_nat_cache(nm_i, nid);
135 if (e && !e->checkpointed) 185 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
136 is_cp = 0; 186 is_cp = false;
137 read_unlock(&nm_i->nat_tree_lock); 187 read_unlock(&nm_i->nat_tree_lock);
138 return is_cp; 188 return is_cp;
139} 189}
140 190
141bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) 191bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
142{ 192{
143 struct f2fs_nm_info *nm_i = NM_I(sbi); 193 struct f2fs_nm_info *nm_i = NM_I(sbi);
144 struct nat_entry *e; 194 struct nat_entry *e;
145 bool fsync_done = false; 195 bool fsynced = false;
146 196
147 read_lock(&nm_i->nat_tree_lock); 197 read_lock(&nm_i->nat_tree_lock);
148 e = __lookup_nat_cache(nm_i, nid); 198 e = __lookup_nat_cache(nm_i, ino);
149 if (e) 199 if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
150 fsync_done = e->fsync_done; 200 fsynced = true;
151 read_unlock(&nm_i->nat_tree_lock); 201 read_unlock(&nm_i->nat_tree_lock);
152 return fsync_done; 202 return fsynced;
153} 203}
154 204
155void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) 205bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
156{ 206{
157 struct f2fs_nm_info *nm_i = NM_I(sbi); 207 struct f2fs_nm_info *nm_i = NM_I(sbi);
158 struct nat_entry *e; 208 struct nat_entry *e;
209 bool need_update = true;
159 210
160 write_lock(&nm_i->nat_tree_lock); 211 read_lock(&nm_i->nat_tree_lock);
161 e = __lookup_nat_cache(nm_i, nid); 212 e = __lookup_nat_cache(nm_i, ino);
162 if (e) 213 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
163 e->fsync_done = false; 214 (get_nat_flag(e, IS_CHECKPOINTED) ||
164 write_unlock(&nm_i->nat_tree_lock); 215 get_nat_flag(e, HAS_FSYNCED_INODE)))
216 need_update = false;
217 read_unlock(&nm_i->nat_tree_lock);
218 return need_update;
165} 219}
166 220
167static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 221static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
@@ -177,7 +231,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
177 } 231 }
178 memset(new, 0, sizeof(struct nat_entry)); 232 memset(new, 0, sizeof(struct nat_entry));
179 nat_set_nid(new, nid); 233 nat_set_nid(new, nid);
180 new->checkpointed = true; 234 nat_reset_flag(new);
181 list_add_tail(&new->list, &nm_i->nat_entries); 235 list_add_tail(&new->list, &nm_i->nat_entries);
182 nm_i->nat_cnt++; 236 nm_i->nat_cnt++;
183 return new; 237 return new;
@@ -216,7 +270,7 @@ retry:
216 goto retry; 270 goto retry;
217 } 271 }
218 e->ni = *ni; 272 e->ni = *ni;
219 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 273 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
220 } else if (new_blkaddr == NEW_ADDR) { 274 } else if (new_blkaddr == NEW_ADDR) {
221 /* 275 /*
222 * when nid is reallocated, 276 * when nid is reallocated,
@@ -224,16 +278,16 @@ retry:
224 * So, reinitialize it with new information. 278 * So, reinitialize it with new information.
225 */ 279 */
226 e->ni = *ni; 280 e->ni = *ni;
227 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 281 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
228 } 282 }
229 283
230 /* sanity check */ 284 /* sanity check */
231 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 285 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
232 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 286 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
233 new_blkaddr == NULL_ADDR); 287 new_blkaddr == NULL_ADDR);
234 f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && 288 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
235 new_blkaddr == NEW_ADDR); 289 new_blkaddr == NEW_ADDR);
236 f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && 290 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
237 nat_get_blkaddr(e) != NULL_ADDR && 291 nat_get_blkaddr(e) != NULL_ADDR &&
238 new_blkaddr == NEW_ADDR); 292 new_blkaddr == NEW_ADDR);
239 293
@@ -245,12 +299,17 @@ retry:
245 299
246 /* change address */ 300 /* change address */
247 nat_set_blkaddr(e, new_blkaddr); 301 nat_set_blkaddr(e, new_blkaddr);
302 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
303 set_nat_flag(e, IS_CHECKPOINTED, false);
248 __set_nat_cache_dirty(nm_i, e); 304 __set_nat_cache_dirty(nm_i, e);
249 305
250 /* update fsync_mark if its inode nat entry is still alive */ 306 /* update fsync_mark if its inode nat entry is still alive */
251 e = __lookup_nat_cache(nm_i, ni->ino); 307 e = __lookup_nat_cache(nm_i, ni->ino);
252 if (e) 308 if (e) {
253 e->fsync_done = fsync_done; 309 if (fsync_done && ni->nid == ni->ino)
310 set_nat_flag(e, HAS_FSYNCED_INODE, true);
311 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
312 }
254 write_unlock(&nm_i->nat_tree_lock); 313 write_unlock(&nm_i->nat_tree_lock);
255} 314}
256 315
@@ -411,7 +470,7 @@ got:
411 */ 470 */
412int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 471int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
413{ 472{
414 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 473 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
415 struct page *npage[4]; 474 struct page *npage[4];
416 struct page *parent; 475 struct page *parent;
417 int offset[4]; 476 int offset[4];
@@ -504,15 +563,15 @@ release_out:
504 563
505static void truncate_node(struct dnode_of_data *dn) 564static void truncate_node(struct dnode_of_data *dn)
506{ 565{
507 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 566 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
508 struct node_info ni; 567 struct node_info ni;
509 568
510 get_node_info(sbi, dn->nid, &ni); 569 get_node_info(sbi, dn->nid, &ni);
511 if (dn->inode->i_blocks == 0) { 570 if (dn->inode->i_blocks == 0) {
512 f2fs_bug_on(ni.blk_addr != NULL_ADDR); 571 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
513 goto invalidate; 572 goto invalidate;
514 } 573 }
515 f2fs_bug_on(ni.blk_addr == NULL_ADDR); 574 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
516 575
517 /* Deallocate node address */ 576 /* Deallocate node address */
518 invalidate_blocks(sbi, ni.blk_addr); 577 invalidate_blocks(sbi, ni.blk_addr);
@@ -540,14 +599,13 @@ invalidate:
540 599
541static int truncate_dnode(struct dnode_of_data *dn) 600static int truncate_dnode(struct dnode_of_data *dn)
542{ 601{
543 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
544 struct page *page; 602 struct page *page;
545 603
546 if (dn->nid == 0) 604 if (dn->nid == 0)
547 return 1; 605 return 1;
548 606
549 /* get direct node */ 607 /* get direct node */
550 page = get_node_page(sbi, dn->nid); 608 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
551 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) 609 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
552 return 1; 610 return 1;
553 else if (IS_ERR(page)) 611 else if (IS_ERR(page))
@@ -564,7 +622,6 @@ static int truncate_dnode(struct dnode_of_data *dn)
564static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, 622static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
565 int ofs, int depth) 623 int ofs, int depth)
566{ 624{
567 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
568 struct dnode_of_data rdn = *dn; 625 struct dnode_of_data rdn = *dn;
569 struct page *page; 626 struct page *page;
570 struct f2fs_node *rn; 627 struct f2fs_node *rn;
@@ -578,7 +635,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
578 635
579 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); 636 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
580 637
581 page = get_node_page(sbi, dn->nid); 638 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
582 if (IS_ERR(page)) { 639 if (IS_ERR(page)) {
583 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); 640 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
584 return PTR_ERR(page); 641 return PTR_ERR(page);
@@ -636,7 +693,6 @@ out_err:
636static int truncate_partial_nodes(struct dnode_of_data *dn, 693static int truncate_partial_nodes(struct dnode_of_data *dn,
637 struct f2fs_inode *ri, int *offset, int depth) 694 struct f2fs_inode *ri, int *offset, int depth)
638{ 695{
639 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
640 struct page *pages[2]; 696 struct page *pages[2];
641 nid_t nid[3]; 697 nid_t nid[3];
642 nid_t child_nid; 698 nid_t child_nid;
@@ -651,7 +707,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
651 /* get indirect nodes in the path */ 707 /* get indirect nodes in the path */
652 for (i = 0; i < idx + 1; i++) { 708 for (i = 0; i < idx + 1; i++) {
653 /* reference count'll be increased */ 709 /* reference count'll be increased */
654 pages[i] = get_node_page(sbi, nid[i]); 710 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
655 if (IS_ERR(pages[i])) { 711 if (IS_ERR(pages[i])) {
656 err = PTR_ERR(pages[i]); 712 err = PTR_ERR(pages[i]);
657 idx = i - 1; 713 idx = i - 1;
@@ -696,7 +752,7 @@ fail:
696 */ 752 */
697int truncate_inode_blocks(struct inode *inode, pgoff_t from) 753int truncate_inode_blocks(struct inode *inode, pgoff_t from)
698{ 754{
699 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 755 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
700 int err = 0, cont = 1; 756 int err = 0, cont = 1;
701 int level, offset[4], noffset[4]; 757 int level, offset[4], noffset[4];
702 unsigned int nofs = 0; 758 unsigned int nofs = 0;
@@ -792,7 +848,7 @@ fail:
792 848
793int truncate_xattr_node(struct inode *inode, struct page *page) 849int truncate_xattr_node(struct inode *inode, struct page *page)
794{ 850{
795 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 851 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
796 nid_t nid = F2FS_I(inode)->i_xattr_nid; 852 nid_t nid = F2FS_I(inode)->i_xattr_nid;
797 struct dnode_of_data dn; 853 struct dnode_of_data dn;
798 struct page *npage; 854 struct page *npage;
@@ -840,7 +896,8 @@ void remove_inode_page(struct inode *inode)
840 truncate_data_blocks_range(&dn, 1); 896 truncate_data_blocks_range(&dn, 1);
841 897
842 /* 0 is possible, after f2fs_new_inode() has failed */ 898 /* 0 is possible, after f2fs_new_inode() has failed */
843 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 899 f2fs_bug_on(F2FS_I_SB(inode),
900 inode->i_blocks != 0 && inode->i_blocks != 1);
844 901
845 /* will put inode & node pages */ 902 /* will put inode & node pages */
846 truncate_node(&dn); 903 truncate_node(&dn);
@@ -860,7 +917,7 @@ struct page *new_inode_page(struct inode *inode)
860struct page *new_node_page(struct dnode_of_data *dn, 917struct page *new_node_page(struct dnode_of_data *dn,
861 unsigned int ofs, struct page *ipage) 918 unsigned int ofs, struct page *ipage)
862{ 919{
863 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 920 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
864 struct node_info old_ni, new_ni; 921 struct node_info old_ni, new_ni;
865 struct page *page; 922 struct page *page;
866 int err; 923 int err;
@@ -880,7 +937,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
880 get_node_info(sbi, dn->nid, &old_ni); 937 get_node_info(sbi, dn->nid, &old_ni);
881 938
882 /* Reinitialize old_ni with new node page */ 939 /* Reinitialize old_ni with new node page */
883 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 940 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
884 new_ni = old_ni; 941 new_ni = old_ni;
885 new_ni.ino = dn->inode->i_ino; 942 new_ni.ino = dn->inode->i_ino;
886 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 943 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
@@ -918,7 +975,7 @@ fail:
918 */ 975 */
919static int read_node_page(struct page *page, int rw) 976static int read_node_page(struct page *page, int rw)
920{ 977{
921 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 978 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
922 struct node_info ni; 979 struct node_info ni;
923 980
924 get_node_info(sbi, page->index, &ni); 981 get_node_info(sbi, page->index, &ni);
@@ -994,7 +1051,7 @@ got_it:
994 */ 1051 */
995struct page *get_node_page_ra(struct page *parent, int start) 1052struct page *get_node_page_ra(struct page *parent, int start)
996{ 1053{
997 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); 1054 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
998 struct blk_plug plug; 1055 struct blk_plug plug;
999 struct page *page; 1056 struct page *page;
1000 int err, i, end; 1057 int err, i, end;
@@ -1124,10 +1181,14 @@ continue_unlock:
1124 1181
1125 /* called by fsync() */ 1182 /* called by fsync() */
1126 if (ino && IS_DNODE(page)) { 1183 if (ino && IS_DNODE(page)) {
1127 int mark = !is_checkpointed_node(sbi, ino);
1128 set_fsync_mark(page, 1); 1184 set_fsync_mark(page, 1);
1129 if (IS_INODE(page)) 1185 if (IS_INODE(page)) {
1130 set_dentry_mark(page, mark); 1186 if (!is_checkpointed_node(sbi, ino) &&
1187 !has_fsynced_inode(sbi, ino))
1188 set_dentry_mark(page, 1);
1189 else
1190 set_dentry_mark(page, 0);
1191 }
1131 nwritten++; 1192 nwritten++;
1132 } else { 1193 } else {
1133 set_fsync_mark(page, 0); 1194 set_fsync_mark(page, 0);
@@ -1206,7 +1267,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1206static int f2fs_write_node_page(struct page *page, 1267static int f2fs_write_node_page(struct page *page,
1207 struct writeback_control *wbc) 1268 struct writeback_control *wbc)
1208{ 1269{
1209 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1270 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1210 nid_t nid; 1271 nid_t nid;
1211 block_t new_addr; 1272 block_t new_addr;
1212 struct node_info ni; 1273 struct node_info ni;
@@ -1226,7 +1287,7 @@ static int f2fs_write_node_page(struct page *page,
1226 1287
1227 /* get old block addr of this node page */ 1288 /* get old block addr of this node page */
1228 nid = nid_of_node(page); 1289 nid = nid_of_node(page);
1229 f2fs_bug_on(page->index != nid); 1290 f2fs_bug_on(sbi, page->index != nid);
1230 1291
1231 get_node_info(sbi, nid, &ni); 1292 get_node_info(sbi, nid, &ni);
1232 1293
@@ -1257,7 +1318,7 @@ redirty_out:
1257static int f2fs_write_node_pages(struct address_space *mapping, 1318static int f2fs_write_node_pages(struct address_space *mapping,
1258 struct writeback_control *wbc) 1319 struct writeback_control *wbc)
1259{ 1320{
1260 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1321 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
1261 long diff; 1322 long diff;
1262 1323
1263 trace_f2fs_writepages(mapping->host, wbc, NODE); 1324 trace_f2fs_writepages(mapping->host, wbc, NODE);
@@ -1282,15 +1343,12 @@ skip_write:
1282 1343
1283static int f2fs_set_node_page_dirty(struct page *page) 1344static int f2fs_set_node_page_dirty(struct page *page)
1284{ 1345{
1285 struct address_space *mapping = page->mapping;
1286 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1287
1288 trace_f2fs_set_page_dirty(page, NODE); 1346 trace_f2fs_set_page_dirty(page, NODE);
1289 1347
1290 SetPageUptodate(page); 1348 SetPageUptodate(page);
1291 if (!PageDirty(page)) { 1349 if (!PageDirty(page)) {
1292 __set_page_dirty_nobuffers(page); 1350 __set_page_dirty_nobuffers(page);
1293 inc_page_count(sbi, F2FS_DIRTY_NODES); 1351 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1294 SetPagePrivate(page); 1352 SetPagePrivate(page);
1295 return 1; 1353 return 1;
1296 } 1354 }
@@ -1301,9 +1359,8 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1301 unsigned int length) 1359 unsigned int length)
1302{ 1360{
1303 struct inode *inode = page->mapping->host; 1361 struct inode *inode = page->mapping->host;
1304 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1305 if (PageDirty(page)) 1362 if (PageDirty(page))
1306 dec_page_count(sbi, F2FS_DIRTY_NODES); 1363 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES);
1307 ClearPagePrivate(page); 1364 ClearPagePrivate(page);
1308} 1365}
1309 1366
@@ -1356,7 +1413,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1356 read_lock(&nm_i->nat_tree_lock); 1413 read_lock(&nm_i->nat_tree_lock);
1357 ne = __lookup_nat_cache(nm_i, nid); 1414 ne = __lookup_nat_cache(nm_i, nid);
1358 if (ne && 1415 if (ne &&
1359 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) 1416 (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1417 nat_get_blkaddr(ne) != NULL_ADDR))
1360 allocated = true; 1418 allocated = true;
1361 read_unlock(&nm_i->nat_tree_lock); 1419 read_unlock(&nm_i->nat_tree_lock);
1362 if (allocated) 1420 if (allocated)
@@ -1413,7 +1471,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1413 break; 1471 break;
1414 1472
1415 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1473 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1416 f2fs_bug_on(blk_addr == NEW_ADDR); 1474 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1417 if (blk_addr == NULL_ADDR) { 1475 if (blk_addr == NULL_ADDR) {
1418 if (add_free_nid(sbi, start_nid, true) < 0) 1476 if (add_free_nid(sbi, start_nid, true) < 0)
1419 break; 1477 break;
@@ -1483,12 +1541,12 @@ retry:
1483 1541
1484 /* We should not use stale free nids created by build_free_nids */ 1542 /* We should not use stale free nids created by build_free_nids */
1485 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1543 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1486 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1544 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1487 list_for_each_entry(i, &nm_i->free_nid_list, list) 1545 list_for_each_entry(i, &nm_i->free_nid_list, list)
1488 if (i->state == NID_NEW) 1546 if (i->state == NID_NEW)
1489 break; 1547 break;
1490 1548
1491 f2fs_bug_on(i->state != NID_NEW); 1549 f2fs_bug_on(sbi, i->state != NID_NEW);
1492 *nid = i->nid; 1550 *nid = i->nid;
1493 i->state = NID_ALLOC; 1551 i->state = NID_ALLOC;
1494 nm_i->fcnt--; 1552 nm_i->fcnt--;
@@ -1514,7 +1572,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1514 1572
1515 spin_lock(&nm_i->free_nid_list_lock); 1573 spin_lock(&nm_i->free_nid_list_lock);
1516 i = __lookup_free_nid_list(nm_i, nid); 1574 i = __lookup_free_nid_list(nm_i, nid);
1517 f2fs_bug_on(!i || i->state != NID_ALLOC); 1575 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1518 __del_from_free_nid_list(nm_i, i); 1576 __del_from_free_nid_list(nm_i, i);
1519 spin_unlock(&nm_i->free_nid_list_lock); 1577 spin_unlock(&nm_i->free_nid_list_lock);
1520 1578
@@ -1535,7 +1593,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1535 1593
1536 spin_lock(&nm_i->free_nid_list_lock); 1594 spin_lock(&nm_i->free_nid_list_lock);
1537 i = __lookup_free_nid_list(nm_i, nid); 1595 i = __lookup_free_nid_list(nm_i, nid);
1538 f2fs_bug_on(!i || i->state != NID_ALLOC); 1596 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1539 if (!available_free_memory(sbi, FREE_NIDS)) { 1597 if (!available_free_memory(sbi, FREE_NIDS)) {
1540 __del_from_free_nid_list(nm_i, i); 1598 __del_from_free_nid_list(nm_i, i);
1541 need_free = true; 1599 need_free = true;
@@ -1551,14 +1609,13 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1551 1609
1552void recover_inline_xattr(struct inode *inode, struct page *page) 1610void recover_inline_xattr(struct inode *inode, struct page *page)
1553{ 1611{
1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1555 void *src_addr, *dst_addr; 1612 void *src_addr, *dst_addr;
1556 size_t inline_size; 1613 size_t inline_size;
1557 struct page *ipage; 1614 struct page *ipage;
1558 struct f2fs_inode *ri; 1615 struct f2fs_inode *ri;
1559 1616
1560 ipage = get_node_page(sbi, inode->i_ino); 1617 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
1561 f2fs_bug_on(IS_ERR(ipage)); 1618 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
1562 1619
1563 ri = F2FS_INODE(page); 1620 ri = F2FS_INODE(page);
1564 if (!(ri->i_inline & F2FS_INLINE_XATTR)) { 1621 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
@@ -1579,7 +1636,7 @@ update_inode:
1579 1636
1580void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1637void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1581{ 1638{
1582 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1639 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1640 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1584 nid_t new_xnid = nid_of_node(page); 1641 nid_t new_xnid = nid_of_node(page);
1585 struct node_info ni; 1642 struct node_info ni;
@@ -1590,7 +1647,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1590 1647
1591 /* Deallocate node address */ 1648 /* Deallocate node address */
1592 get_node_info(sbi, prev_xnid, &ni); 1649 get_node_info(sbi, prev_xnid, &ni);
1593 f2fs_bug_on(ni.blk_addr == NULL_ADDR); 1650 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
1594 invalidate_blocks(sbi, ni.blk_addr); 1651 invalidate_blocks(sbi, ni.blk_addr);
1595 dec_valid_node_count(sbi, inode); 1652 dec_valid_node_count(sbi, inode);
1596 set_node_addr(sbi, &ni, NULL_ADDR, false); 1653 set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -1598,7 +1655,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1598recover_xnid: 1655recover_xnid:
1599 /* 2: allocate new xattr nid */ 1656 /* 2: allocate new xattr nid */
1600 if (unlikely(!inc_valid_node_count(sbi, inode))) 1657 if (unlikely(!inc_valid_node_count(sbi, inode)))
1601 f2fs_bug_on(1); 1658 f2fs_bug_on(sbi, 1);
1602 1659
1603 remove_free_nid(NM_I(sbi), new_xnid); 1660 remove_free_nid(NM_I(sbi), new_xnid);
1604 get_node_info(sbi, new_xnid, &ni); 1661 get_node_info(sbi, new_xnid, &ni);
@@ -1691,7 +1748,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1691 struct f2fs_summary *sum_entry; 1748 struct f2fs_summary *sum_entry;
1692 struct inode *inode = sbi->sb->s_bdev->bd_inode; 1749 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1693 block_t addr; 1750 block_t addr;
1694 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1751 int bio_blocks = MAX_BIO_BLOCKS(sbi);
1695 struct page *pages[bio_blocks]; 1752 struct page *pages[bio_blocks];
1696 int i, idx, last_offset, nrpages, err = 0; 1753 int i, idx, last_offset, nrpages, err = 0;
1697 1754
@@ -1733,89 +1790,6 @@ skip:
1733 return err; 1790 return err;
1734} 1791}
1735 1792
1736static struct nat_entry_set *grab_nat_entry_set(void)
1737{
1738 struct nat_entry_set *nes =
1739 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1740
1741 nes->entry_cnt = 0;
1742 INIT_LIST_HEAD(&nes->set_list);
1743 INIT_LIST_HEAD(&nes->entry_list);
1744 return nes;
1745}
1746
1747static void release_nat_entry_set(struct nat_entry_set *nes,
1748 struct f2fs_nm_info *nm_i)
1749{
1750 f2fs_bug_on(!list_empty(&nes->entry_list));
1751
1752 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1753 list_del(&nes->set_list);
1754 kmem_cache_free(nat_entry_set_slab, nes);
1755}
1756
1757static void adjust_nat_entry_set(struct nat_entry_set *nes,
1758 struct list_head *head)
1759{
1760 struct nat_entry_set *next = nes;
1761
1762 if (list_is_last(&nes->set_list, head))
1763 return;
1764
1765 list_for_each_entry_continue(next, head, set_list)
1766 if (nes->entry_cnt <= next->entry_cnt)
1767 break;
1768
1769 list_move_tail(&nes->set_list, &next->set_list);
1770}
1771
1772static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1773{
1774 struct nat_entry_set *nes;
1775 nid_t start_nid = START_NID(ne->ni.nid);
1776
1777 list_for_each_entry(nes, head, set_list) {
1778 if (nes->start_nid == start_nid) {
1779 list_move_tail(&ne->list, &nes->entry_list);
1780 nes->entry_cnt++;
1781 adjust_nat_entry_set(nes, head);
1782 return;
1783 }
1784 }
1785
1786 nes = grab_nat_entry_set();
1787
1788 nes->start_nid = start_nid;
1789 list_move_tail(&ne->list, &nes->entry_list);
1790 nes->entry_cnt++;
1791 list_add(&nes->set_list, head);
1792}
1793
1794static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1795{
1796 struct f2fs_nm_info *nm_i = NM_I(sbi);
1797 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1798 struct list_head *set_list = &nm_i->nat_entry_set;
1799 struct nat_entry *ne, *tmp;
1800
1801 write_lock(&nm_i->nat_tree_lock);
1802 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1803 if (nat_get_blkaddr(ne) == NEW_ADDR)
1804 continue;
1805 add_nat_entry(ne, set_list);
1806 nm_i->dirty_nat_cnt++;
1807 }
1808 write_unlock(&nm_i->nat_tree_lock);
1809}
1810
1811static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1812{
1813 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1814 return true;
1815 else
1816 return false;
1817}
1818
1819static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1793static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1820{ 1794{
1821 struct f2fs_nm_info *nm_i = NM_I(sbi); 1795 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1850,99 +1824,130 @@ found:
1850 mutex_unlock(&curseg->curseg_mutex); 1824 mutex_unlock(&curseg->curseg_mutex);
1851} 1825}
1852 1826
1853/* 1827static void __adjust_nat_entry_set(struct nat_entry_set *nes,
1854 * This function is called during the checkpointing process. 1828 struct list_head *head, int max)
1855 */
1856void flush_nat_entries(struct f2fs_sb_info *sbi)
1857{ 1829{
1858 struct f2fs_nm_info *nm_i = NM_I(sbi); 1830 struct nat_entry_set *cur;
1859 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1860 struct f2fs_summary_block *sum = curseg->sum_blk;
1861 struct nat_entry_set *nes, *tmp;
1862 struct list_head *head = &nm_i->nat_entry_set;
1863 bool to_journal = true;
1864 1831
1865 /* merge nat entries of dirty list to nat entry set temporarily */ 1832 if (nes->entry_cnt >= max)
1866 merge_nats_in_set(sbi); 1833 goto add_out;
1867 1834
1868 /* 1835 list_for_each_entry(cur, head, set_list) {
1869 * if there are no enough space in journal to store dirty nat 1836 if (cur->entry_cnt >= nes->entry_cnt) {
1870 * entries, remove all entries from journal and merge them 1837 list_add(&nes->set_list, cur->set_list.prev);
1871 * into nat entry set. 1838 return;
1872 */ 1839 }
1873 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1874 remove_nats_in_journal(sbi);
1875
1876 /*
1877 * merge nat entries of dirty list to nat entry set temporarily
1878 */
1879 merge_nats_in_set(sbi);
1880 } 1840 }
1841add_out:
1842 list_add_tail(&nes->set_list, head);
1843}
1881 1844
1882 if (!nm_i->dirty_nat_cnt) 1845static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1883 return; 1846 struct nat_entry_set *set)
1847{
1848 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1849 struct f2fs_summary_block *sum = curseg->sum_blk;
1850 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
1851 bool to_journal = true;
1852 struct f2fs_nat_block *nat_blk;
1853 struct nat_entry *ne, *cur;
1854 struct page *page = NULL;
1884 1855
1885 /* 1856 /*
1886 * there are two steps to flush nat entries: 1857 * there are two steps to flush nat entries:
1887 * #1, flush nat entries to journal in current hot data summary block. 1858 * #1, flush nat entries to journal in current hot data summary block.
1888 * #2, flush nat entries to nat page. 1859 * #2, flush nat entries to nat page.
1889 */ 1860 */
1890 list_for_each_entry_safe(nes, tmp, head, set_list) { 1861 if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
1891 struct f2fs_nat_block *nat_blk; 1862 to_journal = false;
1892 struct nat_entry *ne, *cur;
1893 struct page *page;
1894 nid_t start_nid = nes->start_nid;
1895 1863
1896 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) 1864 if (to_journal) {
1897 to_journal = false; 1865 mutex_lock(&curseg->curseg_mutex);
1866 } else {
1867 page = get_next_nat_page(sbi, start_nid);
1868 nat_blk = page_address(page);
1869 f2fs_bug_on(sbi, !nat_blk);
1870 }
1871
1872 /* flush dirty nats in nat entry set */
1873 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
1874 struct f2fs_nat_entry *raw_ne;
1875 nid_t nid = nat_get_nid(ne);
1876 int offset;
1877
1878 if (nat_get_blkaddr(ne) == NEW_ADDR)
1879 continue;
1898 1880
1899 if (to_journal) { 1881 if (to_journal) {
1900 mutex_lock(&curseg->curseg_mutex); 1882 offset = lookup_journal_in_cursum(sum,
1883 NAT_JOURNAL, nid, 1);
1884 f2fs_bug_on(sbi, offset < 0);
1885 raw_ne = &nat_in_journal(sum, offset);
1886 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1901 } else { 1887 } else {
1902 page = get_next_nat_page(sbi, start_nid); 1888 raw_ne = &nat_blk->entries[nid - start_nid];
1903 nat_blk = page_address(page);
1904 f2fs_bug_on(!nat_blk);
1905 } 1889 }
1890 raw_nat_from_node_info(raw_ne, &ne->ni);
1906 1891
1907 /* flush dirty nats in nat entry set */ 1892 write_lock(&NM_I(sbi)->nat_tree_lock);
1908 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { 1893 nat_reset_flag(ne);
1909 struct f2fs_nat_entry *raw_ne; 1894 __clear_nat_cache_dirty(NM_I(sbi), ne);
1910 nid_t nid = nat_get_nid(ne); 1895 write_unlock(&NM_I(sbi)->nat_tree_lock);
1911 int offset;
1912 1896
1913 if (to_journal) { 1897 if (nat_get_blkaddr(ne) == NULL_ADDR)
1914 offset = lookup_journal_in_cursum(sum, 1898 add_free_nid(sbi, nid, false);
1915 NAT_JOURNAL, nid, 1); 1899 }
1916 f2fs_bug_on(offset < 0);
1917 raw_ne = &nat_in_journal(sum, offset);
1918 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1919 } else {
1920 raw_ne = &nat_blk->entries[nid - start_nid];
1921 }
1922 raw_nat_from_node_info(raw_ne, &ne->ni);
1923 1900
1924 if (nat_get_blkaddr(ne) == NULL_ADDR && 1901 if (to_journal)
1925 add_free_nid(sbi, nid, false) <= 0) { 1902 mutex_unlock(&curseg->curseg_mutex);
1926 write_lock(&nm_i->nat_tree_lock); 1903 else
1927 __del_from_nat_cache(nm_i, ne); 1904 f2fs_put_page(page, 1);
1928 write_unlock(&nm_i->nat_tree_lock);
1929 } else {
1930 write_lock(&nm_i->nat_tree_lock);
1931 __clear_nat_cache_dirty(nm_i, ne);
1932 write_unlock(&nm_i->nat_tree_lock);
1933 }
1934 }
1935 1905
1936 if (to_journal) 1906 if (!set->entry_cnt) {
1937 mutex_unlock(&curseg->curseg_mutex); 1907 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1938 else 1908 kmem_cache_free(nat_entry_set_slab, set);
1939 f2fs_put_page(page, 1); 1909 }
1910}
1911
1912/*
1913 * This function is called during the checkpointing process.
1914 */
1915void flush_nat_entries(struct f2fs_sb_info *sbi)
1916{
1917 struct f2fs_nm_info *nm_i = NM_I(sbi);
1918 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1919 struct f2fs_summary_block *sum = curseg->sum_blk;
1920 struct nat_entry_set *setvec[NATVEC_SIZE];
1921 struct nat_entry_set *set, *tmp;
1922 unsigned int found;
1923 nid_t set_idx = 0;
1924 LIST_HEAD(sets);
1925
1926 /*
1927 * if there are no enough space in journal to store dirty nat
1928 * entries, remove all entries from journal and merge them
1929 * into nat entry set.
1930 */
1931 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1932 remove_nats_in_journal(sbi);
1940 1933
1941 release_nat_entry_set(nes, nm_i); 1934 if (!nm_i->dirty_nat_cnt)
1935 return;
1936
1937 while ((found = __gang_lookup_nat_set(nm_i,
1938 set_idx, NATVEC_SIZE, setvec))) {
1939 unsigned idx;
1940 set_idx = setvec[found - 1]->set + 1;
1941 for (idx = 0; idx < found; idx++)
1942 __adjust_nat_entry_set(setvec[idx], &sets,
1943 MAX_NAT_JENTRIES(sum));
1942 } 1944 }
1943 1945
1944 f2fs_bug_on(!list_empty(head)); 1946 /* flush dirty nats in nat entry set */
1945 f2fs_bug_on(nm_i->dirty_nat_cnt); 1947 list_for_each_entry_safe(set, tmp, &sets, set_list)
1948 __flush_nat_entry_set(sbi, set);
1949
1950 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
1946} 1951}
1947 1952
1948static int init_node_manager(struct f2fs_sb_info *sbi) 1953static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1969,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1969 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 1974 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1970 INIT_LIST_HEAD(&nm_i->free_nid_list); 1975 INIT_LIST_HEAD(&nm_i->free_nid_list);
1971 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1976 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1977 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
1972 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1973 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1974 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1975 1979
1976 mutex_init(&nm_i->build_lock); 1980 mutex_init(&nm_i->build_lock);
1977 spin_lock_init(&nm_i->free_nid_list_lock); 1981 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -2020,14 +2024,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2020 /* destroy free nid list */ 2024 /* destroy free nid list */
2021 spin_lock(&nm_i->free_nid_list_lock); 2025 spin_lock(&nm_i->free_nid_list_lock);
2022 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2026 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
2023 f2fs_bug_on(i->state == NID_ALLOC); 2027 f2fs_bug_on(sbi, i->state == NID_ALLOC);
2024 __del_from_free_nid_list(nm_i, i); 2028 __del_from_free_nid_list(nm_i, i);
2025 nm_i->fcnt--; 2029 nm_i->fcnt--;
2026 spin_unlock(&nm_i->free_nid_list_lock); 2030 spin_unlock(&nm_i->free_nid_list_lock);
2027 kmem_cache_free(free_nid_slab, i); 2031 kmem_cache_free(free_nid_slab, i);
2028 spin_lock(&nm_i->free_nid_list_lock); 2032 spin_lock(&nm_i->free_nid_list_lock);
2029 } 2033 }
2030 f2fs_bug_on(nm_i->fcnt); 2034 f2fs_bug_on(sbi, nm_i->fcnt);
2031 spin_unlock(&nm_i->free_nid_list_lock); 2035 spin_unlock(&nm_i->free_nid_list_lock);
2032 2036
2033 /* destroy nat cache */ 2037 /* destroy nat cache */
@@ -2039,7 +2043,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2039 for (idx = 0; idx < found; idx++) 2043 for (idx = 0; idx < found; idx++)
2040 __del_from_nat_cache(nm_i, natvec[idx]); 2044 __del_from_nat_cache(nm_i, natvec[idx]);
2041 } 2045 }
2042 f2fs_bug_on(nm_i->nat_cnt); 2046 f2fs_bug_on(sbi, nm_i->nat_cnt);
2043 write_unlock(&nm_i->nat_tree_lock); 2047 write_unlock(&nm_i->nat_tree_lock);
2044 2048
2045 kfree(nm_i->nat_bitmap); 2049 kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 8a116a407599..8d5e6e0dd840 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -39,10 +39,16 @@ struct node_info {
39 unsigned char version; /* version of the node */ 39 unsigned char version; /* version of the node */
40}; 40};
41 41
42enum {
43 IS_CHECKPOINTED, /* is it checkpointed before? */
44 HAS_FSYNCED_INODE, /* is the inode fsynced before? */
45 HAS_LAST_FSYNC, /* has the latest node fsync mark? */
46 IS_DIRTY, /* this nat entry is dirty? */
47};
48
42struct nat_entry { 49struct nat_entry {
43 struct list_head list; /* for clean or dirty nat list */ 50 struct list_head list; /* for clean or dirty nat list */
44 bool checkpointed; /* whether it is checkpointed or not */ 51 unsigned char flag; /* for node information bits */
45 bool fsync_done; /* whether the latest node has fsync mark */
46 struct node_info ni; /* in-memory node information */ 52 struct node_info ni; /* in-memory node information */
47}; 53};
48 54
@@ -55,18 +61,32 @@ struct nat_entry {
55#define nat_get_version(nat) (nat->ni.version) 61#define nat_get_version(nat) (nat->ni.version)
56#define nat_set_version(nat, v) (nat->ni.version = v) 62#define nat_set_version(nat, v) (nat->ni.version = v)
57 63
58#define __set_nat_cache_dirty(nm_i, ne) \
59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0)
63#define __clear_nat_cache_dirty(nm_i, ne) \
64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0)
68#define inc_node_version(version) (++version) 64#define inc_node_version(version) (++version)
69 65
66static inline void set_nat_flag(struct nat_entry *ne,
67 unsigned int type, bool set)
68{
69 unsigned char mask = 0x01 << type;
70 if (set)
71 ne->flag |= mask;
72 else
73 ne->flag &= ~mask;
74}
75
76static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
77{
78 unsigned char mask = 0x01 << type;
79 return ne->flag & mask;
80}
81
82static inline void nat_reset_flag(struct nat_entry *ne)
83{
84 /* these states can be set only after checkpoint was done */
85 set_nat_flag(ne, IS_CHECKPOINTED, true);
86 set_nat_flag(ne, HAS_FSYNCED_INODE, false);
87 set_nat_flag(ne, HAS_LAST_FSYNC, true);
88}
89
70static inline void node_info_from_raw_nat(struct node_info *ni, 90static inline void node_info_from_raw_nat(struct node_info *ni,
71 struct f2fs_nat_entry *raw_ne) 91 struct f2fs_nat_entry *raw_ne)
72{ 92{
@@ -90,9 +110,9 @@ enum mem_type {
90}; 110};
91 111
92struct nat_entry_set { 112struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */ 113 struct list_head set_list; /* link with other nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */ 114 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */ 115 nid_t set; /* set number*/
96 unsigned int entry_cnt; /* the # of nat entries in set */ 116 unsigned int entry_cnt; /* the # of nat entries in set */
97}; 117};
98 118
@@ -110,18 +130,19 @@ struct free_nid {
110 int state; /* in use or not: NID_NEW or NID_ALLOC */ 130 int state; /* in use or not: NID_NEW or NID_ALLOC */
111}; 131};
112 132
113static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) 133static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
114{ 134{
115 struct f2fs_nm_info *nm_i = NM_I(sbi); 135 struct f2fs_nm_info *nm_i = NM_I(sbi);
116 struct free_nid *fnid; 136 struct free_nid *fnid;
117 137
118 if (nm_i->fcnt <= 0)
119 return -1;
120 spin_lock(&nm_i->free_nid_list_lock); 138 spin_lock(&nm_i->free_nid_list_lock);
139 if (nm_i->fcnt <= 0) {
140 spin_unlock(&nm_i->free_nid_list_lock);
141 return;
142 }
121 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); 143 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
122 *nid = fnid->nid; 144 *nid = fnid->nid;
123 spin_unlock(&nm_i->free_nid_list_lock); 145 spin_unlock(&nm_i->free_nid_list_lock);
124 return 0;
125} 146}
126 147
127/* 148/*
@@ -197,8 +218,7 @@ static inline void copy_node_footer(struct page *dst, struct page *src)
197 218
198static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) 219static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
199{ 220{
200 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 221 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
201 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
202 struct f2fs_node *rn = F2FS_NODE(page); 222 struct f2fs_node *rn = F2FS_NODE(page);
203 223
204 rn->footer.cp_ver = ckpt->checkpoint_ver; 224 rn->footer.cp_ver = ckpt->checkpoint_ver;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 756c41cd2582..ebd013225788 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -14,6 +14,37 @@
14#include "node.h" 14#include "node.h"
15#include "segment.h" 15#include "segment.h"
16 16
17/*
18 * Roll forward recovery scenarios.
19 *
20 * [Term] F: fsync_mark, D: dentry_mark
21 *
22 * 1. inode(x) | CP | inode(x) | dnode(F)
23 * -> Update the latest inode(x).
24 *
25 * 2. inode(x) | CP | inode(F) | dnode(F)
26 * -> No problem.
27 *
28 * 3. inode(x) | CP | dnode(F) | inode(x)
29 * -> Recover to the latest dnode(F), and drop the last inode(x)
30 *
31 * 4. inode(x) | CP | dnode(F) | inode(F)
32 * -> No problem.
33 *
34 * 5. CP | inode(x) | dnode(F)
35 * -> The inode(DF) was missing. Should drop this dnode(F).
36 *
37 * 6. CP | inode(DF) | dnode(F)
38 * -> No problem.
39 *
40 * 7. CP | dnode(F) | inode(DF)
41 * -> If f2fs_iget fails, then goto next to find inode(DF).
42 *
43 * 8. CP | dnode(F) | inode(x)
44 * -> If f2fs_iget fails, then goto next to find inode(DF).
45 * But it will fail due to no inode(DF).
46 */
47
17static struct kmem_cache *fsync_entry_slab; 48static struct kmem_cache *fsync_entry_slab;
18 49
19bool space_for_roll_forward(struct f2fs_sb_info *sbi) 50bool space_for_roll_forward(struct f2fs_sb_info *sbi)
@@ -36,7 +67,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
36 return NULL; 67 return NULL;
37} 68}
38 69
39static int recover_dentry(struct page *ipage, struct inode *inode) 70static int recover_dentry(struct inode *inode, struct page *ipage)
40{ 71{
41 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 72 struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
42 nid_t pino = le32_to_cpu(raw_inode->i_pino); 73 nid_t pino = le32_to_cpu(raw_inode->i_pino);
@@ -75,7 +106,7 @@ retry:
75 err = -EEXIST; 106 err = -EEXIST;
76 goto out_unmap_put; 107 goto out_unmap_put;
77 } 108 }
78 err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); 109 err = acquire_orphan_inode(F2FS_I_SB(inode));
79 if (err) { 110 if (err) {
80 iput(einode); 111 iput(einode);
81 goto out_unmap_put; 112 goto out_unmap_put;
@@ -110,35 +141,28 @@ out:
110 return err; 141 return err;
111} 142}
112 143
113static int recover_inode(struct inode *inode, struct page *node_page) 144static void recover_inode(struct inode *inode, struct page *page)
114{ 145{
115 struct f2fs_inode *raw_inode = F2FS_INODE(node_page); 146 struct f2fs_inode *raw = F2FS_INODE(page);
116
117 if (!IS_INODE(node_page))
118 return 0;
119 147
120 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 148 inode->i_mode = le16_to_cpu(raw->i_mode);
121 i_size_write(inode, le64_to_cpu(raw_inode->i_size)); 149 i_size_write(inode, le64_to_cpu(raw->i_size));
122 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 150 inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
123 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 151 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
124 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 152 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
125 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 153 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
126 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); 154 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
127 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); 155 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
128
129 if (is_dent_dnode(node_page))
130 return recover_dentry(node_page, inode);
131 156
132 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", 157 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
133 ino_of_node(node_page), raw_inode->i_name); 158 ino_of_node(page), F2FS_INODE(page)->i_name);
134 return 0;
135} 159}
136 160
137static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 161static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
138{ 162{
139 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 163 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
140 struct curseg_info *curseg; 164 struct curseg_info *curseg;
141 struct page *page; 165 struct page *page = NULL;
142 block_t blkaddr; 166 block_t blkaddr;
143 int err = 0; 167 int err = 0;
144 168
@@ -146,20 +170,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
146 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 170 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
147 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 171 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
148 172
149 /* read node page */
150 page = alloc_page(GFP_F2FS_ZERO);
151 if (!page)
152 return -ENOMEM;
153 lock_page(page);
154
155 while (1) { 173 while (1) {
156 struct fsync_inode_entry *entry; 174 struct fsync_inode_entry *entry;
157 175
158 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 176 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
159 if (err) 177 return 0;
160 return err;
161 178
162 lock_page(page); 179 page = get_meta_page_ra(sbi, blkaddr);
163 180
164 if (cp_ver != cpver_of_node(page)) 181 if (cp_ver != cpver_of_node(page))
165 break; 182 break;
@@ -180,33 +197,38 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
180 } 197 }
181 198
182 /* add this fsync inode to the list */ 199 /* add this fsync inode to the list */
183 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); 200 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
184 if (!entry) { 201 if (!entry) {
185 err = -ENOMEM; 202 err = -ENOMEM;
186 break; 203 break;
187 } 204 }
188 205 /*
206 * CP | dnode(F) | inode(DF)
207 * For this case, we should not give up now.
208 */
189 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 209 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
190 if (IS_ERR(entry->inode)) { 210 if (IS_ERR(entry->inode)) {
191 err = PTR_ERR(entry->inode); 211 err = PTR_ERR(entry->inode);
192 kmem_cache_free(fsync_entry_slab, entry); 212 kmem_cache_free(fsync_entry_slab, entry);
213 if (err == -ENOENT)
214 goto next;
193 break; 215 break;
194 } 216 }
195 list_add_tail(&entry->list, head); 217 list_add_tail(&entry->list, head);
196 } 218 }
197 entry->blkaddr = blkaddr; 219 entry->blkaddr = blkaddr;
198 220
199 err = recover_inode(entry->inode, page); 221 if (IS_INODE(page)) {
200 if (err && err != -ENOENT) 222 entry->last_inode = blkaddr;
201 break; 223 if (is_dent_dnode(page))
224 entry->last_dentry = blkaddr;
225 }
202next: 226next:
203 /* check next segment */ 227 /* check next segment */
204 blkaddr = next_blkaddr_of_node(page); 228 blkaddr = next_blkaddr_of_node(page);
229 f2fs_put_page(page, 1);
205 } 230 }
206 231 f2fs_put_page(page, 1);
207 unlock_page(page);
208 __free_pages(page, 0);
209
210 return err; 232 return err;
211} 233}
212 234
@@ -279,16 +301,30 @@ got_it:
279 ino = ino_of_node(node_page); 301 ino = ino_of_node(node_page);
280 f2fs_put_page(node_page, 1); 302 f2fs_put_page(node_page, 1);
281 303
282 /* Deallocate previous index in the node page */ 304 if (ino != dn->inode->i_ino) {
283 inode = f2fs_iget(sbi->sb, ino); 305 /* Deallocate previous index in the node page */
284 if (IS_ERR(inode)) 306 inode = f2fs_iget(sbi->sb, ino);
285 return PTR_ERR(inode); 307 if (IS_ERR(inode))
308 return PTR_ERR(inode);
309 } else {
310 inode = dn->inode;
311 }
286 312
287 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
288 le16_to_cpu(sum.ofs_in_node); 314 le16_to_cpu(sum.ofs_in_node);
289 315
290 truncate_hole(inode, bidx, bidx + 1); 316 if (ino != dn->inode->i_ino) {
291 iput(inode); 317 truncate_hole(inode, bidx, bidx + 1);
318 iput(inode);
319 } else {
320 struct dnode_of_data tdn;
321 set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
322 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
323 return 0;
324 if (tdn.data_blkaddr != NULL_ADDR)
325 truncate_data_blocks_range(&tdn, 1);
326 f2fs_put_page(tdn.node_page, 1);
327 }
292 return 0; 328 return 0;
293} 329}
294 330
@@ -331,8 +367,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
331 f2fs_wait_on_page_writeback(dn.node_page, NODE); 367 f2fs_wait_on_page_writeback(dn.node_page, NODE);
332 368
333 get_node_info(sbi, dn.nid, &ni); 369 get_node_info(sbi, dn.nid, &ni);
334 f2fs_bug_on(ni.ino != ino_of_node(page)); 370 f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
335 f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); 371 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
336 372
337 for (; start < end; start++) { 373 for (; start < end; start++) {
338 block_t src, dest; 374 block_t src, dest;
@@ -344,7 +380,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
344 if (src == NULL_ADDR) { 380 if (src == NULL_ADDR) {
345 err = reserve_new_block(&dn); 381 err = reserve_new_block(&dn);
346 /* We should not get -ENOSPC */ 382 /* We should not get -ENOSPC */
347 f2fs_bug_on(err); 383 f2fs_bug_on(sbi, err);
348 } 384 }
349 385
350 /* Check the previous node page having this index */ 386 /* Check the previous node page having this index */
@@ -386,7 +422,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
386{ 422{
387 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 423 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
388 struct curseg_info *curseg; 424 struct curseg_info *curseg;
389 struct page *page; 425 struct page *page = NULL;
390 int err = 0; 426 int err = 0;
391 block_t blkaddr; 427 block_t blkaddr;
392 428
@@ -394,32 +430,41 @@ static int recover_data(struct f2fs_sb_info *sbi,
394 curseg = CURSEG_I(sbi, type); 430 curseg = CURSEG_I(sbi, type);
395 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 431 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
396 432
397 /* read node page */
398 page = alloc_page(GFP_F2FS_ZERO);
399 if (!page)
400 return -ENOMEM;
401
402 lock_page(page);
403
404 while (1) { 433 while (1) {
405 struct fsync_inode_entry *entry; 434 struct fsync_inode_entry *entry;
406 435
407 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); 436 if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
408 if (err) 437 break;
409 return err;
410 438
411 lock_page(page); 439 page = get_meta_page_ra(sbi, blkaddr);
412 440
413 if (cp_ver != cpver_of_node(page)) 441 if (cp_ver != cpver_of_node(page)) {
442 f2fs_put_page(page, 1);
414 break; 443 break;
444 }
415 445
416 entry = get_fsync_inode(head, ino_of_node(page)); 446 entry = get_fsync_inode(head, ino_of_node(page));
417 if (!entry) 447 if (!entry)
418 goto next; 448 goto next;
419 449 /*
450 * inode(x) | CP | inode(x) | dnode(F)
451 * In this case, we can lose the latest inode(x).
452 * So, call recover_inode for the inode update.
453 */
454 if (entry->last_inode == blkaddr)
455 recover_inode(entry->inode, page);
456 if (entry->last_dentry == blkaddr) {
457 err = recover_dentry(entry->inode, page);
458 if (err) {
459 f2fs_put_page(page, 1);
460 break;
461 }
462 }
420 err = do_recover_data(sbi, entry->inode, page, blkaddr); 463 err = do_recover_data(sbi, entry->inode, page, blkaddr);
421 if (err) 464 if (err) {
465 f2fs_put_page(page, 1);
422 break; 466 break;
467 }
423 468
424 if (entry->blkaddr == blkaddr) { 469 if (entry->blkaddr == blkaddr) {
425 iput(entry->inode); 470 iput(entry->inode);
@@ -429,11 +474,8 @@ static int recover_data(struct f2fs_sb_info *sbi,
429next: 474next:
430 /* check next segment */ 475 /* check next segment */
431 blkaddr = next_blkaddr_of_node(page); 476 blkaddr = next_blkaddr_of_node(page);
477 f2fs_put_page(page, 1);
432 } 478 }
433
434 unlock_page(page);
435 __free_pages(page, 0);
436
437 if (!err) 479 if (!err)
438 allocate_new_segments(sbi); 480 allocate_new_segments(sbi);
439 return err; 481 return err;
@@ -474,11 +516,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
474 /* step #2: recover data */ 516 /* step #2: recover data */
475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 517 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
476 if (!err) 518 if (!err)
477 f2fs_bug_on(!list_empty(&inode_list)); 519 f2fs_bug_on(sbi, !list_empty(&inode_list));
478out: 520out:
479 destroy_fsync_dnodes(&inode_list); 521 destroy_fsync_dnodes(&inode_list);
480 kmem_cache_destroy(fsync_entry_slab); 522 kmem_cache_destroy(fsync_entry_slab);
481 523
524 /* truncate meta pages to be used by the recovery */
525 truncate_inode_pages_range(META_MAPPING(sbi),
526 MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
527
482 if (err) { 528 if (err) {
483 truncate_inode_pages_final(NODE_MAPPING(sbi)); 529 truncate_inode_pages_final(NODE_MAPPING(sbi));
484 truncate_inode_pages_final(META_MAPPING(sbi)); 530 truncate_inode_pages_final(META_MAPPING(sbi));
@@ -494,8 +540,11 @@ out:
494 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 540 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
495 mutex_unlock(&sbi->cp_mutex); 541 mutex_unlock(&sbi->cp_mutex);
496 } else if (need_writecp) { 542 } else if (need_writecp) {
543 struct cp_control cpc = {
544 .reason = CP_SYNC,
545 };
497 mutex_unlock(&sbi->cp_mutex); 546 mutex_unlock(&sbi->cp_mutex);
498 write_checkpoint(sbi, false); 547 write_checkpoint(sbi, &cpc);
499 } else { 548 } else {
500 mutex_unlock(&sbi->cp_mutex); 549 mutex_unlock(&sbi->cp_mutex);
501 } 550 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0aa337cd5bba..923cb76fdc46 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -25,6 +25,8 @@
25#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
26 26
27static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *sit_entry_set_slab;
29static struct kmem_cache *inmem_entry_slab;
28 30
29/* 31/*
30 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 32 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -172,6 +174,60 @@ found_middle:
172 return result + __reverse_ffz(tmp); 174 return result + __reverse_ffz(tmp);
173} 175}
174 176
177void register_inmem_page(struct inode *inode, struct page *page)
178{
179 struct f2fs_inode_info *fi = F2FS_I(inode);
180 struct inmem_pages *new;
181
182 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
183
184 /* add atomic page indices to the list */
185 new->page = page;
186 INIT_LIST_HEAD(&new->list);
187
188 /* increase reference count with clean state */
189 mutex_lock(&fi->inmem_lock);
190 get_page(page);
191 list_add_tail(&new->list, &fi->inmem_pages);
192 mutex_unlock(&fi->inmem_lock);
193}
194
195void commit_inmem_pages(struct inode *inode, bool abort)
196{
197 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
198 struct f2fs_inode_info *fi = F2FS_I(inode);
199 struct inmem_pages *cur, *tmp;
200 bool submit_bio = false;
201 struct f2fs_io_info fio = {
202 .type = DATA,
203 .rw = WRITE_SYNC,
204 };
205
206 f2fs_balance_fs(sbi);
207 f2fs_lock_op(sbi);
208
209 mutex_lock(&fi->inmem_lock);
210 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
211 lock_page(cur->page);
212 if (!abort && cur->page->mapping == inode->i_mapping) {
213 f2fs_wait_on_page_writeback(cur->page, DATA);
214 if (clear_page_dirty_for_io(cur->page))
215 inode_dec_dirty_pages(inode);
216 do_write_data_page(cur->page, &fio);
217 submit_bio = true;
218 }
219 f2fs_put_page(cur->page, 1);
220 list_del(&cur->list);
221 kmem_cache_free(inmem_entry_slab, cur);
222 }
223 if (submit_bio)
224 f2fs_submit_merged_bio(sbi, DATA, WRITE);
225 mutex_unlock(&fi->inmem_lock);
226
227 filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
228 f2fs_unlock_op(sbi);
229}
230
175/* 231/*
176 * This function balances dirty node and dentry pages. 232 * This function balances dirty node and dentry pages.
177 * In addition, it controls garbage collection. 233 * In addition, it controls garbage collection.
@@ -205,24 +261,20 @@ repeat:
205 if (kthread_should_stop()) 261 if (kthread_should_stop())
206 return 0; 262 return 0;
207 263
208 spin_lock(&fcc->issue_lock); 264 if (!llist_empty(&fcc->issue_list)) {
209 if (fcc->issue_list) {
210 fcc->dispatch_list = fcc->issue_list;
211 fcc->issue_list = fcc->issue_tail = NULL;
212 }
213 spin_unlock(&fcc->issue_lock);
214
215 if (fcc->dispatch_list) {
216 struct bio *bio = bio_alloc(GFP_NOIO, 0); 265 struct bio *bio = bio_alloc(GFP_NOIO, 0);
217 struct flush_cmd *cmd, *next; 266 struct flush_cmd *cmd, *next;
218 int ret; 267 int ret;
219 268
269 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
270 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
271
220 bio->bi_bdev = sbi->sb->s_bdev; 272 bio->bi_bdev = sbi->sb->s_bdev;
221 ret = submit_bio_wait(WRITE_FLUSH, bio); 273 ret = submit_bio_wait(WRITE_FLUSH, bio);
222 274
223 for (cmd = fcc->dispatch_list; cmd; cmd = next) { 275 llist_for_each_entry_safe(cmd, next,
276 fcc->dispatch_list, llnode) {
224 cmd->ret = ret; 277 cmd->ret = ret;
225 next = cmd->next;
226 complete(&cmd->wait); 278 complete(&cmd->wait);
227 } 279 }
228 bio_put(bio); 280 bio_put(bio);
@@ -230,7 +282,7 @@ repeat:
230 } 282 }
231 283
232 wait_event_interruptible(*q, 284 wait_event_interruptible(*q,
233 kthread_should_stop() || fcc->issue_list); 285 kthread_should_stop() || !llist_empty(&fcc->issue_list));
234 goto repeat; 286 goto repeat;
235} 287}
236 288
@@ -249,15 +301,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 301 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
250 302
251 init_completion(&cmd.wait); 303 init_completion(&cmd.wait);
252 cmd.next = NULL;
253 304
254 spin_lock(&fcc->issue_lock); 305 llist_add(&cmd.llnode, &fcc->issue_list);
255 if (fcc->issue_list)
256 fcc->issue_tail->next = &cmd;
257 else
258 fcc->issue_list = &cmd;
259 fcc->issue_tail = &cmd;
260 spin_unlock(&fcc->issue_lock);
261 306
262 if (!fcc->dispatch_list) 307 if (!fcc->dispatch_list)
263 wake_up(&fcc->flush_wait_queue); 308 wake_up(&fcc->flush_wait_queue);
@@ -276,8 +321,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
276 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); 321 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
277 if (!fcc) 322 if (!fcc)
278 return -ENOMEM; 323 return -ENOMEM;
279 spin_lock_init(&fcc->issue_lock);
280 init_waitqueue_head(&fcc->flush_wait_queue); 324 init_waitqueue_head(&fcc->flush_wait_queue);
325 init_llist_head(&fcc->issue_list);
281 SM_I(sbi)->cmd_control_info = fcc; 326 SM_I(sbi)->cmd_control_info = fcc;
282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 327 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 328 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
@@ -317,6 +362,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
317 struct seg_entry *sentry = get_seg_entry(sbi, segno); 362 struct seg_entry *sentry = get_seg_entry(sbi, segno);
318 enum dirty_type t = sentry->type; 363 enum dirty_type t = sentry->type;
319 364
365 if (unlikely(t >= DIRTY)) {
366 f2fs_bug_on(sbi, 1);
367 return;
368 }
320 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) 369 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
321 dirty_i->nr_dirty[t]++; 370 dirty_i->nr_dirty[t]++;
322 } 371 }
@@ -376,8 +425,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
376static int f2fs_issue_discard(struct f2fs_sb_info *sbi, 425static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 block_t blkstart, block_t blklen) 426 block_t blkstart, block_t blklen)
378{ 427{
379 sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); 428 sector_t start = SECTOR_FROM_BLOCK(blkstart);
380 sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); 429 sector_t len = SECTOR_FROM_BLOCK(blklen);
381 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 430 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 431 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
383} 432}
@@ -392,22 +441,48 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
392 } 441 }
393} 442}
394 443
395static void add_discard_addrs(struct f2fs_sb_info *sbi, 444static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
396 unsigned int segno, struct seg_entry *se)
397{ 445{
398 struct list_head *head = &SM_I(sbi)->discard_list; 446 struct list_head *head = &SM_I(sbi)->discard_list;
399 struct discard_entry *new; 447 struct discard_entry *new;
400 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 448 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
401 int max_blocks = sbi->blocks_per_seg; 449 int max_blocks = sbi->blocks_per_seg;
450 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
402 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 451 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
403 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 452 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
404 unsigned long dmap[entries]; 453 unsigned long dmap[entries];
405 unsigned int start = 0, end = -1; 454 unsigned int start = 0, end = -1;
455 bool force = (cpc->reason == CP_DISCARD);
406 int i; 456 int i;
407 457
408 if (!test_opt(sbi, DISCARD)) 458 if (!force && !test_opt(sbi, DISCARD))
409 return; 459 return;
410 460
461 if (force && !se->valid_blocks) {
462 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
463 /*
464 * if this segment is registered in the prefree list, then
465 * we should skip adding a discard candidate, and let the
466 * checkpoint do that later.
467 */
468 mutex_lock(&dirty_i->seglist_lock);
469 if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
470 mutex_unlock(&dirty_i->seglist_lock);
471 cpc->trimmed += sbi->blocks_per_seg;
472 return;
473 }
474 mutex_unlock(&dirty_i->seglist_lock);
475
476 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
477 INIT_LIST_HEAD(&new->list);
478 new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
479 new->len = sbi->blocks_per_seg;
480 list_add_tail(&new->list, head);
481 SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
482 cpc->trimmed += sbi->blocks_per_seg;
483 return;
484 }
485
411 /* zero block will be discarded through the prefree list */ 486 /* zero block will be discarded through the prefree list */
412 if (!se->valid_blocks || se->valid_blocks == max_blocks) 487 if (!se->valid_blocks || se->valid_blocks == max_blocks)
413 return; 488 return;
@@ -416,23 +491,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
416 for (i = 0; i < entries; i++) 491 for (i = 0; i < entries; i++)
417 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 492 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
418 493
419 while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 494 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
420 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 495 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
421 if (start >= max_blocks) 496 if (start >= max_blocks)
422 break; 497 break;
423 498
424 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 499 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
425 500
501 if (end - start < cpc->trim_minlen)
502 continue;
503
426 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); 504 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
427 INIT_LIST_HEAD(&new->list); 505 INIT_LIST_HEAD(&new->list);
428 new->blkaddr = START_BLOCK(sbi, segno) + start; 506 new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
429 new->len = end - start; 507 new->len = end - start;
508 cpc->trimmed += end - start;
430 509
431 list_add_tail(&new->list, head); 510 list_add_tail(&new->list, head);
432 SM_I(sbi)->nr_discards += end - start; 511 SM_I(sbi)->nr_discards += end - start;
433 } 512 }
434} 513}
435 514
515void release_discard_addrs(struct f2fs_sb_info *sbi)
516{
517 struct list_head *head = &(SM_I(sbi)->discard_list);
518 struct discard_entry *entry, *this;
519
520 /* drop caches */
521 list_for_each_entry_safe(entry, this, head, list) {
522 list_del(&entry->list);
523 kmem_cache_free(discard_entry_slab, entry);
524 }
525}
526
436/* 527/*
437 * Should call clear_prefree_segments after checkpoint is done. 528 * Should call clear_prefree_segments after checkpoint is done.
438 */ 529 */
@@ -440,10 +531,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
440{ 531{
441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 532 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
442 unsigned int segno; 533 unsigned int segno;
443 unsigned int total_segs = TOTAL_SEGS(sbi);
444 534
445 mutex_lock(&dirty_i->seglist_lock); 535 mutex_lock(&dirty_i->seglist_lock);
446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) 536 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
447 __set_test_and_free(sbi, segno); 537 __set_test_and_free(sbi, segno);
448 mutex_unlock(&dirty_i->seglist_lock); 538 mutex_unlock(&dirty_i->seglist_lock);
449} 539}
@@ -454,17 +544,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
454 struct discard_entry *entry, *this; 544 struct discard_entry *entry, *this;
455 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 545 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
456 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 546 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
457 unsigned int total_segs = TOTAL_SEGS(sbi);
458 unsigned int start = 0, end = -1; 547 unsigned int start = 0, end = -1;
459 548
460 mutex_lock(&dirty_i->seglist_lock); 549 mutex_lock(&dirty_i->seglist_lock);
461 550
462 while (1) { 551 while (1) {
463 int i; 552 int i;
464 start = find_next_bit(prefree_map, total_segs, end + 1); 553 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
465 if (start >= total_segs) 554 if (start >= MAIN_SEGS(sbi))
466 break; 555 break;
467 end = find_next_zero_bit(prefree_map, total_segs, start + 1); 556 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
557 start + 1);
468 558
469 for (i = start; i < end; i++) 559 for (i = start; i < end; i++)
470 clear_bit(i, prefree_map); 560 clear_bit(i, prefree_map);
@@ -488,11 +578,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
488 } 578 }
489} 579}
490 580
491static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 581static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
492{ 582{
493 struct sit_info *sit_i = SIT_I(sbi); 583 struct sit_info *sit_i = SIT_I(sbi);
494 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) 584
585 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
495 sit_i->dirty_sentries++; 586 sit_i->dirty_sentries++;
587 return false;
588 }
589
590 return true;
496} 591}
497 592
498static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 593static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
@@ -516,7 +611,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
516 new_vblocks = se->valid_blocks + del; 611 new_vblocks = se->valid_blocks + del;
517 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 612 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
518 613
519 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 614 f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
520 (new_vblocks > sbi->blocks_per_seg))); 615 (new_vblocks > sbi->blocks_per_seg)));
521 616
522 se->valid_blocks = new_vblocks; 617 se->valid_blocks = new_vblocks;
@@ -526,10 +621,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
526 /* Update valid block bitmap */ 621 /* Update valid block bitmap */
527 if (del > 0) { 622 if (del > 0) {
528 if (f2fs_set_bit(offset, se->cur_valid_map)) 623 if (f2fs_set_bit(offset, se->cur_valid_map))
529 BUG(); 624 f2fs_bug_on(sbi, 1);
530 } else { 625 } else {
531 if (!f2fs_clear_bit(offset, se->cur_valid_map)) 626 if (!f2fs_clear_bit(offset, se->cur_valid_map))
532 BUG(); 627 f2fs_bug_on(sbi, 1);
533 } 628 }
534 if (!f2fs_test_bit(offset, se->ckpt_valid_map)) 629 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
535 se->ckpt_valid_blocks += del; 630 se->ckpt_valid_blocks += del;
@@ -558,7 +653,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
558 unsigned int segno = GET_SEGNO(sbi, addr); 653 unsigned int segno = GET_SEGNO(sbi, addr);
559 struct sit_info *sit_i = SIT_I(sbi); 654 struct sit_info *sit_i = SIT_I(sbi);
560 655
561 f2fs_bug_on(addr == NULL_ADDR); 656 f2fs_bug_on(sbi, addr == NULL_ADDR);
562 if (addr == NEW_ADDR) 657 if (addr == NEW_ADDR)
563 return; 658 return;
564 659
@@ -634,7 +729,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
634 unsigned int segno = curseg->segno + 1; 729 unsigned int segno = curseg->segno + 1;
635 struct free_segmap_info *free_i = FREE_I(sbi); 730 struct free_segmap_info *free_i = FREE_I(sbi);
636 731
637 if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) 732 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
638 return !test_bit(segno, free_i->free_segmap); 733 return !test_bit(segno, free_i->free_segmap);
639 return 0; 734 return 0;
640} 735}
@@ -648,7 +743,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
648{ 743{
649 struct free_segmap_info *free_i = FREE_I(sbi); 744 struct free_segmap_info *free_i = FREE_I(sbi);
650 unsigned int segno, secno, zoneno; 745 unsigned int segno, secno, zoneno;
651 unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; 746 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
652 unsigned int hint = *newseg / sbi->segs_per_sec; 747 unsigned int hint = *newseg / sbi->segs_per_sec;
653 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); 748 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
654 unsigned int left_start = hint; 749 unsigned int left_start = hint;
@@ -660,18 +755,18 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
660 755
661 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 756 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
662 segno = find_next_zero_bit(free_i->free_segmap, 757 segno = find_next_zero_bit(free_i->free_segmap,
663 TOTAL_SEGS(sbi), *newseg + 1); 758 MAIN_SEGS(sbi), *newseg + 1);
664 if (segno - *newseg < sbi->segs_per_sec - 759 if (segno - *newseg < sbi->segs_per_sec -
665 (*newseg % sbi->segs_per_sec)) 760 (*newseg % sbi->segs_per_sec))
666 goto got_it; 761 goto got_it;
667 } 762 }
668find_other_zone: 763find_other_zone:
669 secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); 764 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
670 if (secno >= TOTAL_SECS(sbi)) { 765 if (secno >= MAIN_SECS(sbi)) {
671 if (dir == ALLOC_RIGHT) { 766 if (dir == ALLOC_RIGHT) {
672 secno = find_next_zero_bit(free_i->free_secmap, 767 secno = find_next_zero_bit(free_i->free_secmap,
673 TOTAL_SECS(sbi), 0); 768 MAIN_SECS(sbi), 0);
674 f2fs_bug_on(secno >= TOTAL_SECS(sbi)); 769 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
675 } else { 770 } else {
676 go_left = 1; 771 go_left = 1;
677 left_start = hint - 1; 772 left_start = hint - 1;
@@ -686,8 +781,8 @@ find_other_zone:
686 continue; 781 continue;
687 } 782 }
688 left_start = find_next_zero_bit(free_i->free_secmap, 783 left_start = find_next_zero_bit(free_i->free_secmap,
689 TOTAL_SECS(sbi), 0); 784 MAIN_SECS(sbi), 0);
690 f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); 785 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
691 break; 786 break;
692 } 787 }
693 secno = left_start; 788 secno = left_start;
@@ -726,7 +821,7 @@ skip_left:
726 } 821 }
727got_it: 822got_it:
728 /* set it as dirty segment in free segmap */ 823 /* set it as dirty segment in free segmap */
729 f2fs_bug_on(test_bit(segno, free_i->free_segmap)); 824 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
730 __set_inuse(sbi, segno); 825 __set_inuse(sbi, segno);
731 *newseg = segno; 826 *newseg = segno;
732 write_unlock(&free_i->segmap_lock); 827 write_unlock(&free_i->segmap_lock);
@@ -898,6 +993,37 @@ static const struct segment_allocation default_salloc_ops = {
898 .allocate_segment = allocate_segment_by_default, 993 .allocate_segment = allocate_segment_by_default,
899}; 994};
900 995
996int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
997{
998 __u64 start = range->start >> sbi->log_blocksize;
999 __u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1000 unsigned int start_segno, end_segno;
1001 struct cp_control cpc;
1002
1003 if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1004 range->len < sbi->blocksize)
1005 return -EINVAL;
1006
1007 if (end <= MAIN_BLKADDR(sbi))
1008 goto out;
1009
1010 /* start/end segment number in main_area */
1011 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1012 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1013 GET_SEGNO(sbi, end);
1014 cpc.reason = CP_DISCARD;
1015 cpc.trim_start = start_segno;
1016 cpc.trim_end = end_segno;
1017 cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1018 cpc.trimmed = 0;
1019
1020 /* do checkpoint to issue discard commands safely */
1021 write_checkpoint(sbi, &cpc);
1022out:
1023 range->len = cpc.trimmed << sbi->log_blocksize;
1024 return 0;
1025}
1026
901static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 1027static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
902{ 1028{
903 struct curseg_info *curseg = CURSEG_I(sbi, type); 1029 struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -953,15 +1079,15 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type)
953 1079
954static int __get_segment_type(struct page *page, enum page_type p_type) 1080static int __get_segment_type(struct page *page, enum page_type p_type)
955{ 1081{
956 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1082 switch (F2FS_P_SB(page)->active_logs) {
957 switch (sbi->active_logs) {
958 case 2: 1083 case 2:
959 return __get_segment_type_2(page, p_type); 1084 return __get_segment_type_2(page, p_type);
960 case 4: 1085 case 4:
961 return __get_segment_type_4(page, p_type); 1086 return __get_segment_type_4(page, p_type);
962 } 1087 }
963 /* NR_CURSEG_TYPE(6) logs by default */ 1088 /* NR_CURSEG_TYPE(6) logs by default */
964 f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); 1089 f2fs_bug_on(F2FS_P_SB(page),
1090 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
965 return __get_segment_type_6(page, p_type); 1091 return __get_segment_type_6(page, p_type);
966} 1092}
967 1093
@@ -1041,11 +1167,11 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1041void write_data_page(struct page *page, struct dnode_of_data *dn, 1167void write_data_page(struct page *page, struct dnode_of_data *dn,
1042 block_t *new_blkaddr, struct f2fs_io_info *fio) 1168 block_t *new_blkaddr, struct f2fs_io_info *fio)
1043{ 1169{
1044 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 1170 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1045 struct f2fs_summary sum; 1171 struct f2fs_summary sum;
1046 struct node_info ni; 1172 struct node_info ni;
1047 1173
1048 f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); 1174 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1049 get_node_info(sbi, dn->nid, &ni); 1175 get_node_info(sbi, dn->nid, &ni);
1050 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1176 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1051 1177
@@ -1055,9 +1181,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn,
1055void rewrite_data_page(struct page *page, block_t old_blkaddr, 1181void rewrite_data_page(struct page *page, block_t old_blkaddr,
1056 struct f2fs_io_info *fio) 1182 struct f2fs_io_info *fio)
1057{ 1183{
1058 struct inode *inode = page->mapping->host; 1184 f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1059 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1060 f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
1061} 1185}
1062 1186
1063void recover_data_page(struct f2fs_sb_info *sbi, 1187void recover_data_page(struct f2fs_sb_info *sbi,
@@ -1130,8 +1254,9 @@ out:
1130void f2fs_wait_on_page_writeback(struct page *page, 1254void f2fs_wait_on_page_writeback(struct page *page,
1131 enum page_type type) 1255 enum page_type type)
1132{ 1256{
1133 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1134 if (PageWriteback(page)) { 1257 if (PageWriteback(page)) {
1258 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1259
1135 if (is_merged_page(sbi, page, type)) 1260 if (is_merged_page(sbi, page, type))
1136 f2fs_submit_merged_bio(sbi, type, WRITE); 1261 f2fs_submit_merged_bio(sbi, type, WRITE);
1137 wait_on_page_writeback(page); 1262 wait_on_page_writeback(page);
@@ -1400,7 +1525,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1400 unsigned int segno) 1525 unsigned int segno)
1401{ 1526{
1402 struct sit_info *sit_i = SIT_I(sbi); 1527 struct sit_info *sit_i = SIT_I(sbi);
1403 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); 1528 unsigned int offset = SIT_BLOCK_OFFSET(segno);
1404 block_t blk_addr = sit_i->sit_base_addr + offset; 1529 block_t blk_addr = sit_i->sit_base_addr + offset;
1405 1530
1406 check_seg_range(sbi, segno); 1531 check_seg_range(sbi, segno);
@@ -1426,7 +1551,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1426 /* get current sit block page without lock */ 1551 /* get current sit block page without lock */
1427 src_page = get_meta_page(sbi, src_off); 1552 src_page = get_meta_page(sbi, src_off);
1428 dst_page = grab_meta_page(sbi, dst_off); 1553 dst_page = grab_meta_page(sbi, dst_off);
1429 f2fs_bug_on(PageDirty(src_page)); 1554 f2fs_bug_on(sbi, PageDirty(src_page));
1430 1555
1431 src_addr = page_address(src_page); 1556 src_addr = page_address(src_page);
1432 dst_addr = page_address(dst_page); 1557 dst_addr = page_address(dst_page);
@@ -1440,101 +1565,192 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1440 return dst_page; 1565 return dst_page;
1441} 1566}
1442 1567
1443static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) 1568static struct sit_entry_set *grab_sit_entry_set(void)
1569{
1570 struct sit_entry_set *ses =
1571 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1572
1573 ses->entry_cnt = 0;
1574 INIT_LIST_HEAD(&ses->set_list);
1575 return ses;
1576}
1577
1578static void release_sit_entry_set(struct sit_entry_set *ses)
1579{
1580 list_del(&ses->set_list);
1581 kmem_cache_free(sit_entry_set_slab, ses);
1582}
1583
1584static void adjust_sit_entry_set(struct sit_entry_set *ses,
1585 struct list_head *head)
1586{
1587 struct sit_entry_set *next = ses;
1588
1589 if (list_is_last(&ses->set_list, head))
1590 return;
1591
1592 list_for_each_entry_continue(next, head, set_list)
1593 if (ses->entry_cnt <= next->entry_cnt)
1594 break;
1595
1596 list_move_tail(&ses->set_list, &next->set_list);
1597}
1598
1599static void add_sit_entry(unsigned int segno, struct list_head *head)
1600{
1601 struct sit_entry_set *ses;
1602 unsigned int start_segno = START_SEGNO(segno);
1603
1604 list_for_each_entry(ses, head, set_list) {
1605 if (ses->start_segno == start_segno) {
1606 ses->entry_cnt++;
1607 adjust_sit_entry_set(ses, head);
1608 return;
1609 }
1610 }
1611
1612 ses = grab_sit_entry_set();
1613
1614 ses->start_segno = start_segno;
1615 ses->entry_cnt++;
1616 list_add(&ses->set_list, head);
1617}
1618
1619static void add_sits_in_set(struct f2fs_sb_info *sbi)
1620{
1621 struct f2fs_sm_info *sm_info = SM_I(sbi);
1622 struct list_head *set_list = &sm_info->sit_entry_set;
1623 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1624 unsigned int segno;
1625
1626 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1627 add_sit_entry(segno, set_list);
1628}
1629
1630static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1444{ 1631{
1445 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1632 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1446 struct f2fs_summary_block *sum = curseg->sum_blk; 1633 struct f2fs_summary_block *sum = curseg->sum_blk;
1447 int i; 1634 int i;
1448 1635
1449 /* 1636 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1450 * If the journal area in the current summary is full of sit entries, 1637 unsigned int segno;
1451 * all the sit entries will be flushed. Otherwise the sit entries 1638 bool dirtied;
1452 * are not able to replace with newly hot sit entries. 1639
1453 */ 1640 segno = le32_to_cpu(segno_in_journal(sum, i));
1454 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { 1641 dirtied = __mark_sit_entry_dirty(sbi, segno);
1455 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { 1642
1456 unsigned int segno; 1643 if (!dirtied)
1457 segno = le32_to_cpu(segno_in_journal(sum, i)); 1644 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1458 __mark_sit_entry_dirty(sbi, segno);
1459 }
1460 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1461 return true;
1462 } 1645 }
1463 return false; 1646 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1464} 1647}
1465 1648
1466/* 1649/*
1467 * CP calls this function, which flushes SIT entries including sit_journal, 1650 * CP calls this function, which flushes SIT entries including sit_journal,
1468 * and moves prefree segs to free segs. 1651 * and moves prefree segs to free segs.
1469 */ 1652 */
1470void flush_sit_entries(struct f2fs_sb_info *sbi) 1653void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1471{ 1654{
1472 struct sit_info *sit_i = SIT_I(sbi); 1655 struct sit_info *sit_i = SIT_I(sbi);
1473 unsigned long *bitmap = sit_i->dirty_sentries_bitmap; 1656 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1474 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1657 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1475 struct f2fs_summary_block *sum = curseg->sum_blk; 1658 struct f2fs_summary_block *sum = curseg->sum_blk;
1476 unsigned long nsegs = TOTAL_SEGS(sbi); 1659 struct sit_entry_set *ses, *tmp;
1477 struct page *page = NULL; 1660 struct list_head *head = &SM_I(sbi)->sit_entry_set;
1478 struct f2fs_sit_block *raw_sit = NULL; 1661 bool to_journal = true;
1479 unsigned int start = 0, end = 0; 1662 struct seg_entry *se;
1480 unsigned int segno;
1481 bool flushed;
1482 1663
1483 mutex_lock(&curseg->curseg_mutex); 1664 mutex_lock(&curseg->curseg_mutex);
1484 mutex_lock(&sit_i->sentry_lock); 1665 mutex_lock(&sit_i->sentry_lock);
1485 1666
1486 /* 1667 /*
1487 * "flushed" indicates whether sit entries in journal are flushed 1668 * add and account sit entries of dirty bitmap in sit entry
1488 * to the SIT area or not. 1669 * set temporarily
1489 */ 1670 */
1490 flushed = flush_sits_in_journal(sbi); 1671 add_sits_in_set(sbi);
1491 1672
1492 for_each_set_bit(segno, bitmap, nsegs) { 1673 /*
1493 struct seg_entry *se = get_seg_entry(sbi, segno); 1674 * if there are no enough space in journal to store dirty sit
1494 int sit_offset, offset; 1675 * entries, remove all entries from journal and add and account
1676 * them in sit entry set.
1677 */
1678 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1679 remove_sits_in_journal(sbi);
1495 1680
1496 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1681 if (!sit_i->dirty_sentries)
1682 goto out;
1497 1683
1498 /* add discard candidates */ 1684 /*
1499 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) 1685 * there are two steps to flush sit entries:
1500 add_discard_addrs(sbi, segno, se); 1686 * #1, flush sit entries to journal in current cold data summary block.
1687 * #2, flush sit entries to sit page.
1688 */
1689 list_for_each_entry_safe(ses, tmp, head, set_list) {
1690 struct page *page;
1691 struct f2fs_sit_block *raw_sit = NULL;
1692 unsigned int start_segno = ses->start_segno;
1693 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1694 (unsigned long)MAIN_SEGS(sbi));
1695 unsigned int segno = start_segno;
1696
1697 if (to_journal &&
1698 !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1699 to_journal = false;
1700
1701 if (!to_journal) {
1702 page = get_next_sit_page(sbi, start_segno);
1703 raw_sit = page_address(page);
1704 }
1501 1705
1502 if (flushed) 1706 /* flush dirty sit entries in region of current sit set */
1503 goto to_sit_page; 1707 for_each_set_bit_from(segno, bitmap, end) {
1708 int offset, sit_offset;
1504 1709
1505 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); 1710 se = get_seg_entry(sbi, segno);
1506 if (offset >= 0) { 1711
1507 segno_in_journal(sum, offset) = cpu_to_le32(segno); 1712 /* add discard candidates */
1508 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); 1713 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1509 goto flush_done; 1714 cpc->trim_start = segno;
1510 } 1715 add_discard_addrs(sbi, cpc);
1511to_sit_page:
1512 if (!page || (start > segno) || (segno > end)) {
1513 if (page) {
1514 f2fs_put_page(page, 1);
1515 page = NULL;
1516 } 1716 }
1517 1717
1518 start = START_SEGNO(sit_i, segno); 1718 if (to_journal) {
1519 end = start + SIT_ENTRY_PER_BLOCK - 1; 1719 offset = lookup_journal_in_cursum(sum,
1720 SIT_JOURNAL, segno, 1);
1721 f2fs_bug_on(sbi, offset < 0);
1722 segno_in_journal(sum, offset) =
1723 cpu_to_le32(segno);
1724 seg_info_to_raw_sit(se,
1725 &sit_in_journal(sum, offset));
1726 } else {
1727 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1728 seg_info_to_raw_sit(se,
1729 &raw_sit->entries[sit_offset]);
1730 }
1520 1731
1521 /* read sit block that will be updated */ 1732 __clear_bit(segno, bitmap);
1522 page = get_next_sit_page(sbi, start); 1733 sit_i->dirty_sentries--;
1523 raw_sit = page_address(page); 1734 ses->entry_cnt--;
1524 } 1735 }
1525 1736
1526 /* udpate entry in SIT block */ 1737 if (!to_journal)
1527 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); 1738 f2fs_put_page(page, 1);
1528flush_done: 1739
1529 __clear_bit(segno, bitmap); 1740 f2fs_bug_on(sbi, ses->entry_cnt);
1530 sit_i->dirty_sentries--; 1741 release_sit_entry_set(ses);
1742 }
1743
1744 f2fs_bug_on(sbi, !list_empty(head));
1745 f2fs_bug_on(sbi, sit_i->dirty_sentries);
1746out:
1747 if (cpc->reason == CP_DISCARD) {
1748 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1749 add_discard_addrs(sbi, cpc);
1531 } 1750 }
1532 mutex_unlock(&sit_i->sentry_lock); 1751 mutex_unlock(&sit_i->sentry_lock);
1533 mutex_unlock(&curseg->curseg_mutex); 1752 mutex_unlock(&curseg->curseg_mutex);
1534 1753
1535 /* writeout last modified SIT block */
1536 f2fs_put_page(page, 1);
1537
1538 set_prefree_as_free_segments(sbi); 1754 set_prefree_as_free_segments(sbi);
1539} 1755}
1540 1756
@@ -1554,16 +1770,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1554 1770
1555 SM_I(sbi)->sit_info = sit_i; 1771 SM_I(sbi)->sit_info = sit_i;
1556 1772
1557 sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); 1773 sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1558 if (!sit_i->sentries) 1774 if (!sit_i->sentries)
1559 return -ENOMEM; 1775 return -ENOMEM;
1560 1776
1561 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1777 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1562 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); 1778 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1563 if (!sit_i->dirty_sentries_bitmap) 1779 if (!sit_i->dirty_sentries_bitmap)
1564 return -ENOMEM; 1780 return -ENOMEM;
1565 1781
1566 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1782 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1567 sit_i->sentries[start].cur_valid_map 1783 sit_i->sentries[start].cur_valid_map
1568 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 1784 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1569 sit_i->sentries[start].ckpt_valid_map 1785 sit_i->sentries[start].ckpt_valid_map
@@ -1574,7 +1790,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1574 } 1790 }
1575 1791
1576 if (sbi->segs_per_sec > 1) { 1792 if (sbi->segs_per_sec > 1) {
1577 sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * 1793 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1578 sizeof(struct sec_entry)); 1794 sizeof(struct sec_entry));
1579 if (!sit_i->sec_entries) 1795 if (!sit_i->sec_entries)
1580 return -ENOMEM; 1796 return -ENOMEM;
@@ -1609,7 +1825,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1609 1825
1610static int build_free_segmap(struct f2fs_sb_info *sbi) 1826static int build_free_segmap(struct f2fs_sb_info *sbi)
1611{ 1827{
1612 struct f2fs_sm_info *sm_info = SM_I(sbi);
1613 struct free_segmap_info *free_i; 1828 struct free_segmap_info *free_i;
1614 unsigned int bitmap_size, sec_bitmap_size; 1829 unsigned int bitmap_size, sec_bitmap_size;
1615 1830
@@ -1620,12 +1835,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1620 1835
1621 SM_I(sbi)->free_info = free_i; 1836 SM_I(sbi)->free_info = free_i;
1622 1837
1623 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1838 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1624 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); 1839 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1625 if (!free_i->free_segmap) 1840 if (!free_i->free_segmap)
1626 return -ENOMEM; 1841 return -ENOMEM;
1627 1842
1628 sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1843 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1629 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); 1844 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1630 if (!free_i->free_secmap) 1845 if (!free_i->free_secmap)
1631 return -ENOMEM; 1846 return -ENOMEM;
@@ -1635,8 +1850,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1635 memset(free_i->free_secmap, 0xff, sec_bitmap_size); 1850 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1636 1851
1637 /* init free segmap information */ 1852 /* init free segmap information */
1638 free_i->start_segno = 1853 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1639 (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1640 free_i->free_segments = 0; 1854 free_i->free_segments = 0;
1641 free_i->free_sections = 0; 1855 free_i->free_sections = 0;
1642 rwlock_init(&free_i->segmap_lock); 1856 rwlock_init(&free_i->segmap_lock);
@@ -1673,7 +1887,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1673 int sit_blk_cnt = SIT_BLK_CNT(sbi); 1887 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1674 unsigned int i, start, end; 1888 unsigned int i, start, end;
1675 unsigned int readed, start_blk = 0; 1889 unsigned int readed, start_blk = 0;
1676 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1890 int nrpages = MAX_BIO_BLOCKS(sbi);
1677 1891
1678 do { 1892 do {
1679 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); 1893 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
@@ -1681,7 +1895,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1681 start = start_blk * sit_i->sents_per_block; 1895 start = start_blk * sit_i->sents_per_block;
1682 end = (start_blk + readed) * sit_i->sents_per_block; 1896 end = (start_blk + readed) * sit_i->sents_per_block;
1683 1897
1684 for (; start < end && start < TOTAL_SEGS(sbi); start++) { 1898 for (; start < end && start < MAIN_SEGS(sbi); start++) {
1685 struct seg_entry *se = &sit_i->sentries[start]; 1899 struct seg_entry *se = &sit_i->sentries[start];
1686 struct f2fs_sit_block *sit_blk; 1900 struct f2fs_sit_block *sit_blk;
1687 struct f2fs_sit_entry sit; 1901 struct f2fs_sit_entry sit;
@@ -1719,7 +1933,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
1719 unsigned int start; 1933 unsigned int start;
1720 int type; 1934 int type;
1721 1935
1722 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1936 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1723 struct seg_entry *sentry = get_seg_entry(sbi, start); 1937 struct seg_entry *sentry = get_seg_entry(sbi, start);
1724 if (!sentry->valid_blocks) 1938 if (!sentry->valid_blocks)
1725 __set_free(sbi, start); 1939 __set_free(sbi, start);
@@ -1736,18 +1950,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1736{ 1950{
1737 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1951 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1738 struct free_segmap_info *free_i = FREE_I(sbi); 1952 struct free_segmap_info *free_i = FREE_I(sbi);
1739 unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); 1953 unsigned int segno = 0, offset = 0;
1740 unsigned short valid_blocks; 1954 unsigned short valid_blocks;
1741 1955
1742 while (1) { 1956 while (1) {
1743 /* find dirty segment based on free segmap */ 1957 /* find dirty segment based on free segmap */
1744 segno = find_next_inuse(free_i, total_segs, offset); 1958 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1745 if (segno >= total_segs) 1959 if (segno >= MAIN_SEGS(sbi))
1746 break; 1960 break;
1747 offset = segno + 1; 1961 offset = segno + 1;
1748 valid_blocks = get_valid_blocks(sbi, segno, 0); 1962 valid_blocks = get_valid_blocks(sbi, segno, 0);
1749 if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) 1963 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
1964 continue;
1965 if (valid_blocks > sbi->blocks_per_seg) {
1966 f2fs_bug_on(sbi, 1);
1750 continue; 1967 continue;
1968 }
1751 mutex_lock(&dirty_i->seglist_lock); 1969 mutex_lock(&dirty_i->seglist_lock);
1752 __locate_dirty_segment(sbi, segno, DIRTY); 1970 __locate_dirty_segment(sbi, segno, DIRTY);
1753 mutex_unlock(&dirty_i->seglist_lock); 1971 mutex_unlock(&dirty_i->seglist_lock);
@@ -1757,7 +1975,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1757static int init_victim_secmap(struct f2fs_sb_info *sbi) 1975static int init_victim_secmap(struct f2fs_sb_info *sbi)
1758{ 1976{
1759 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1977 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1760 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); 1978 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1761 1979
1762 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); 1980 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
1763 if (!dirty_i->victim_secmap) 1981 if (!dirty_i->victim_secmap)
@@ -1778,7 +1996,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1778 SM_I(sbi)->dirty_info = dirty_i; 1996 SM_I(sbi)->dirty_info = dirty_i;
1779 mutex_init(&dirty_i->seglist_lock); 1997 mutex_init(&dirty_i->seglist_lock);
1780 1998
1781 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1999 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1782 2000
1783 for (i = 0; i < NR_DIRTY_TYPE; i++) { 2001 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1784 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 2002 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
@@ -1802,7 +2020,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1802 2020
1803 sit_i->min_mtime = LLONG_MAX; 2021 sit_i->min_mtime = LLONG_MAX;
1804 2022
1805 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 2023 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
1806 unsigned int i; 2024 unsigned int i;
1807 unsigned long long mtime = 0; 2025 unsigned long long mtime = 0;
1808 2026
@@ -1840,13 +2058,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1840 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 2058 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1841 sm_info->rec_prefree_segments = sm_info->main_segments * 2059 sm_info->rec_prefree_segments = sm_info->main_segments *
1842 DEF_RECLAIM_PREFREE_SEGMENTS / 100; 2060 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1843 sm_info->ipu_policy = F2FS_IPU_DISABLE; 2061 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
1844 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 2062 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2063 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
1845 2064
1846 INIT_LIST_HEAD(&sm_info->discard_list); 2065 INIT_LIST_HEAD(&sm_info->discard_list);
1847 sm_info->nr_discards = 0; 2066 sm_info->nr_discards = 0;
1848 sm_info->max_discards = 0; 2067 sm_info->max_discards = 0;
1849 2068
2069 INIT_LIST_HEAD(&sm_info->sit_entry_set);
2070
1850 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 2071 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
1851 err = create_flush_cmd_control(sbi); 2072 err = create_flush_cmd_control(sbi);
1852 if (err) 2073 if (err)
@@ -1942,7 +2163,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
1942 return; 2163 return;
1943 2164
1944 if (sit_i->sentries) { 2165 if (sit_i->sentries) {
1945 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 2166 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1946 kfree(sit_i->sentries[start].cur_valid_map); 2167 kfree(sit_i->sentries[start].cur_valid_map);
1947 kfree(sit_i->sentries[start].ckpt_valid_map); 2168 kfree(sit_i->sentries[start].ckpt_valid_map);
1948 } 2169 }
@@ -1976,11 +2197,30 @@ int __init create_segment_manager_caches(void)
1976 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 2197 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1977 sizeof(struct discard_entry)); 2198 sizeof(struct discard_entry));
1978 if (!discard_entry_slab) 2199 if (!discard_entry_slab)
1979 return -ENOMEM; 2200 goto fail;
2201
2202 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2203 sizeof(struct nat_entry_set));
2204 if (!sit_entry_set_slab)
2205 goto destory_discard_entry;
2206
2207 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2208 sizeof(struct inmem_pages));
2209 if (!inmem_entry_slab)
2210 goto destroy_sit_entry_set;
1980 return 0; 2211 return 0;
2212
2213destroy_sit_entry_set:
2214 kmem_cache_destroy(sit_entry_set_slab);
2215destory_discard_entry:
2216 kmem_cache_destroy(discard_entry_slab);
2217fail:
2218 return -ENOMEM;
1981} 2219}
1982 2220
1983void destroy_segment_manager_caches(void) 2221void destroy_segment_manager_caches(void)
1984{ 2222{
2223 kmem_cache_destroy(sit_entry_set_slab);
1985 kmem_cache_destroy(discard_entry_slab); 2224 kmem_cache_destroy(discard_entry_slab);
2225 kmem_cache_destroy(inmem_entry_slab);
1986} 2226}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ff483257283b..2495bec1c621 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -45,16 +45,26 @@
45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ 45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
46 sbi->segs_per_sec)) \ 46 sbi->segs_per_sec)) \
47 47
48#define START_BLOCK(sbi, segno) \ 48#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
49 (SM_I(sbi)->seg0_blkaddr + \ 49#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
50
51#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
52#define MAIN_SECS(sbi) (sbi->total_sections)
53
54#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
55#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
56
57#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
58#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
59 sbi->log_blocks_per_seg))
60
61#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
50 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) 62 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
63
51#define NEXT_FREE_BLKADDR(sbi, curseg) \ 64#define NEXT_FREE_BLKADDR(sbi, curseg) \
52 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) 65 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
53 66
54#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) 67#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
55
56#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 68#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 69 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ 70#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
@@ -77,23 +87,21 @@
77 87
78#define SIT_ENTRY_OFFSET(sit_i, segno) \ 88#define SIT_ENTRY_OFFSET(sit_i, segno) \
79 (segno % sit_i->sents_per_block) 89 (segno % sit_i->sents_per_block)
80#define SIT_BLOCK_OFFSET(sit_i, segno) \ 90#define SIT_BLOCK_OFFSET(segno) \
81 (segno / SIT_ENTRY_PER_BLOCK) 91 (segno / SIT_ENTRY_PER_BLOCK)
82#define START_SEGNO(sit_i, segno) \ 92#define START_SEGNO(segno) \
83 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) 93 (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
84#define SIT_BLK_CNT(sbi) \ 94#define SIT_BLK_CNT(sbi) \
85 ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) 95 ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
86#define f2fs_bitmap_size(nr) \ 96#define f2fs_bitmap_size(nr) \
87 (BITS_TO_LONGS(nr) * sizeof(unsigned long)) 97 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
88#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
89#define TOTAL_SECS(sbi) (sbi->total_sections)
90 98
91#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 99#define SECTOR_FROM_BLOCK(blk_addr) \
92 (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) 100 (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
93#define SECTOR_TO_BLOCK(sbi, sectors) \ 101#define SECTOR_TO_BLOCK(sectors) \
94 (sectors >> (sbi)->log_sectors_per_block) 102 (sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
95#define MAX_BIO_BLOCKS(max_hw_blocks) \ 103#define MAX_BIO_BLOCKS(sbi) \
96 (min((int)max_hw_blocks, BIO_MAX_PAGES)) 104 ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
97 105
98/* 106/*
99 * indicate a block allocation direction: RIGHT and LEFT. 107 * indicate a block allocation direction: RIGHT and LEFT.
@@ -167,6 +175,11 @@ struct segment_allocation {
167 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 175 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
168}; 176};
169 177
178struct inmem_pages {
179 struct list_head list;
180 struct page *page;
181};
182
170struct sit_info { 183struct sit_info {
171 const struct segment_allocation *s_ops; 184 const struct segment_allocation *s_ops;
172 185
@@ -237,6 +250,12 @@ struct curseg_info {
237 unsigned int next_segno; /* preallocated segment */ 250 unsigned int next_segno; /* preallocated segment */
238}; 251};
239 252
253struct sit_entry_set {
254 struct list_head set_list; /* link with all sit sets */
255 unsigned int start_segno; /* start segno of sits in set */
256 unsigned int entry_cnt; /* the # of sit entries in set */
257};
258
240/* 259/*
241 * inline functions 260 * inline functions
242 */ 261 */
@@ -316,7 +335,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
316 clear_bit(segno, free_i->free_segmap); 335 clear_bit(segno, free_i->free_segmap);
317 free_i->free_segments++; 336 free_i->free_segments++;
318 337
319 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); 338 next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno);
320 if (next >= start_segno + sbi->segs_per_sec) { 339 if (next >= start_segno + sbi->segs_per_sec) {
321 clear_bit(secno, free_i->free_secmap); 340 clear_bit(secno, free_i->free_secmap);
322 free_i->free_sections++; 341 free_i->free_sections++;
@@ -430,8 +449,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
430 449
431static inline bool need_SSR(struct f2fs_sb_info *sbi) 450static inline bool need_SSR(struct f2fs_sb_info *sbi)
432{ 451{
433 return (prefree_segments(sbi) / sbi->segs_per_sec) 452 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
434 + free_sections(sbi) < overprovision_sections(sbi); 453 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
454 return free_sections(sbi) <= (node_secs + 2 * dent_secs +
455 reserved_sections(sbi) + 1);
435} 456}
436 457
437static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) 458static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -466,48 +487,47 @@ static inline int utilization(struct f2fs_sb_info *sbi)
466 * F2FS_IPU_UTIL - if FS utilization is over threashold, 487 * F2FS_IPU_UTIL - if FS utilization is over threashold,
467 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over 488 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
468 * threashold, 489 * threashold,
490 * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash
491 * storages. IPU will be triggered only if the # of dirty
492 * pages over min_fsync_blocks.
469 * F2FS_IPUT_DISABLE - disable IPU. (=default option) 493 * F2FS_IPUT_DISABLE - disable IPU. (=default option)
470 */ 494 */
471#define DEF_MIN_IPU_UTIL 70 495#define DEF_MIN_IPU_UTIL 70
496#define DEF_MIN_FSYNC_BLOCKS 8
472 497
473enum { 498enum {
474 F2FS_IPU_FORCE, 499 F2FS_IPU_FORCE,
475 F2FS_IPU_SSR, 500 F2FS_IPU_SSR,
476 F2FS_IPU_UTIL, 501 F2FS_IPU_UTIL,
477 F2FS_IPU_SSR_UTIL, 502 F2FS_IPU_SSR_UTIL,
478 F2FS_IPU_DISABLE, 503 F2FS_IPU_FSYNC,
479}; 504};
480 505
481static inline bool need_inplace_update(struct inode *inode) 506static inline bool need_inplace_update(struct inode *inode)
482{ 507{
483 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 508 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
509 unsigned int policy = SM_I(sbi)->ipu_policy;
484 510
485 /* IPU can be done only for the user data */ 511 /* IPU can be done only for the user data */
486 if (S_ISDIR(inode->i_mode)) 512 if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
487 return false; 513 return false;
488 514
489 /* this is only set during fdatasync */ 515 if (policy & (0x1 << F2FS_IPU_FORCE))
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) 516 return true;
517 if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
518 return true;
519 if (policy & (0x1 << F2FS_IPU_UTIL) &&
520 utilization(sbi) > SM_I(sbi)->min_ipu_util)
521 return true;
522 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
523 utilization(sbi) > SM_I(sbi)->min_ipu_util)
491 return true; 524 return true;
492 525
493 switch (SM_I(sbi)->ipu_policy) { 526 /* this is only set during fdatasync */
494 case F2FS_IPU_FORCE: 527 if (policy & (0x1 << F2FS_IPU_FSYNC) &&
528 is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
495 return true; 529 return true;
496 case F2FS_IPU_SSR: 530
497 if (need_SSR(sbi))
498 return true;
499 break;
500 case F2FS_IPU_UTIL:
501 if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
502 return true;
503 break;
504 case F2FS_IPU_SSR_UTIL:
505 if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
506 return true;
507 break;
508 case F2FS_IPU_DISABLE:
509 break;
510 }
511 return false; 531 return false;
512} 532}
513 533
@@ -534,18 +554,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
534#ifdef CONFIG_F2FS_CHECK_FS 554#ifdef CONFIG_F2FS_CHECK_FS
535static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 555static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
536{ 556{
537 unsigned int end_segno = SM_I(sbi)->segment_count - 1; 557 BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
538 BUG_ON(segno > end_segno);
539} 558}
540 559
541static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 560static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
542{ 561{
543 struct f2fs_sm_info *sm_info = SM_I(sbi); 562 BUG_ON(blk_addr < SEG0_BLKADDR(sbi));
544 block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; 563 BUG_ON(blk_addr >= MAX_BLKADDR(sbi));
545 block_t start_addr = sm_info->seg0_blkaddr;
546 block_t end_addr = start_addr + total_blks - 1;
547 BUG_ON(blk_addr < start_addr);
548 BUG_ON(blk_addr > end_addr);
549} 564}
550 565
551/* 566/*
@@ -554,8 +569,6 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
554static inline void check_block_count(struct f2fs_sb_info *sbi, 569static inline void check_block_count(struct f2fs_sb_info *sbi,
555 int segno, struct f2fs_sit_entry *raw_sit) 570 int segno, struct f2fs_sit_entry *raw_sit)
556{ 571{
557 struct f2fs_sm_info *sm_info = SM_I(sbi);
558 unsigned int end_segno = sm_info->segment_count - 1;
559 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; 572 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
560 int valid_blocks = 0; 573 int valid_blocks = 0;
561 int cur_pos = 0, next_pos; 574 int cur_pos = 0, next_pos;
@@ -564,7 +577,7 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
564 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); 577 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
565 578
566 /* check boundary of a given segment number */ 579 /* check boundary of a given segment number */
567 BUG_ON(segno > end_segno); 580 BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
568 581
569 /* check bitmap with valid block count */ 582 /* check bitmap with valid block count */
570 do { 583 do {
@@ -583,16 +596,39 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
583 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 596 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
584} 597}
585#else 598#else
586#define check_seg_range(sbi, segno) 599static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
587#define verify_block_addr(sbi, blk_addr) 600{
588#define check_block_count(sbi, segno, raw_sit) 601 if (segno > TOTAL_SEGS(sbi) - 1)
602 sbi->need_fsck = true;
603}
604
605static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
606{
607 if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
608 sbi->need_fsck = true;
609}
610
611/*
612 * Summary block is always treated as an invalid block
613 */
614static inline void check_block_count(struct f2fs_sb_info *sbi,
615 int segno, struct f2fs_sit_entry *raw_sit)
616{
617 /* check segment usage */
618 if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
619 sbi->need_fsck = true;
620
621 /* check boundary of a given segment number */
622 if (segno > TOTAL_SEGS(sbi) - 1)
623 sbi->need_fsck = true;
624}
589#endif 625#endif
590 626
591static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 627static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
592 unsigned int start) 628 unsigned int start)
593{ 629{
594 struct sit_info *sit_i = SIT_I(sbi); 630 struct sit_info *sit_i = SIT_I(sbi);
595 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); 631 unsigned int offset = SIT_BLOCK_OFFSET(start);
596 block_t blk_addr = sit_i->sit_base_addr + offset; 632 block_t blk_addr = sit_i->sit_base_addr + offset;
597 633
598 check_seg_range(sbi, start); 634 check_seg_range(sbi, start);
@@ -619,7 +655,7 @@ static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
619 655
620static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) 656static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
621{ 657{
622 unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); 658 unsigned int block_off = SIT_BLOCK_OFFSET(start);
623 659
624 if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) 660 if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
625 f2fs_clear_bit(block_off, sit_i->sit_bitmap); 661 f2fs_clear_bit(block_off, sit_i->sit_bitmap);
@@ -666,7 +702,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
666{ 702{
667 struct block_device *bdev = sbi->sb->s_bdev; 703 struct block_device *bdev = sbi->sb->s_bdev;
668 struct request_queue *q = bdev_get_queue(bdev); 704 struct request_queue *q = bdev_get_queue(bdev);
669 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 705 return SECTOR_TO_BLOCK(queue_max_sectors(q));
670} 706}
671 707
672/* 708/*
@@ -683,7 +719,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
683 else if (type == NODE) 719 else if (type == NODE)
684 return 3 * sbi->blocks_per_seg; 720 return 3 * sbi->blocks_per_seg;
685 else if (type == META) 721 else if (type == META)
686 return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 722 return MAX_BIO_BLOCKS(sbi);
687 else 723 else
688 return 0; 724 return 0;
689} 725}
@@ -706,7 +742,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
706 else if (type == NODE) 742 else if (type == NODE)
707 desired = 3 * max_hw_blocks(sbi); 743 desired = 3 * max_hw_blocks(sbi);
708 else 744 else
709 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 745 desired = MAX_BIO_BLOCKS(sbi);
710 746
711 wbc->nr_to_write = desired; 747 wbc->nr_to_write = desired;
712 return desired - nr_to_write; 748 return desired - nr_to_write;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41bdf511003d..41d6f700f4ee 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -190,6 +190,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
191F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 191F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
192F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 192F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
193F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
193F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 194F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
194F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 195F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
195F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 196F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
@@ -204,6 +205,7 @@ static struct attribute *f2fs_attrs[] = {
204 ATTR_LIST(max_small_discards), 205 ATTR_LIST(max_small_discards),
205 ATTR_LIST(ipu_policy), 206 ATTR_LIST(ipu_policy),
206 ATTR_LIST(min_ipu_util), 207 ATTR_LIST(min_ipu_util),
208 ATTR_LIST(min_fsync_blocks),
207 ATTR_LIST(max_victim_search), 209 ATTR_LIST(max_victim_search),
208 ATTR_LIST(dir_level), 210 ATTR_LIST(dir_level),
209 ATTR_LIST(ram_thresh), 211 ATTR_LIST(ram_thresh),
@@ -366,11 +368,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
366 368
367 /* Initialize f2fs-specific inode info */ 369 /* Initialize f2fs-specific inode info */
368 fi->vfs_inode.i_version = 1; 370 fi->vfs_inode.i_version = 1;
369 atomic_set(&fi->dirty_dents, 0); 371 atomic_set(&fi->dirty_pages, 0);
370 fi->i_current_depth = 1; 372 fi->i_current_depth = 1;
371 fi->i_advise = 0; 373 fi->i_advise = 0;
372 rwlock_init(&fi->ext.ext_lock); 374 rwlock_init(&fi->ext.ext_lock);
373 init_rwsem(&fi->i_sem); 375 init_rwsem(&fi->i_sem);
376 INIT_LIST_HEAD(&fi->inmem_pages);
377 mutex_init(&fi->inmem_lock);
374 378
375 set_inode_flag(fi, FI_NEW_INODE); 379 set_inode_flag(fi, FI_NEW_INODE);
376 380
@@ -432,14 +436,19 @@ static void f2fs_put_super(struct super_block *sb)
432 stop_gc_thread(sbi); 436 stop_gc_thread(sbi);
433 437
434 /* We don't need to do checkpoint when it's clean */ 438 /* We don't need to do checkpoint when it's clean */
435 if (sbi->s_dirty) 439 if (sbi->s_dirty) {
436 write_checkpoint(sbi, true); 440 struct cp_control cpc = {
441 .reason = CP_UMOUNT,
442 };
443 write_checkpoint(sbi, &cpc);
444 }
437 445
438 /* 446 /*
439 * normally superblock is clean, so we need to release this. 447 * normally superblock is clean, so we need to release this.
440 * In addition, EIO will skip do checkpoint, we need this as well. 448 * In addition, EIO will skip do checkpoint, we need this as well.
441 */ 449 */
442 release_dirty_inode(sbi); 450 release_dirty_inode(sbi);
451 release_discard_addrs(sbi);
443 452
444 iput(sbi->node_inode); 453 iput(sbi->node_inode);
445 iput(sbi->meta_inode); 454 iput(sbi->meta_inode);
@@ -464,8 +473,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
464 trace_f2fs_sync_fs(sb, sync); 473 trace_f2fs_sync_fs(sb, sync);
465 474
466 if (sync) { 475 if (sync) {
476 struct cp_control cpc = {
477 .reason = CP_SYNC,
478 };
467 mutex_lock(&sbi->gc_mutex); 479 mutex_lock(&sbi->gc_mutex);
468 write_checkpoint(sbi, false); 480 write_checkpoint(sbi, &cpc);
469 mutex_unlock(&sbi->gc_mutex); 481 mutex_unlock(&sbi->gc_mutex);
470 } else { 482 } else {
471 f2fs_balance_fs(sbi); 483 f2fs_balance_fs(sbi);
@@ -616,6 +628,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
616 org_mount_opt = sbi->mount_opt; 628 org_mount_opt = sbi->mount_opt;
617 active_logs = sbi->active_logs; 629 active_logs = sbi->active_logs;
618 630
631 sbi->mount_opt.opt = 0;
632 sbi->active_logs = NR_CURSEG_TYPE;
633
619 /* parse mount options */ 634 /* parse mount options */
620 err = parse_options(sb, data); 635 err = parse_options(sb, data);
621 if (err) 636 if (err)
@@ -786,14 +801,22 @@ static int sanity_check_raw_super(struct super_block *sb,
786 return 1; 801 return 1;
787 } 802 }
788 803
789 if (le32_to_cpu(raw_super->log_sectorsize) != 804 /* Currently, support 512/1024/2048/4096 bytes sector size */
790 F2FS_LOG_SECTOR_SIZE) { 805 if (le32_to_cpu(raw_super->log_sectorsize) >
791 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); 806 F2FS_MAX_LOG_SECTOR_SIZE ||
807 le32_to_cpu(raw_super->log_sectorsize) <
808 F2FS_MIN_LOG_SECTOR_SIZE) {
809 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
810 le32_to_cpu(raw_super->log_sectorsize));
792 return 1; 811 return 1;
793 } 812 }
794 if (le32_to_cpu(raw_super->log_sectors_per_block) != 813 if (le32_to_cpu(raw_super->log_sectors_per_block) +
795 F2FS_LOG_SECTORS_PER_BLOCK) { 814 le32_to_cpu(raw_super->log_sectorsize) !=
796 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); 815 F2FS_MAX_LOG_SECTOR_SIZE) {
816 f2fs_msg(sb, KERN_INFO,
817 "Invalid log sectors per block(%u) log sectorsize(%u)",
818 le32_to_cpu(raw_super->log_sectors_per_block),
819 le32_to_cpu(raw_super->log_sectorsize));
797 return 1; 820 return 1;
798 } 821 }
799 return 0; 822 return 0;
@@ -849,6 +872,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
849 atomic_set(&sbi->nr_pages[i], 0); 872 atomic_set(&sbi->nr_pages[i], 0);
850 873
851 sbi->dir_level = DEF_DIR_LEVEL; 874 sbi->dir_level = DEF_DIR_LEVEL;
875 sbi->need_fsck = false;
852} 876}
853 877
854/* 878/*
@@ -1082,6 +1106,9 @@ try_onemore:
1082 if (err) 1106 if (err)
1083 goto free_proc; 1107 goto free_proc;
1084 1108
1109 if (!retry)
1110 sbi->need_fsck = true;
1111
1085 /* recover fsynced data */ 1112 /* recover fsynced data */
1086 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1113 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1087 err = recover_fsync_data(sbi); 1114 err = recover_fsync_data(sbi);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 728a5dc3dc16..deca8728117b 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -266,7 +266,7 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
266 266
267static void *read_all_xattrs(struct inode *inode, struct page *ipage) 267static void *read_all_xattrs(struct inode *inode, struct page *ipage)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 struct f2fs_xattr_header *header; 270 struct f2fs_xattr_header *header;
271 size_t size = PAGE_SIZE, inline_size = 0; 271 size_t size = PAGE_SIZE, inline_size = 0;
272 void *txattr_addr; 272 void *txattr_addr;
@@ -325,7 +325,7 @@ fail:
325static inline int write_all_xattrs(struct inode *inode, __u32 hsize, 325static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
326 void *txattr_addr, struct page *ipage) 326 void *txattr_addr, struct page *ipage)
327{ 327{
328 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 328 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
329 size_t inline_size = 0; 329 size_t inline_size = 0;
330 void *xattr_addr; 330 void *xattr_addr;
331 struct page *xpage; 331 struct page *xpage;
@@ -373,7 +373,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
373 alloc_nid_failed(sbi, new_nid); 373 alloc_nid_failed(sbi, new_nid);
374 return PTR_ERR(xpage); 374 return PTR_ERR(xpage);
375 } 375 }
376 f2fs_bug_on(new_nid); 376 f2fs_bug_on(sbi, new_nid);
377 f2fs_wait_on_page_writeback(xpage, NODE); 377 f2fs_wait_on_page_writeback(xpage, NODE);
378 } else { 378 } else {
379 struct dnode_of_data dn; 379 struct dnode_of_data dn;
@@ -596,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
596 const void *value, size_t size, 596 const void *value, size_t size,
597 struct page *ipage, int flags) 597 struct page *ipage, int flags)
598{ 598{
599 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 599 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
600 int err; 600 int err;
601 601
602 /* this case is only from init_inode_metadata */ 602 /* this case is only from init_inode_metadata */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 08ed2b0a96e6..860313a33a43 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -15,8 +15,9 @@
15#include <linux/types.h> 15#include <linux/types.h>
16 16
17#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ 17#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */
18#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ 18#define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */
19#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ 19#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */
20#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */
20#define F2FS_BLKSIZE 4096 /* support only 4KB block */ 21#define F2FS_BLKSIZE 4096 /* support only 4KB block */
21#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ 22#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
22#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) 23#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
@@ -85,6 +86,7 @@ struct f2fs_super_block {
85/* 86/*
86 * For checkpoint 87 * For checkpoint
87 */ 88 */
89#define CP_FSCK_FLAG 0x00000010
88#define CP_ERROR_FLAG 0x00000008 90#define CP_ERROR_FLAG 0x00000008
89#define CP_COMPACT_SUM_FLAG 0x00000004 91#define CP_COMPACT_SUM_FLAG 0x00000004
90#define CP_ORPHAN_PRESENT_FLAG 0x00000002 92#define CP_ORPHAN_PRESENT_FLAG 0x00000002
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index d06d44363fea..bbc4de9baef7 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -69,6 +69,12 @@
69 { GC_GREEDY, "Greedy" }, \ 69 { GC_GREEDY, "Greedy" }, \
70 { GC_CB, "Cost-Benefit" }) 70 { GC_CB, "Cost-Benefit" })
71 71
72#define show_cpreason(type) \
73 __print_symbolic(type, \
74 { CP_UMOUNT, "Umount" }, \
75 { CP_SYNC, "Sync" }, \
76 { CP_DISCARD, "Discard" })
77
72struct victim_sel_policy; 78struct victim_sel_policy;
73 79
74DECLARE_EVENT_CLASS(f2fs__inode, 80DECLARE_EVENT_CLASS(f2fs__inode,
@@ -944,25 +950,25 @@ TRACE_EVENT(f2fs_submit_page_mbio,
944 950
945TRACE_EVENT(f2fs_write_checkpoint, 951TRACE_EVENT(f2fs_write_checkpoint,
946 952
947 TP_PROTO(struct super_block *sb, bool is_umount, char *msg), 953 TP_PROTO(struct super_block *sb, int reason, char *msg),
948 954
949 TP_ARGS(sb, is_umount, msg), 955 TP_ARGS(sb, reason, msg),
950 956
951 TP_STRUCT__entry( 957 TP_STRUCT__entry(
952 __field(dev_t, dev) 958 __field(dev_t, dev)
953 __field(bool, is_umount) 959 __field(int, reason)
954 __field(char *, msg) 960 __field(char *, msg)
955 ), 961 ),
956 962
957 TP_fast_assign( 963 TP_fast_assign(
958 __entry->dev = sb->s_dev; 964 __entry->dev = sb->s_dev;
959 __entry->is_umount = is_umount; 965 __entry->reason = reason;
960 __entry->msg = msg; 966 __entry->msg = msg;
961 ), 967 ),
962 968
963 TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", 969 TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
964 show_dev(__entry), 970 show_dev(__entry),
965 __entry->is_umount ? "clean umount" : "consistency", 971 show_cpreason(__entry->reason),
966 __entry->msg) 972 __entry->msg)
967); 973);
968 974