about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-03-01 18:55:04 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-03-01 18:55:04 -0500
commit 25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0 (patch)
tree 4ecf60124fd87fbd655393a081beecaf88746eea
parent 6053dc981449718d90a429933e99b441e1adaea6 (diff)
parent 900f736251c81886f3064c9d489c85eddee921b7 (diff)
Merge tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "This round introduces several interesting features such as on-disk NAT
  bitmaps, IO alignment, and a discard thread. And it includes a couple
  of major bug fixes as below.

  Enhancements:
   - introduce on-disk bitmaps to avoid scanning NAT blocks when getting
     free nids
   - support IO alignment to prepare open-channel SSD integration in
     future
   - introduce a discard thread to avoid long latency during checkpoint
     and fstrim
   - use SSR for warm node and enable inline_xattr by default
   - introduce in-memory bitmaps to check FS consistency for debugging
   - improve write_begin by avoiding needless read IO

  Bug fixes:
   - fix broken zone_reset behavior for SMR drive
   - fix wrong victim selection policy during GC
   - fix missing behavior when preparing discard commands
   - fix bugs in atomic write support and fiemap
   - workaround to handle multiple f2fs_add_link calls having same name

  ... and it includes a bunch of clean-up patches as well"

* tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (97 commits)
  f2fs: avoid to flush nat journal entries
  f2fs: avoid to issue redundant discard commands
  f2fs: fix a plint compile warning
  f2fs: add f2fs_drop_inode tracepoint
  f2fs: Fix zoned block device support
  f2fs: remove redundant set_page_dirty()
  f2fs: fix to enlarge size of write_io_dummy mempool
  f2fs: fix memory leak of write_io_dummy mempool during umount
  f2fs: fix to update F2FS_{CP_}WB_DATA count correctly
  f2fs: use MAX_FREE_NIDS for the free nids target
  f2fs: introduce free nid bitmap
  f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint
  f2fs: update the comment of default nr_pages to skipping
  f2fs: drop the duplicate pval in f2fs_getxattr
  f2fs: Don't update the xattr data that same as the exist
  f2fs: kill __is_extent_same
  f2fs: avoid bggc->fggc when enough free segments are avaliable after cp
  f2fs: select target segment with closer temperature in SSR mode
  f2fs: show simple call stack in fault injection message
  f2fs: no need lock_op in f2fs_write_inline_data
  ...
-rw-r--r-- Documentation/filesystems/f2fs.txt 7
-rw-r--r-- fs/f2fs/checkpoint.c 70
-rw-r--r-- fs/f2fs/data.c 191
-rw-r--r-- fs/f2fs/debug.c 31
-rw-r--r-- fs/f2fs/dir.c 38
-rw-r--r-- fs/f2fs/extent_cache.c 52
-rw-r--r-- fs/f2fs/f2fs.h 644
-rw-r--r-- fs/f2fs/file.c 36
-rw-r--r-- fs/f2fs/gc.c 79
-rw-r--r-- fs/f2fs/inode.c 4
-rw-r--r-- fs/f2fs/namei.c 18
-rw-r--r-- fs/f2fs/node.c 560
-rw-r--r-- fs/f2fs/node.h 33
-rw-r--r-- fs/f2fs/recovery.c 17
-rw-r--r-- fs/f2fs/segment.c 501
-rw-r--r-- fs/f2fs/segment.h 40
-rw-r--r-- fs/f2fs/super.c 138
-rw-r--r-- fs/f2fs/xattr.c 151
-rw-r--r-- fs/f2fs/xattr.h 7
-rw-r--r-- include/linux/f2fs_fs.h 8
-rw-r--r-- include/trace/events/f2fs.h 151
21 files changed, 1966 insertions, 810 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 753dd4f96afe..4f6531a4701b 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -125,13 +125,14 @@ active_logs=%u Support configuring the number of active logs. In the
125disable_ext_identify Disable the extension list configured by mkfs, so f2fs 125disable_ext_identify Disable the extension list configured by mkfs, so f2fs
126 does not aware of cold files such as media files. 126 does not aware of cold files such as media files.
127inline_xattr Enable the inline xattrs feature. 127inline_xattr Enable the inline xattrs feature.
128noinline_xattr Disable the inline xattrs feature.
128inline_data Enable the inline data feature: New created small(<~3.4k) 129inline_data Enable the inline data feature: New created small(<~3.4k)
129 files can be written into inode block. 130 files can be written into inode block.
130inline_dentry Enable the inline dir feature: data in new created 131inline_dentry Enable the inline dir feature: data in new created
131 directory entries can be written into inode block. The 132 directory entries can be written into inode block. The
132 space of inode block which is used to store inline 133 space of inode block which is used to store inline
133 dentries is limited to ~3.4k. 134 dentries is limited to ~3.4k.
134noinline_dentry Diable the inline dentry feature. 135noinline_dentry Disable the inline dentry feature.
135flush_merge Merge concurrent cache_flush commands as much as possible 136flush_merge Merge concurrent cache_flush commands as much as possible
136 to eliminate redundant command issues. If the underlying 137 to eliminate redundant command issues. If the underlying
137 device handles the cache_flush command relatively slowly, 138 device handles the cache_flush command relatively slowly,
@@ -157,6 +158,8 @@ data_flush Enable data flushing before checkpoint in order to
157mode=%s Control block allocation mode which supports "adaptive" 158mode=%s Control block allocation mode which supports "adaptive"
158 and "lfs". In "lfs" mode, there should be no random 159 and "lfs". In "lfs" mode, there should be no random
159 writes towards main area. 160 writes towards main area.
161io_bits=%u Set the bit size of write IO requests. It should be set
162 with "mode=lfs".
160 163
161================================================================================ 164================================================================================
162DEBUGFS ENTRIES 165DEBUGFS ENTRIES
@@ -174,7 +177,7 @@ f2fs. Each file shows the whole f2fs information.
174SYSFS ENTRIES 177SYSFS ENTRIES
175================================================================================ 178================================================================================
176 179
177Information about mounted f2f2 file systems can be found in 180Information about mounted f2fs file systems can be found in
178/sys/fs/f2fs. Each mounted filesystem will have a directory in 181/sys/fs/f2fs. Each mounted filesystem will have a directory in
179/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). 182/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
180The files in each per-device directory are shown in table below. 183The files in each per-device directory are shown in table below.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f73ee9534d83..0339daf4ca02 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
249 dec_page_count(sbi, F2FS_DIRTY_META); 249 dec_page_count(sbi, F2FS_DIRTY_META);
250 250
251 if (wbc->for_reclaim) 251 if (wbc->for_reclaim)
252 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE); 252 f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
253 0, page->index, META, WRITE);
253 254
254 unlock_page(page); 255 unlock_page(page);
255 256
@@ -493,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
493#ifdef CONFIG_F2FS_FAULT_INJECTION 494#ifdef CONFIG_F2FS_FAULT_INJECTION
494 if (time_to_inject(sbi, FAULT_ORPHAN)) { 495 if (time_to_inject(sbi, FAULT_ORPHAN)) {
495 spin_unlock(&im->ino_lock); 496 spin_unlock(&im->ino_lock);
497 f2fs_show_injection_info(FAULT_ORPHAN);
496 return -ENOSPC; 498 return -ENOSPC;
497 } 499 }
498#endif 500#endif
@@ -681,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
681 return -EINVAL; 683 return -EINVAL;
682 } 684 }
683 685
684 crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block 686 crc = cur_cp_crc(*cp_block);
685 + crc_offset)));
686 if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { 687 if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
687 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); 688 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
688 return -EINVAL; 689 return -EINVAL;
@@ -891,7 +892,7 @@ retry:
891 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); 892 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
892 return 0; 893 return 0;
893 } 894 }
894 fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); 895 fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
895 inode = igrab(&fi->vfs_inode); 896 inode = igrab(&fi->vfs_inode);
896 spin_unlock(&sbi->inode_lock[type]); 897 spin_unlock(&sbi->inode_lock[type]);
897 if (inode) { 898 if (inode) {
@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
924 spin_unlock(&sbi->inode_lock[DIRTY_META]); 925 spin_unlock(&sbi->inode_lock[DIRTY_META]);
925 return 0; 926 return 0;
926 } 927 }
927 fi = list_entry(head->next, struct f2fs_inode_info, 928 fi = list_first_entry(head, struct f2fs_inode_info,
928 gdirty_list); 929 gdirty_list);
929 inode = igrab(&fi->vfs_inode); 930 inode = igrab(&fi->vfs_inode);
930 spin_unlock(&sbi->inode_lock[DIRTY_META]); 931 spin_unlock(&sbi->inode_lock[DIRTY_META]);
@@ -998,8 +999,6 @@ out:
998static void unblock_operations(struct f2fs_sb_info *sbi) 999static void unblock_operations(struct f2fs_sb_info *sbi)
999{ 1000{
1000 up_write(&sbi->node_write); 1001 up_write(&sbi->node_write);
1001
1002 build_free_nids(sbi, false);
1003 f2fs_unlock_all(sbi); 1002 f2fs_unlock_all(sbi);
1004} 1003}
1005 1004
@@ -1025,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1025 1024
1026 spin_lock(&sbi->cp_lock); 1025 spin_lock(&sbi->cp_lock);
1027 1026
1027 if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
1028 sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
1029 disable_nat_bits(sbi, false);
1030
1028 if (cpc->reason == CP_UMOUNT) 1031 if (cpc->reason == CP_UMOUNT)
1029 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 1032 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1030 else 1033 else
@@ -1137,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1137 1140
1138 start_blk = __start_cp_next_addr(sbi); 1141 start_blk = __start_cp_next_addr(sbi);
1139 1142
1143 /* write nat bits */
1144 if (enabled_nat_bits(sbi, cpc)) {
1145 __u64 cp_ver = cur_cp_version(ckpt);
1146 unsigned int i;
1147 block_t blk;
1148
1149 cp_ver |= ((__u64)crc32 << 32);
1150 *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
1151
1152 blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
1153 for (i = 0; i < nm_i->nat_bits_blocks; i++)
1154 update_meta_page(sbi, nm_i->nat_bits +
1155 (i << F2FS_BLKSIZE_BITS), blk + i);
1156
1157 /* Flush all the NAT BITS pages */
1158 while (get_pages(sbi, F2FS_DIRTY_META)) {
1159 sync_meta_pages(sbi, META, LONG_MAX);
1160 if (unlikely(f2fs_cp_error(sbi)))
1161 return -EIO;
1162 }
1163 }
1164
1140 /* need to wait for end_io results */ 1165 /* need to wait for end_io results */
1141 wait_on_all_pages_writeback(sbi); 1166 wait_on_all_pages_writeback(sbi);
1142 if (unlikely(f2fs_cp_error(sbi))) 1167 if (unlikely(f2fs_cp_error(sbi)))
@@ -1248,15 +1273,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1248 f2fs_flush_merged_bios(sbi); 1273 f2fs_flush_merged_bios(sbi);
1249 1274
1250 /* this is the case of multiple fstrims without any changes */ 1275 /* this is the case of multiple fstrims without any changes */
1251 if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) { 1276 if (cpc->reason == CP_DISCARD) {
1252 f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt); 1277 if (!exist_trim_candidates(sbi, cpc)) {
1253 f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries); 1278 unblock_operations(sbi);
1254 f2fs_bug_on(sbi, prefree_segments(sbi)); 1279 goto out;
1255 flush_sit_entries(sbi, cpc); 1280 }
1256 clear_prefree_segments(sbi, cpc); 1281
1257 f2fs_wait_all_discard_bio(sbi); 1282 if (NM_I(sbi)->dirty_nat_cnt == 0 &&
1258 unblock_operations(sbi); 1283 SIT_I(sbi)->dirty_sentries == 0 &&
1259 goto out; 1284 prefree_segments(sbi) == 0) {
1285 flush_sit_entries(sbi, cpc);
1286 clear_prefree_segments(sbi, cpc);
1287 unblock_operations(sbi);
1288 goto out;
1289 }
1260 } 1290 }
1261 1291
1262 /* 1292 /*
@@ -1268,17 +1298,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1268 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); 1298 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1269 1299
1270 /* write cached NAT/SIT entries to NAT/SIT area */ 1300 /* write cached NAT/SIT entries to NAT/SIT area */
1271 flush_nat_entries(sbi); 1301 flush_nat_entries(sbi, cpc);
1272 flush_sit_entries(sbi, cpc); 1302 flush_sit_entries(sbi, cpc);
1273 1303
1274 /* unlock all the fs_lock[] in do_checkpoint() */ 1304 /* unlock all the fs_lock[] in do_checkpoint() */
1275 err = do_checkpoint(sbi, cpc); 1305 err = do_checkpoint(sbi, cpc);
1276 if (err) { 1306 if (err)
1277 release_discard_addrs(sbi); 1307 release_discard_addrs(sbi);
1278 } else { 1308 else
1279 clear_prefree_segments(sbi, cpc); 1309 clear_prefree_segments(sbi, cpc);
1280 f2fs_wait_all_discard_bio(sbi);
1281 }
1282 1310
1283 unblock_operations(sbi); 1311 unblock_operations(sbi);
1284 stat_inc_cp_count(sbi->stat_info); 1312 stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ac262564fa6..1375fef11146 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -55,8 +55,10 @@ static void f2fs_read_end_io(struct bio *bio)
55 int i; 55 int i;
56 56
57#ifdef CONFIG_F2FS_FAULT_INJECTION 57#ifdef CONFIG_F2FS_FAULT_INJECTION
58 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) 58 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
59 f2fs_show_injection_info(FAULT_IO);
59 bio->bi_error = -EIO; 60 bio->bi_error = -EIO;
61 }
60#endif 62#endif
61 63
62 if (f2fs_bio_encrypted(bio)) { 64 if (f2fs_bio_encrypted(bio)) {
@@ -93,6 +95,17 @@ static void f2fs_write_end_io(struct bio *bio)
93 struct page *page = bvec->bv_page; 95 struct page *page = bvec->bv_page;
94 enum count_type type = WB_DATA_TYPE(page); 96 enum count_type type = WB_DATA_TYPE(page);
95 97
98 if (IS_DUMMY_WRITTEN_PAGE(page)) {
99 set_page_private(page, (unsigned long)NULL);
100 ClearPagePrivate(page);
101 unlock_page(page);
102 mempool_free(page, sbi->write_io_dummy);
103
104 if (unlikely(bio->bi_error))
105 f2fs_stop_checkpoint(sbi, true);
106 continue;
107 }
108
96 fscrypt_pullback_bio_page(&page, true); 109 fscrypt_pullback_bio_page(&page, true);
97 110
98 if (unlikely(bio->bi_error)) { 111 if (unlikely(bio->bi_error)) {
@@ -171,10 +184,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
171 struct bio *bio, enum page_type type) 184 struct bio *bio, enum page_type type)
172{ 185{
173 if (!is_read_io(bio_op(bio))) { 186 if (!is_read_io(bio_op(bio))) {
187 unsigned int start;
188
174 if (f2fs_sb_mounted_blkzoned(sbi->sb) && 189 if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
175 current->plug && (type == DATA || type == NODE)) 190 current->plug && (type == DATA || type == NODE))
176 blk_finish_plug(current->plug); 191 blk_finish_plug(current->plug);
192
193 if (type != DATA && type != NODE)
194 goto submit_io;
195
196 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
197 start %= F2FS_IO_SIZE(sbi);
198
199 if (start == 0)
200 goto submit_io;
201
202 /* fill dummy pages */
203 for (; start < F2FS_IO_SIZE(sbi); start++) {
204 struct page *page =
205 mempool_alloc(sbi->write_io_dummy,
206 GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
207 f2fs_bug_on(sbi, !page);
208
209 SetPagePrivate(page);
210 set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
211 lock_page(page);
212 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
213 f2fs_bug_on(sbi, 1);
214 }
215 /*
216 * In the NODE case, we lose next block address chain. So, we
217 * need to do checkpoint in f2fs_sync_file.
218 */
219 if (type == NODE)
220 set_sbi_flag(sbi, SBI_NEED_CP);
177 } 221 }
222submit_io:
223 if (is_read_io(bio_op(bio)))
224 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
225 else
226 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
178 submit_bio(bio); 227 submit_bio(bio);
179} 228}
180 229
@@ -185,19 +234,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
185 if (!io->bio) 234 if (!io->bio)
186 return; 235 return;
187 236
237 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
238
188 if (is_read_io(fio->op)) 239 if (is_read_io(fio->op))
189 trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); 240 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
190 else 241 else
191 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 242 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
192
193 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
194 243
195 __submit_bio(io->sbi, io->bio, fio->type); 244 __submit_bio(io->sbi, io->bio, fio->type);
196 io->bio = NULL; 245 io->bio = NULL;
197} 246}
198 247
199static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, 248static bool __has_merged_page(struct f2fs_bio_info *io,
200 struct page *page, nid_t ino) 249 struct inode *inode, nid_t ino, pgoff_t idx)
201{ 250{
202 struct bio_vec *bvec; 251 struct bio_vec *bvec;
203 struct page *target; 252 struct page *target;
@@ -206,7 +255,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
206 if (!io->bio) 255 if (!io->bio)
207 return false; 256 return false;
208 257
209 if (!inode && !page && !ino) 258 if (!inode && !ino)
210 return true; 259 return true;
211 260
212 bio_for_each_segment_all(bvec, io->bio, i) { 261 bio_for_each_segment_all(bvec, io->bio, i) {
@@ -216,10 +265,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
216 else 265 else
217 target = fscrypt_control_page(bvec->bv_page); 266 target = fscrypt_control_page(bvec->bv_page);
218 267
268 if (idx != target->index)
269 continue;
270
219 if (inode && inode == target->mapping->host) 271 if (inode && inode == target->mapping->host)
220 return true; 272 return true;
221 if (page && page == target)
222 return true;
223 if (ino && ino == ino_of_node(target)) 273 if (ino && ino == ino_of_node(target))
224 return true; 274 return true;
225 } 275 }
@@ -228,22 +278,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
228} 278}
229 279
230static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, 280static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
231 struct page *page, nid_t ino, 281 nid_t ino, pgoff_t idx, enum page_type type)
232 enum page_type type)
233{ 282{
234 enum page_type btype = PAGE_TYPE_OF_BIO(type); 283 enum page_type btype = PAGE_TYPE_OF_BIO(type);
235 struct f2fs_bio_info *io = &sbi->write_io[btype]; 284 struct f2fs_bio_info *io = &sbi->write_io[btype];
236 bool ret; 285 bool ret;
237 286
238 down_read(&io->io_rwsem); 287 down_read(&io->io_rwsem);
239 ret = __has_merged_page(io, inode, page, ino); 288 ret = __has_merged_page(io, inode, ino, idx);
240 up_read(&io->io_rwsem); 289 up_read(&io->io_rwsem);
241 return ret; 290 return ret;
242} 291}
243 292
244static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, 293static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
245 struct inode *inode, struct page *page, 294 struct inode *inode, nid_t ino, pgoff_t idx,
246 nid_t ino, enum page_type type, int rw) 295 enum page_type type, int rw)
247{ 296{
248 enum page_type btype = PAGE_TYPE_OF_BIO(type); 297 enum page_type btype = PAGE_TYPE_OF_BIO(type);
249 struct f2fs_bio_info *io; 298 struct f2fs_bio_info *io;
@@ -252,16 +301,16 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
252 301
253 down_write(&io->io_rwsem); 302 down_write(&io->io_rwsem);
254 303
255 if (!__has_merged_page(io, inode, page, ino)) 304 if (!__has_merged_page(io, inode, ino, idx))
256 goto out; 305 goto out;
257 306
258 /* change META to META_FLUSH in the checkpoint procedure */ 307 /* change META to META_FLUSH in the checkpoint procedure */
259 if (type >= META_FLUSH) { 308 if (type >= META_FLUSH) {
260 io->fio.type = META_FLUSH; 309 io->fio.type = META_FLUSH;
261 io->fio.op = REQ_OP_WRITE; 310 io->fio.op = REQ_OP_WRITE;
262 io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; 311 io->fio.op_flags = REQ_META | REQ_PRIO;
263 if (!test_opt(sbi, NOBARRIER)) 312 if (!test_opt(sbi, NOBARRIER))
264 io->fio.op_flags |= REQ_FUA; 313 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
265 } 314 }
266 __submit_merged_bio(io); 315 __submit_merged_bio(io);
267out: 316out:
@@ -271,15 +320,15 @@ out:
271void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, 320void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
272 int rw) 321 int rw)
273{ 322{
274 __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw); 323 __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
275} 324}
276 325
277void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, 326void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
278 struct inode *inode, struct page *page, 327 struct inode *inode, nid_t ino, pgoff_t idx,
279 nid_t ino, enum page_type type, int rw) 328 enum page_type type, int rw)
280{ 329{
281 if (has_merged_page(sbi, inode, page, ino, type)) 330 if (has_merged_page(sbi, inode, ino, idx, type))
282 __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw); 331 __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
283} 332}
284 333
285void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) 334void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
@@ -315,13 +364,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
315 return 0; 364 return 0;
316} 365}
317 366
318void f2fs_submit_page_mbio(struct f2fs_io_info *fio) 367int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
319{ 368{
320 struct f2fs_sb_info *sbi = fio->sbi; 369 struct f2fs_sb_info *sbi = fio->sbi;
321 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 370 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
322 struct f2fs_bio_info *io; 371 struct f2fs_bio_info *io;
323 bool is_read = is_read_io(fio->op); 372 bool is_read = is_read_io(fio->op);
324 struct page *bio_page; 373 struct page *bio_page;
374 int err = 0;
325 375
326 io = is_read ? &sbi->read_io : &sbi->write_io[btype]; 376 io = is_read ? &sbi->read_io : &sbi->write_io[btype];
327 377
@@ -331,6 +381,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
331 381
332 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; 382 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
333 383
384 /* set submitted = 1 as a return value */
385 fio->submitted = 1;
386
334 if (!is_read) 387 if (!is_read)
335 inc_page_count(sbi, WB_DATA_TYPE(bio_page)); 388 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
336 389
@@ -342,6 +395,13 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
342 __submit_merged_bio(io); 395 __submit_merged_bio(io);
343alloc_new: 396alloc_new:
344 if (io->bio == NULL) { 397 if (io->bio == NULL) {
398 if ((fio->type == DATA || fio->type == NODE) &&
399 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
400 err = -EAGAIN;
401 if (!is_read)
402 dec_page_count(sbi, WB_DATA_TYPE(bio_page));
403 goto out_fail;
404 }
345 io->bio = __bio_alloc(sbi, fio->new_blkaddr, 405 io->bio = __bio_alloc(sbi, fio->new_blkaddr,
346 BIO_MAX_PAGES, is_read); 406 BIO_MAX_PAGES, is_read);
347 io->fio = *fio; 407 io->fio = *fio;
@@ -355,9 +415,10 @@ alloc_new:
355 415
356 io->last_block_in_bio = fio->new_blkaddr; 416 io->last_block_in_bio = fio->new_blkaddr;
357 f2fs_trace_ios(fio, 0); 417 f2fs_trace_ios(fio, 0);
358 418out_fail:
359 up_write(&io->io_rwsem); 419 up_write(&io->io_rwsem);
360 trace_f2fs_submit_page_mbio(fio->page, fio); 420 trace_f2fs_submit_page_mbio(fio->page, fio);
421 return err;
361} 422}
362 423
363static void __set_data_blkaddr(struct dnode_of_data *dn) 424static void __set_data_blkaddr(struct dnode_of_data *dn)
@@ -453,7 +514,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
453 514
454int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) 515int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
455{ 516{
456 struct extent_info ei; 517 struct extent_info ei = {0,0,0};
457 struct inode *inode = dn->inode; 518 struct inode *inode = dn->inode;
458 519
459 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 520 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -470,7 +531,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
470 struct address_space *mapping = inode->i_mapping; 531 struct address_space *mapping = inode->i_mapping;
471 struct dnode_of_data dn; 532 struct dnode_of_data dn;
472 struct page *page; 533 struct page *page;
473 struct extent_info ei; 534 struct extent_info ei = {0,0,0};
474 int err; 535 int err;
475 struct f2fs_io_info fio = { 536 struct f2fs_io_info fio = {
476 .sbi = F2FS_I_SB(inode), 537 .sbi = F2FS_I_SB(inode),
@@ -694,6 +755,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
694 struct f2fs_map_blocks map; 755 struct f2fs_map_blocks map;
695 int err = 0; 756 int err = 0;
696 757
758 if (is_inode_flag_set(inode, FI_NO_PREALLOC))
759 return 0;
760
697 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); 761 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
698 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); 762 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
699 if (map.m_len > map.m_lblk) 763 if (map.m_len > map.m_lblk)
@@ -742,7 +806,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
742 int err = 0, ofs = 1; 806 int err = 0, ofs = 1;
743 unsigned int ofs_in_node, last_ofs_in_node; 807 unsigned int ofs_in_node, last_ofs_in_node;
744 blkcnt_t prealloc; 808 blkcnt_t prealloc;
745 struct extent_info ei; 809 struct extent_info ei = {0,0,0};
746 block_t blkaddr; 810 block_t blkaddr;
747 811
748 if (!maxblocks) 812 if (!maxblocks)
@@ -806,7 +870,7 @@ next_block:
806 } 870 }
807 if (err) 871 if (err)
808 goto sync_out; 872 goto sync_out;
809 map->m_flags = F2FS_MAP_NEW; 873 map->m_flags |= F2FS_MAP_NEW;
810 blkaddr = dn.data_blkaddr; 874 blkaddr = dn.data_blkaddr;
811 } else { 875 } else {
812 if (flag == F2FS_GET_BLOCK_BMAP) { 876 if (flag == F2FS_GET_BLOCK_BMAP) {
@@ -906,7 +970,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
906 if (!err) { 970 if (!err) {
907 map_bh(bh, inode->i_sb, map.m_pblk); 971 map_bh(bh, inode->i_sb, map.m_pblk);
908 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; 972 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
909 bh->b_size = map.m_len << inode->i_blkbits; 973 bh->b_size = (u64)map.m_len << inode->i_blkbits;
910 } 974 }
911 return err; 975 return err;
912} 976}
@@ -1088,7 +1152,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
1088 1152
1089 prefetchw(&page->flags); 1153 prefetchw(&page->flags);
1090 if (pages) { 1154 if (pages) {
1091 page = list_entry(pages->prev, struct page, lru); 1155 page = list_last_entry(pages, struct page, lru);
1092 list_del(&page->lru); 1156 list_del(&page->lru);
1093 if (add_to_page_cache_lru(page, mapping, 1157 if (add_to_page_cache_lru(page, mapping,
1094 page->index, 1158 page->index,
@@ -1207,7 +1271,7 @@ static int f2fs_read_data_pages(struct file *file,
1207 struct list_head *pages, unsigned nr_pages) 1271 struct list_head *pages, unsigned nr_pages)
1208{ 1272{
1209 struct inode *inode = file->f_mapping->host; 1273 struct inode *inode = file->f_mapping->host;
1210 struct page *page = list_entry(pages->prev, struct page, lru); 1274 struct page *page = list_last_entry(pages, struct page, lru);
1211 1275
1212 trace_f2fs_readpages(inode, page, nr_pages); 1276 trace_f2fs_readpages(inode, page, nr_pages);
1213 1277
@@ -1288,8 +1352,8 @@ out_writepage:
1288 return err; 1352 return err;
1289} 1353}
1290 1354
1291static int f2fs_write_data_page(struct page *page, 1355static int __write_data_page(struct page *page, bool *submitted,
1292 struct writeback_control *wbc) 1356 struct writeback_control *wbc)
1293{ 1357{
1294 struct inode *inode = page->mapping->host; 1358 struct inode *inode = page->mapping->host;
1295 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1359 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1307,6 +1371,7 @@ static int f2fs_write_data_page(struct page *page,
1307 .op_flags = wbc_to_write_flags(wbc), 1371 .op_flags = wbc_to_write_flags(wbc),
1308 .page = page, 1372 .page = page,
1309 .encrypted_page = NULL, 1373 .encrypted_page = NULL,
1374 .submitted = false,
1310 }; 1375 };
1311 1376
1312 trace_f2fs_writepage(page, DATA); 1377 trace_f2fs_writepage(page, DATA);
@@ -1352,9 +1417,12 @@ write:
1352 goto redirty_out; 1417 goto redirty_out;
1353 1418
1354 err = -EAGAIN; 1419 err = -EAGAIN;
1355 f2fs_lock_op(sbi); 1420 if (f2fs_has_inline_data(inode)) {
1356 if (f2fs_has_inline_data(inode))
1357 err = f2fs_write_inline_data(inode, page); 1421 err = f2fs_write_inline_data(inode, page);
1422 if (!err)
1423 goto out;
1424 }
1425 f2fs_lock_op(sbi);
1358 if (err == -EAGAIN) 1426 if (err == -EAGAIN)
1359 err = do_write_data_page(&fio); 1427 err = do_write_data_page(&fio);
1360 if (F2FS_I(inode)->last_disk_size < psize) 1428 if (F2FS_I(inode)->last_disk_size < psize)
@@ -1370,15 +1438,22 @@ out:
1370 ClearPageUptodate(page); 1438 ClearPageUptodate(page);
1371 1439
1372 if (wbc->for_reclaim) { 1440 if (wbc->for_reclaim) {
1373 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); 1441 f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
1442 DATA, WRITE);
1374 remove_dirty_inode(inode); 1443 remove_dirty_inode(inode);
1444 submitted = NULL;
1375 } 1445 }
1376 1446
1377 unlock_page(page); 1447 unlock_page(page);
1378 f2fs_balance_fs(sbi, need_balance_fs); 1448 f2fs_balance_fs(sbi, need_balance_fs);
1379 1449
1380 if (unlikely(f2fs_cp_error(sbi))) 1450 if (unlikely(f2fs_cp_error(sbi))) {
1381 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1451 f2fs_submit_merged_bio(sbi, DATA, WRITE);
1452 submitted = NULL;
1453 }
1454
1455 if (submitted)
1456 *submitted = fio.submitted;
1382 1457
1383 return 0; 1458 return 0;
1384 1459
@@ -1390,6 +1465,12 @@ redirty_out:
1390 return err; 1465 return err;
1391} 1466}
1392 1467
1468static int f2fs_write_data_page(struct page *page,
1469 struct writeback_control *wbc)
1470{
1471 return __write_data_page(page, NULL, wbc);
1472}
1473
1393/* 1474/*
1394 * This function was copied from write_cche_pages from mm/page-writeback.c. 1475 * This function was copied from write_cche_pages from mm/page-writeback.c.
1395 * The major change is making write step of cold data page separately from 1476 * The major change is making write step of cold data page separately from
@@ -1406,10 +1487,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
1406 pgoff_t index; 1487 pgoff_t index;
1407 pgoff_t end; /* Inclusive */ 1488 pgoff_t end; /* Inclusive */
1408 pgoff_t done_index; 1489 pgoff_t done_index;
1490 pgoff_t last_idx = ULONG_MAX;
1409 int cycled; 1491 int cycled;
1410 int range_whole = 0; 1492 int range_whole = 0;
1411 int tag; 1493 int tag;
1412 int nwritten = 0;
1413 1494
1414 pagevec_init(&pvec, 0); 1495 pagevec_init(&pvec, 0);
1415 1496
@@ -1446,6 +1527,7 @@ retry:
1446 1527
1447 for (i = 0; i < nr_pages; i++) { 1528 for (i = 0; i < nr_pages; i++) {
1448 struct page *page = pvec.pages[i]; 1529 struct page *page = pvec.pages[i];
1530 bool submitted = false;
1449 1531
1450 if (page->index > end) { 1532 if (page->index > end) {
1451 done = 1; 1533 done = 1;
@@ -1479,7 +1561,7 @@ continue_unlock:
1479 if (!clear_page_dirty_for_io(page)) 1561 if (!clear_page_dirty_for_io(page))
1480 goto continue_unlock; 1562 goto continue_unlock;
1481 1563
1482 ret = mapping->a_ops->writepage(page, wbc); 1564 ret = __write_data_page(page, &submitted, wbc);
1483 if (unlikely(ret)) { 1565 if (unlikely(ret)) {
1484 /* 1566 /*
1485 * keep nr_to_write, since vfs uses this to 1567 * keep nr_to_write, since vfs uses this to
@@ -1493,8 +1575,8 @@ continue_unlock:
1493 done_index = page->index + 1; 1575 done_index = page->index + 1;
1494 done = 1; 1576 done = 1;
1495 break; 1577 break;
1496 } else { 1578 } else if (submitted) {
1497 nwritten++; 1579 last_idx = page->index;
1498 } 1580 }
1499 1581
1500 if (--wbc->nr_to_write <= 0 && 1582 if (--wbc->nr_to_write <= 0 &&
@@ -1516,9 +1598,9 @@ continue_unlock:
1516 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 1598 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1517 mapping->writeback_index = done_index; 1599 mapping->writeback_index = done_index;
1518 1600
1519 if (nwritten) 1601 if (last_idx != ULONG_MAX)
1520 f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, 1602 f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
1521 NULL, 0, DATA, WRITE); 1603 0, last_idx, DATA, WRITE);
1522 1604
1523 return ret; 1605 return ret;
1524} 1606}
@@ -1591,14 +1673,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
1591 struct dnode_of_data dn; 1673 struct dnode_of_data dn;
1592 struct page *ipage; 1674 struct page *ipage;
1593 bool locked = false; 1675 bool locked = false;
1594 struct extent_info ei; 1676 struct extent_info ei = {0,0,0};
1595 int err = 0; 1677 int err = 0;
1596 1678
1597 /* 1679 /*
1598 * we already allocated all the blocks, so we don't need to get 1680 * we already allocated all the blocks, so we don't need to get
1599 * the block addresses when there is no need to fill the page. 1681 * the block addresses when there is no need to fill the page.
1600 */ 1682 */
1601 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) 1683 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
1684 !is_inode_flag_set(inode, FI_NO_PREALLOC))
1602 return 0; 1685 return 0;
1603 1686
1604 if (f2fs_has_inline_data(inode) || 1687 if (f2fs_has_inline_data(inode) ||
@@ -1682,7 +1765,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1682 goto fail; 1765 goto fail;
1683 } 1766 }
1684repeat: 1767repeat:
1685 page = grab_cache_page_write_begin(mapping, index, flags); 1768 /*
1769 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
1770 * wait_for_stable_page. Will wait that below with our IO control.
1771 */
1772 page = pagecache_get_page(mapping, index,
1773 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
1686 if (!page) { 1774 if (!page) {
1687 err = -ENOMEM; 1775 err = -ENOMEM;
1688 goto fail; 1776 goto fail;
@@ -1715,6 +1803,11 @@ repeat:
1715 if (len == PAGE_SIZE || PageUptodate(page)) 1803 if (len == PAGE_SIZE || PageUptodate(page))
1716 return 0; 1804 return 0;
1717 1805
1806 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
1807 zero_user_segment(page, len, PAGE_SIZE);
1808 return 0;
1809 }
1810
1718 if (blkaddr == NEW_ADDR) { 1811 if (blkaddr == NEW_ADDR) {
1719 zero_user_segment(page, 0, PAGE_SIZE); 1812 zero_user_segment(page, 0, PAGE_SIZE);
1720 SetPageUptodate(page); 1813 SetPageUptodate(page);
@@ -1768,7 +1861,7 @@ static int f2fs_write_end(struct file *file,
1768 * let generic_perform_write() try to copy data again through copied=0. 1861 * let generic_perform_write() try to copy data again through copied=0.
1769 */ 1862 */
1770 if (!PageUptodate(page)) { 1863 if (!PageUptodate(page)) {
1771 if (unlikely(copied != PAGE_SIZE)) 1864 if (unlikely(copied != len))
1772 copied = 0; 1865 copied = 0;
1773 else 1866 else
1774 SetPageUptodate(page); 1867 SetPageUptodate(page);
@@ -1917,7 +2010,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
1917 if (!PageUptodate(page)) 2010 if (!PageUptodate(page))
1918 SetPageUptodate(page); 2011 SetPageUptodate(page);
1919 2012
1920 if (f2fs_is_atomic_file(inode)) { 2013 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
1921 if (!IS_ATOMIC_WRITTEN_PAGE(page)) { 2014 if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
1922 register_inmem_page(inode, page); 2015 register_inmem_page(inode, page);
1923 return 1; 2016 return 1;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184140d0..a77df377e2e8 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
50 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 50 si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
51 si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; 51 si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
52 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 52 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
53 si->aw_cnt = atomic_read(&sbi->aw_cnt);
54 si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
53 si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); 55 si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
54 si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); 56 si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
57 if (SM_I(sbi) && SM_I(sbi)->fcc_info)
58 si->nr_flush =
59 atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
60 if (SM_I(sbi) && SM_I(sbi)->dcc_info)
61 si->nr_discard =
62 atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
55 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 63 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
56 si->rsvd_segs = reserved_segments(sbi); 64 si->rsvd_segs = reserved_segments(sbi);
57 si->overp_segs = overprovision_segments(sbi); 65 si->overp_segs = overprovision_segments(sbi);
@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
62 si->inline_xattr = atomic_read(&sbi->inline_xattr); 70 si->inline_xattr = atomic_read(&sbi->inline_xattr);
63 si->inline_inode = atomic_read(&sbi->inline_inode); 71 si->inline_inode = atomic_read(&sbi->inline_inode);
64 si->inline_dir = atomic_read(&sbi->inline_dir); 72 si->inline_dir = atomic_read(&sbi->inline_dir);
73 si->append = sbi->im[APPEND_INO].ino_num;
74 si->update = sbi->im[UPDATE_INO].ino_num;
65 si->orphans = sbi->im[ORPHAN_INO].ino_num; 75 si->orphans = sbi->im[ORPHAN_INO].ino_num;
66 si->utilization = utilization(sbi); 76 si->utilization = utilization(sbi);
67 77
@@ -183,6 +193,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
183 /* build nm */ 193 /* build nm */
184 si->base_mem += sizeof(struct f2fs_nm_info); 194 si->base_mem += sizeof(struct f2fs_nm_info);
185 si->base_mem += __bitmap_size(sbi, NAT_BITMAP); 195 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
196 si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
197 si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
198 si->base_mem += NM_I(sbi)->nat_blocks / 8;
186 199
187get_cache: 200get_cache:
188 si->cache_mem = 0; 201 si->cache_mem = 0;
@@ -192,8 +205,10 @@ get_cache:
192 si->cache_mem += sizeof(struct f2fs_gc_kthread); 205 si->cache_mem += sizeof(struct f2fs_gc_kthread);
193 206
194 /* build merge flush thread */ 207 /* build merge flush thread */
195 if (SM_I(sbi)->cmd_control_info) 208 if (SM_I(sbi)->fcc_info)
196 si->cache_mem += sizeof(struct flush_cmd_control); 209 si->cache_mem += sizeof(struct flush_cmd_control);
210 if (SM_I(sbi)->dcc_info)
211 si->cache_mem += sizeof(struct discard_cmd_control);
197 212
198 /* free nids */ 213 /* free nids */
199 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + 214 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -254,8 +269,8 @@ static int stat_show(struct seq_file *s, void *v)
254 si->inline_inode); 269 si->inline_inode);
255 seq_printf(s, " - Inline_dentry Inode: %u\n", 270 seq_printf(s, " - Inline_dentry Inode: %u\n",
256 si->inline_dir); 271 si->inline_dir);
257 seq_printf(s, " - Orphan Inode: %u\n", 272 seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
258 si->orphans); 273 si->orphans, si->append, si->update);
259 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 274 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
260 si->main_area_segs, si->main_area_sections, 275 si->main_area_segs, si->main_area_sections,
261 si->main_area_zones); 276 si->main_area_zones);
@@ -314,8 +329,11 @@ static int stat_show(struct seq_file *s, void *v)
314 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 329 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
315 si->ext_tree, si->zombie_tree, si->ext_node); 330 si->ext_tree, si->zombie_tree, si->ext_node);
316 seq_puts(s, "\nBalancing F2FS Async:\n"); 331 seq_puts(s, "\nBalancing F2FS Async:\n");
317 seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n", 332 seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
318 si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data); 333 si->nr_wb_cp_data, si->nr_wb_data,
334 si->nr_flush, si->nr_discard);
335 seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
336 si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
319 seq_printf(s, " - nodes: %4d in %4d\n", 337 seq_printf(s, " - nodes: %4d in %4d\n",
320 si->ndirty_node, si->node_pages); 338 si->ndirty_node, si->node_pages);
321 seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", 339 seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -414,6 +432,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
414 atomic_set(&sbi->inline_dir, 0); 432 atomic_set(&sbi->inline_dir, 0);
415 atomic_set(&sbi->inplace_count, 0); 433 atomic_set(&sbi->inplace_count, 0);
416 434
435 atomic_set(&sbi->aw_cnt, 0);
436 atomic_set(&sbi->max_aw_cnt, 0);
437
417 mutex_lock(&f2fs_stat_mutex); 438 mutex_lock(&f2fs_stat_mutex);
418 list_add_tail(&si->stat_list, &f2fs_stat_list); 439 list_add_tail(&si->stat_list, &f2fs_stat_list);
419 mutex_unlock(&f2fs_stat_mutex); 440 mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 18607fc5240d..4650c9b85de7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
207 f2fs_put_page(dentry_page, 0); 207 f2fs_put_page(dentry_page, 0);
208 } 208 }
209 209
210 if (!de && room && F2FS_I(dir)->chash != namehash) { 210 /* This is to increase the speed of f2fs_create */
211 F2FS_I(dir)->chash = namehash; 211 if (!de && room) {
212 F2FS_I(dir)->clevel = level; 212 F2FS_I(dir)->task = current;
213 if (F2FS_I(dir)->chash != namehash) {
214 F2FS_I(dir)->chash = namehash;
215 F2FS_I(dir)->clevel = level;
216 }
213 } 217 }
214 218
215 return de; 219 return de;
@@ -548,8 +552,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
548 552
549start: 553start:
550#ifdef CONFIG_F2FS_FAULT_INJECTION 554#ifdef CONFIG_F2FS_FAULT_INJECTION
551 if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) 555 if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
556 f2fs_show_injection_info(FAULT_DIR_DEPTH);
552 return -ENOSPC; 557 return -ENOSPC;
558 }
553#endif 559#endif
554 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) 560 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
555 return -ENOSPC; 561 return -ENOSPC;
@@ -646,14 +652,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
646 struct inode *inode, nid_t ino, umode_t mode) 652 struct inode *inode, nid_t ino, umode_t mode)
647{ 653{
648 struct fscrypt_name fname; 654 struct fscrypt_name fname;
655 struct page *page = NULL;
656 struct f2fs_dir_entry *de = NULL;
649 int err; 657 int err;
650 658
651 err = fscrypt_setup_filename(dir, name, 0, &fname); 659 err = fscrypt_setup_filename(dir, name, 0, &fname);
652 if (err) 660 if (err)
653 return err; 661 return err;
654 662
655 err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); 663 /*
656 664 * An immature stakable filesystem shows a race condition between lookup
665 * and create. If we have same task when doing lookup and create, it's
666 * definitely fine as expected by VFS normally. Otherwise, let's just
667 * verify on-disk dentry one more time, which guarantees filesystem
668 * consistency more.
669 */
670 if (current != F2FS_I(dir)->task) {
671 de = __f2fs_find_entry(dir, &fname, &page);
672 F2FS_I(dir)->task = NULL;
673 }
674 if (de) {
675 f2fs_dentry_kunmap(dir, page);
676 f2fs_put_page(page, 0);
677 err = -EEXIST;
678 } else if (IS_ERR(page)) {
679 err = PTR_ERR(page);
680 } else {
681 err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
682 }
657 fscrypt_free_filename(&fname); 683 fscrypt_free_filename(&fname);
658 return err; 684 return err;
659} 685}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 4db44da7ef69..c6934f014e0f 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
77 struct extent_tree *et; 77 struct extent_tree *et;
78 nid_t ino = inode->i_ino; 78 nid_t ino = inode->i_ino;
79 79
80 down_write(&sbi->extent_tree_lock); 80 mutex_lock(&sbi->extent_tree_lock);
81 et = radix_tree_lookup(&sbi->extent_tree_root, ino); 81 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
82 if (!et) { 82 if (!et) {
83 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); 83 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
94 atomic_dec(&sbi->total_zombie_tree); 94 atomic_dec(&sbi->total_zombie_tree);
95 list_del_init(&et->list); 95 list_del_init(&et->list);
96 } 96 }
97 up_write(&sbi->extent_tree_lock); 97 mutex_unlock(&sbi->extent_tree_lock);
98 98
99 /* never died until evict_inode */ 99 /* never died until evict_inode */
100 F2FS_I(inode)->extent_tree = et; 100 F2FS_I(inode)->extent_tree = et;
@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
311 tmp_node = parent; 311 tmp_node = parent;
312 if (parent && fofs > en->ei.fofs) 312 if (parent && fofs > en->ei.fofs)
313 tmp_node = rb_next(parent); 313 tmp_node = rb_next(parent);
314 *next_ex = tmp_node ? 314 *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
315 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
316 315
317 tmp_node = parent; 316 tmp_node = parent;
318 if (parent && fofs < en->ei.fofs) 317 if (parent && fofs < en->ei.fofs)
319 tmp_node = rb_prev(parent); 318 tmp_node = rb_prev(parent);
320 *prev_ex = tmp_node ? 319 *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
321 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
322 return NULL; 320 return NULL;
323 321
324lookup_neighbors: 322lookup_neighbors:
325 if (fofs == en->ei.fofs) { 323 if (fofs == en->ei.fofs) {
326 /* lookup prev node for merging backward later */ 324 /* lookup prev node for merging backward later */
327 tmp_node = rb_prev(&en->rb_node); 325 tmp_node = rb_prev(&en->rb_node);
328 *prev_ex = tmp_node ? 326 *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
329 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
330 } 327 }
331 if (fofs == en->ei.fofs + en->ei.len - 1) { 328 if (fofs == en->ei.fofs + en->ei.len - 1) {
332 /* lookup next node for merging frontward later */ 329 /* lookup next node for merging frontward later */
333 tmp_node = rb_next(&en->rb_node); 330 tmp_node = rb_next(&en->rb_node);
334 *next_ex = tmp_node ? 331 *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
335 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
336 } 332 }
337 return en; 333 return en;
338} 334}
@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
352 } 348 }
353 349
354 if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { 350 if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
355 if (en)
356 __release_extent_node(sbi, et, prev_ex);
357 next_ex->ei.fofs = ei->fofs; 351 next_ex->ei.fofs = ei->fofs;
358 next_ex->ei.blk = ei->blk; 352 next_ex->ei.blk = ei->blk;
359 next_ex->ei.len += ei->len; 353 next_ex->ei.len += ei->len;
354 if (en)
355 __release_extent_node(sbi, et, prev_ex);
356
360 en = next_ex; 357 en = next_ex;
361 } 358 }
362 359
@@ -416,7 +413,7 @@ do_insert:
416 return en; 413 return en;
417} 414}
418 415
419static unsigned int f2fs_update_extent_tree_range(struct inode *inode, 416static void f2fs_update_extent_tree_range(struct inode *inode,
420 pgoff_t fofs, block_t blkaddr, unsigned int len) 417 pgoff_t fofs, block_t blkaddr, unsigned int len)
421{ 418{
422 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 419 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -429,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
429 unsigned int pos = (unsigned int)fofs; 426 unsigned int pos = (unsigned int)fofs;
430 427
431 if (!et) 428 if (!et)
432 return false; 429 return;
433 430
434 trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); 431 trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
435 432
@@ -437,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
437 434
438 if (is_inode_flag_set(inode, FI_NO_EXTENT)) { 435 if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
439 write_unlock(&et->lock); 436 write_unlock(&et->lock);
440 return false; 437 return;
441 } 438 }
442 439
443 prev = et->largest; 440 prev = et->largest;
@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
492 if (!next_en) { 489 if (!next_en) {
493 struct rb_node *node = rb_next(&en->rb_node); 490 struct rb_node *node = rb_next(&en->rb_node);
494 491
495 next_en = node ? 492 next_en = rb_entry_safe(node, struct extent_node,
496 rb_entry(node, struct extent_node, rb_node) 493 rb_node);
497 : NULL;
498 } 494 }
499 495
500 if (parts) 496 if (parts)
@@ -535,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
535 __free_extent_tree(sbi, et); 531 __free_extent_tree(sbi, et);
536 532
537 write_unlock(&et->lock); 533 write_unlock(&et->lock);
538
539 return !__is_extent_same(&prev, &et->largest);
540} 534}
541 535
542unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 536unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -552,7 +546,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
552 if (!atomic_read(&sbi->total_zombie_tree)) 546 if (!atomic_read(&sbi->total_zombie_tree))
553 goto free_node; 547 goto free_node;
554 548
555 if (!down_write_trylock(&sbi->extent_tree_lock)) 549 if (!mutex_trylock(&sbi->extent_tree_lock))
556 goto out; 550 goto out;
557 551
558 /* 1. remove unreferenced extent tree */ 552 /* 1. remove unreferenced extent tree */
@@ -574,11 +568,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
574 goto unlock_out; 568 goto unlock_out;
575 cond_resched(); 569 cond_resched();
576 } 570 }
577 up_write(&sbi->extent_tree_lock); 571 mutex_unlock(&sbi->extent_tree_lock);
578 572
579free_node: 573free_node:
580 /* 2. remove LRU extent entries */ 574 /* 2. remove LRU extent entries */
581 if (!down_write_trylock(&sbi->extent_tree_lock)) 575 if (!mutex_trylock(&sbi->extent_tree_lock))
582 goto out; 576 goto out;
583 577
584 remained = nr_shrink - (node_cnt + tree_cnt); 578 remained = nr_shrink - (node_cnt + tree_cnt);
@@ -608,7 +602,7 @@ free_node:
608 spin_unlock(&sbi->extent_lock); 602 spin_unlock(&sbi->extent_lock);
609 603
610unlock_out: 604unlock_out:
611 up_write(&sbi->extent_tree_lock); 605 mutex_unlock(&sbi->extent_tree_lock);
612out: 606out:
613 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); 607 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
614 608
@@ -655,10 +649,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
655 649
656 if (inode->i_nlink && !is_bad_inode(inode) && 650 if (inode->i_nlink && !is_bad_inode(inode) &&
657 atomic_read(&et->node_cnt)) { 651 atomic_read(&et->node_cnt)) {
658 down_write(&sbi->extent_tree_lock); 652 mutex_lock(&sbi->extent_tree_lock);
659 list_add_tail(&et->list, &sbi->zombie_list); 653 list_add_tail(&et->list, &sbi->zombie_list);
660 atomic_inc(&sbi->total_zombie_tree); 654 atomic_inc(&sbi->total_zombie_tree);
661 up_write(&sbi->extent_tree_lock); 655 mutex_unlock(&sbi->extent_tree_lock);
662 return; 656 return;
663 } 657 }
664 658
@@ -666,12 +660,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
666 node_cnt = f2fs_destroy_extent_node(inode); 660 node_cnt = f2fs_destroy_extent_node(inode);
667 661
668 /* delete extent tree entry in radix tree */ 662 /* delete extent tree entry in radix tree */
669 down_write(&sbi->extent_tree_lock); 663 mutex_lock(&sbi->extent_tree_lock);
670 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 664 f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
671 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 665 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
672 kmem_cache_free(extent_tree_slab, et); 666 kmem_cache_free(extent_tree_slab, et);
673 atomic_dec(&sbi->total_ext_tree); 667 atomic_dec(&sbi->total_ext_tree);
674 up_write(&sbi->extent_tree_lock); 668 mutex_unlock(&sbi->extent_tree_lock);
675 669
676 F2FS_I(inode)->extent_tree = NULL; 670 F2FS_I(inode)->extent_tree = NULL;
677 671
@@ -718,7 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
718void init_extent_cache_info(struct f2fs_sb_info *sbi) 712void init_extent_cache_info(struct f2fs_sb_info *sbi)
719{ 713{
720 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); 714 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
721 init_rwsem(&sbi->extent_tree_lock); 715 mutex_init(&sbi->extent_tree_lock);
722 INIT_LIST_HEAD(&sbi->extent_list); 716 INIT_LIST_HEAD(&sbi->extent_list);
723 spin_lock_init(&sbi->extent_lock); 717 spin_lock_init(&sbi->extent_lock);
724 atomic_set(&sbi->total_ext_tree, 0); 718 atomic_set(&sbi->total_ext_tree, 0);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 069fc7277d8d..d1483136fed6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -112,9 +112,9 @@ struct f2fs_mount_info {
112#define F2FS_HAS_FEATURE(sb, mask) \ 112#define F2FS_HAS_FEATURE(sb, mask) \
113 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) 113 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
114#define F2FS_SET_FEATURE(sb, mask) \ 114#define F2FS_SET_FEATURE(sb, mask) \
115 F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) 115 (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
116#define F2FS_CLEAR_FEATURE(sb, mask) \ 116#define F2FS_CLEAR_FEATURE(sb, mask) \
117 F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) 117 (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
118 118
119/* 119/*
120 * For checkpoint manager 120 * For checkpoint manager
@@ -132,11 +132,14 @@ enum {
132 CP_DISCARD, 132 CP_DISCARD,
133}; 133};
134 134
135#define DEF_BATCHED_TRIM_SECTIONS 2 135#define DEF_BATCHED_TRIM_SECTIONS 2048
136#define BATCHED_TRIM_SEGMENTS(sbi) \ 136#define BATCHED_TRIM_SEGMENTS(sbi) \
137 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) 137 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
138#define BATCHED_TRIM_BLOCKS(sbi) \ 138#define BATCHED_TRIM_BLOCKS(sbi) \
139 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) 139 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
140#define MAX_DISCARD_BLOCKS(sbi) \
141 ((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec)
142#define DISCARD_ISSUE_RATE 8
140#define DEF_CP_INTERVAL 60 /* 60 secs */ 143#define DEF_CP_INTERVAL 60 /* 60 secs */
141#define DEF_IDLE_INTERVAL 5 /* 5 secs */ 144#define DEF_IDLE_INTERVAL 5 /* 5 secs */
142 145
@@ -185,11 +188,30 @@ struct discard_entry {
185 int len; /* # of consecutive blocks of the discard */ 188 int len; /* # of consecutive blocks of the discard */
186}; 189};
187 190
188struct bio_entry { 191enum {
189 struct list_head list; 192 D_PREP,
190 struct bio *bio; 193 D_SUBMIT,
191 struct completion event; 194 D_DONE,
192 int error; 195};
196
197struct discard_cmd {
198 struct list_head list; /* command list */
199 struct completion wait; /* compleation */
200 block_t lstart; /* logical start address */
201 block_t len; /* length */
202 struct bio *bio; /* bio */
203 int state; /* state */
204};
205
206struct discard_cmd_control {
207 struct task_struct *f2fs_issue_discard; /* discard thread */
208 struct list_head discard_entry_list; /* 4KB discard entry list */
209 int nr_discards; /* # of discards in the list */
210 struct list_head discard_cmd_list; /* discard cmd list */
211 wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
212 struct mutex cmd_lock;
213 int max_discards; /* max. discards to be issued */
214 atomic_t submit_discard; /* # of issued discard */
193}; 215};
194 216
195/* for the list of fsync inodes, used only during recovery */ 217/* for the list of fsync inodes, used only during recovery */
@@ -214,6 +236,7 @@ struct fsync_inode_entry {
214static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) 236static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
215{ 237{
216 int before = nats_in_cursum(journal); 238 int before = nats_in_cursum(journal);
239
217 journal->n_nats = cpu_to_le16(before + i); 240 journal->n_nats = cpu_to_le16(before + i);
218 return before; 241 return before;
219} 242}
@@ -221,6 +244,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
221static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i) 244static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
222{ 245{
223 int before = sits_in_cursum(journal); 246 int before = sits_in_cursum(journal);
247
224 journal->n_sits = cpu_to_le16(before + i); 248 journal->n_sits = cpu_to_le16(before + i);
225 return before; 249 return before;
226} 250}
@@ -306,12 +330,14 @@ static inline void make_dentry_ptr(struct inode *inode,
306 330
307 if (type == 1) { 331 if (type == 1) {
308 struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; 332 struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
333
309 d->max = NR_DENTRY_IN_BLOCK; 334 d->max = NR_DENTRY_IN_BLOCK;
310 d->bitmap = &t->dentry_bitmap; 335 d->bitmap = &t->dentry_bitmap;
311 d->dentry = t->dentry; 336 d->dentry = t->dentry;
312 d->filename = t->filename; 337 d->filename = t->filename;
313 } else { 338 } else {
314 struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; 339 struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
340
315 d->max = NR_INLINE_DENTRY; 341 d->max = NR_INLINE_DENTRY;
316 d->bitmap = &t->dentry_bitmap; 342 d->bitmap = &t->dentry_bitmap;
317 d->dentry = t->dentry; 343 d->dentry = t->dentry;
@@ -438,8 +464,8 @@ struct f2fs_inode_info {
438 atomic_t dirty_pages; /* # of dirty pages */ 464 atomic_t dirty_pages; /* # of dirty pages */
439 f2fs_hash_t chash; /* hash value of given file name */ 465 f2fs_hash_t chash; /* hash value of given file name */
440 unsigned int clevel; /* maximum level of given file name */ 466 unsigned int clevel; /* maximum level of given file name */
467 struct task_struct *task; /* lookup and create consistency */
441 nid_t i_xattr_nid; /* node id that contains xattrs */ 468 nid_t i_xattr_nid; /* node id that contains xattrs */
442 unsigned long long xattr_ver; /* cp version of xattr modification */
443 loff_t last_disk_size; /* lastly written file size */ 469 loff_t last_disk_size; /* lastly written file size */
444 470
445 struct list_head dirty_list; /* dirty list for dirs and files */ 471 struct list_head dirty_list; /* dirty list for dirs and files */
@@ -474,13 +500,6 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
474 ei->len = len; 500 ei->len = len;
475} 501}
476 502
477static inline bool __is_extent_same(struct extent_info *ei1,
478 struct extent_info *ei2)
479{
480 return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
481 ei1->len == ei2->len);
482}
483
484static inline bool __is_extent_mergeable(struct extent_info *back, 503static inline bool __is_extent_mergeable(struct extent_info *back,
485 struct extent_info *front) 504 struct extent_info *front)
486{ 505{
@@ -500,7 +519,7 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
500 return __is_extent_mergeable(cur, front); 519 return __is_extent_mergeable(cur, front);
501} 520}
502 521
503extern void f2fs_mark_inode_dirty_sync(struct inode *, bool); 522extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
504static inline void __try_update_largest_extent(struct inode *inode, 523static inline void __try_update_largest_extent(struct inode *inode,
505 struct extent_tree *et, struct extent_node *en) 524 struct extent_tree *et, struct extent_node *en)
506{ 525{
@@ -532,6 +551,7 @@ struct f2fs_nm_info {
532 struct list_head nat_entries; /* cached nat entry list (clean) */ 551 struct list_head nat_entries; /* cached nat entry list (clean) */
533 unsigned int nat_cnt; /* the # of cached nat entries */ 552 unsigned int nat_cnt; /* the # of cached nat entries */
534 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 553 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
554 unsigned int nat_blocks; /* # of nat blocks */
535 555
536 /* free node ids management */ 556 /* free node ids management */
537 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 557 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -539,9 +559,19 @@ struct f2fs_nm_info {
539 unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ 559 unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
540 spinlock_t nid_list_lock; /* protect nid lists ops */ 560 spinlock_t nid_list_lock; /* protect nid lists ops */
541 struct mutex build_lock; /* lock for build free nids */ 561 struct mutex build_lock; /* lock for build free nids */
562 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
563 unsigned char *nat_block_bitmap;
542 564
543 /* for checkpoint */ 565 /* for checkpoint */
544 char *nat_bitmap; /* NAT bitmap pointer */ 566 char *nat_bitmap; /* NAT bitmap pointer */
567
568 unsigned int nat_bits_blocks; /* # of nat bits blocks */
569 unsigned char *nat_bits; /* NAT bits blocks */
570 unsigned char *full_nat_bits; /* full NAT pages */
571 unsigned char *empty_nat_bits; /* empty NAT pages */
572#ifdef CONFIG_F2FS_CHECK_FS
573 char *nat_bitmap_mir; /* NAT bitmap mirror */
574#endif
545 int bitmap_size; /* bitmap size */ 575 int bitmap_size; /* bitmap size */
546}; 576};
547 577
@@ -632,12 +662,6 @@ struct f2fs_sm_info {
632 /* a threshold to reclaim prefree segments */ 662 /* a threshold to reclaim prefree segments */
633 unsigned int rec_prefree_segments; 663 unsigned int rec_prefree_segments;
634 664
635 /* for small discard management */
636 struct list_head discard_list; /* 4KB discard list */
637 struct list_head wait_list; /* linked with issued discard bio */
638 int nr_discards; /* # of discards in the list */
639 int max_discards; /* max. discards to be issued */
640
641 /* for batched trimming */ 665 /* for batched trimming */
642 unsigned int trim_sections; /* # of sections to trim */ 666 unsigned int trim_sections; /* # of sections to trim */
643 667
@@ -648,8 +672,10 @@ struct f2fs_sm_info {
648 unsigned int min_fsync_blocks; /* threshold for fsync */ 672 unsigned int min_fsync_blocks; /* threshold for fsync */
649 673
650 /* for flush command control */ 674 /* for flush command control */
651 struct flush_cmd_control *cmd_control_info; 675 struct flush_cmd_control *fcc_info;
652 676
677 /* for discard command control */
678 struct discard_cmd_control *dcc_info;
653}; 679};
654 680
655/* 681/*
@@ -708,6 +734,7 @@ struct f2fs_io_info {
708 block_t old_blkaddr; /* old block address before Cow */ 734 block_t old_blkaddr; /* old block address before Cow */
709 struct page *page; /* page to be written */ 735 struct page *page; /* page to be written */
710 struct page *encrypted_page; /* encrypted page */ 736 struct page *encrypted_page; /* encrypted page */
737 bool submitted; /* indicate IO submission */
711}; 738};
712 739
713#define is_read_io(rw) (rw == READ) 740#define is_read_io(rw) (rw == READ)
@@ -787,6 +814,8 @@ struct f2fs_sb_info {
787 struct f2fs_bio_info read_io; /* for read bios */ 814 struct f2fs_bio_info read_io; /* for read bios */
788 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 815 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
789 struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ 816 struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */
817 int write_io_size_bits; /* Write IO size bits */
818 mempool_t *write_io_dummy; /* Dummy pages */
790 819
791 /* for checkpoint */ 820 /* for checkpoint */
792 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 821 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -811,7 +840,7 @@ struct f2fs_sb_info {
811 840
812 /* for extent tree cache */ 841 /* for extent tree cache */
813 struct radix_tree_root extent_tree_root;/* cache extent cache entries */ 842 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
814 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ 843 struct mutex extent_tree_lock; /* locking extent radix tree */
815 struct list_head extent_list; /* lru list for shrinker */ 844 struct list_head extent_list; /* lru list for shrinker */
816 spinlock_t extent_lock; /* locking extent lru list */ 845 spinlock_t extent_lock; /* locking extent lru list */
817 atomic_t total_ext_tree; /* extent tree count */ 846 atomic_t total_ext_tree; /* extent tree count */
@@ -858,6 +887,9 @@ struct f2fs_sb_info {
858 struct f2fs_gc_kthread *gc_thread; /* GC thread */ 887 struct f2fs_gc_kthread *gc_thread; /* GC thread */
859 unsigned int cur_victim_sec; /* current victim section num */ 888 unsigned int cur_victim_sec; /* current victim section num */
860 889
890 /* threshold for converting bg victims for fg */
891 u64 fggc_threshold;
892
861 /* maximum # of trials to find a victim segment for SSR and GC */ 893 /* maximum # of trials to find a victim segment for SSR and GC */
862 unsigned int max_victim_search; 894 unsigned int max_victim_search;
863 895
@@ -877,6 +909,8 @@ struct f2fs_sb_info {
877 atomic_t inline_xattr; /* # of inline_xattr inodes */ 909 atomic_t inline_xattr; /* # of inline_xattr inodes */
878 atomic_t inline_inode; /* # of inline_data inodes */ 910 atomic_t inline_inode; /* # of inline_data inodes */
879 atomic_t inline_dir; /* # of inline_dentry inodes */ 911 atomic_t inline_dir; /* # of inline_dentry inodes */
912 atomic_t aw_cnt; /* # of atomic writes */
913 atomic_t max_aw_cnt; /* max # of atomic writes */
880 int bg_gc; /* background gc calls */ 914 int bg_gc; /* background gc calls */
881 unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ 915 unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
882#endif 916#endif
@@ -908,6 +942,10 @@ struct f2fs_sb_info {
908}; 942};
909 943
910#ifdef CONFIG_F2FS_FAULT_INJECTION 944#ifdef CONFIG_F2FS_FAULT_INJECTION
945#define f2fs_show_injection_info(type) \
946 printk("%sF2FS-fs : inject %s in %s of %pF\n", \
947 KERN_INFO, fault_name[type], \
948 __func__, __builtin_return_address(0))
911static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) 949static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
912{ 950{
913 struct f2fs_fault_info *ffi = &sbi->fault_info; 951 struct f2fs_fault_info *ffi = &sbi->fault_info;
@@ -921,10 +959,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
921 atomic_inc(&ffi->inject_ops); 959 atomic_inc(&ffi->inject_ops);
922 if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { 960 if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
923 atomic_set(&ffi->inject_ops, 0); 961 atomic_set(&ffi->inject_ops, 0);
924 printk("%sF2FS-fs : inject %s in %pF\n",
925 KERN_INFO,
926 fault_name[type],
927 __builtin_return_address(0));
928 return true; 962 return true;
929 } 963 }
930 return false; 964 return false;
@@ -1089,6 +1123,12 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
1089 return le64_to_cpu(cp->checkpoint_ver); 1123 return le64_to_cpu(cp->checkpoint_ver);
1090} 1124}
1091 1125
1126static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
1127{
1128 size_t crc_offset = le32_to_cpu(cp->checksum_offset);
1129 return le32_to_cpu(*((__le32 *)((unsigned char *)cp + crc_offset)));
1130}
1131
1092static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) 1132static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
1093{ 1133{
1094 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); 1134 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
@@ -1133,6 +1173,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
1133 spin_unlock(&sbi->cp_lock); 1173 spin_unlock(&sbi->cp_lock);
1134} 1174}
1135 1175
1176static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
1177{
1178 set_sbi_flag(sbi, SBI_NEED_FSCK);
1179
1180 if (lock)
1181 spin_lock(&sbi->cp_lock);
1182 __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
1183 kfree(NM_I(sbi)->nat_bits);
1184 NM_I(sbi)->nat_bits = NULL;
1185 if (lock)
1186 spin_unlock(&sbi->cp_lock);
1187}
1188
1189static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
1190 struct cp_control *cpc)
1191{
1192 bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
1193
1194 return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
1195}
1196
1136static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) 1197static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
1137{ 1198{
1138 down_read(&sbi->cp_rwsem); 1199 down_read(&sbi->cp_rwsem);
@@ -1212,8 +1273,10 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
1212 blkcnt_t diff; 1273 blkcnt_t diff;
1213 1274
1214#ifdef CONFIG_F2FS_FAULT_INJECTION 1275#ifdef CONFIG_F2FS_FAULT_INJECTION
1215 if (time_to_inject(sbi, FAULT_BLOCK)) 1276 if (time_to_inject(sbi, FAULT_BLOCK)) {
1277 f2fs_show_injection_info(FAULT_BLOCK);
1216 return false; 1278 return false;
1279 }
1217#endif 1280#endif
1218 /* 1281 /*
1219 * let's increase this in prior to actual block count change in order 1282 * let's increase this in prior to actual block count change in order
@@ -1449,11 +1512,14 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
1449{ 1512{
1450#ifdef CONFIG_F2FS_FAULT_INJECTION 1513#ifdef CONFIG_F2FS_FAULT_INJECTION
1451 struct page *page = find_lock_page(mapping, index); 1514 struct page *page = find_lock_page(mapping, index);
1515
1452 if (page) 1516 if (page)
1453 return page; 1517 return page;
1454 1518
1455 if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) 1519 if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
1520 f2fs_show_injection_info(FAULT_PAGE_ALLOC);
1456 return NULL; 1521 return NULL;
1522 }
1457#endif 1523#endif
1458 if (!for_write) 1524 if (!for_write)
1459 return grab_cache_page(mapping, index); 1525 return grab_cache_page(mapping, index);
@@ -1532,6 +1598,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
1532static inline bool IS_INODE(struct page *page) 1598static inline bool IS_INODE(struct page *page)
1533{ 1599{
1534 struct f2fs_node *p = F2FS_NODE(page); 1600 struct f2fs_node *p = F2FS_NODE(page);
1601
1535 return RAW_IS_INODE(p); 1602 return RAW_IS_INODE(p);
1536} 1603}
1537 1604
@@ -1545,6 +1612,7 @@ static inline block_t datablock_addr(struct page *node_page,
1545{ 1612{
1546 struct f2fs_node *raw_node; 1613 struct f2fs_node *raw_node;
1547 __le32 *addr_array; 1614 __le32 *addr_array;
1615
1548 raw_node = F2FS_NODE(node_page); 1616 raw_node = F2FS_NODE(node_page);
1549 addr_array = blkaddr_in_node(raw_node); 1617 addr_array = blkaddr_in_node(raw_node);
1550 return le32_to_cpu(addr_array[offset]); 1618 return le32_to_cpu(addr_array[offset]);
@@ -1628,6 +1696,7 @@ enum {
1628 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1696 FI_UPDATE_WRITE, /* inode has in-place-update data */
1629 FI_NEED_IPU, /* used for ipu per file */ 1697 FI_NEED_IPU, /* used for ipu per file */
1630 FI_ATOMIC_FILE, /* indicate atomic file */ 1698 FI_ATOMIC_FILE, /* indicate atomic file */
1699 FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
1631 FI_VOLATILE_FILE, /* indicate volatile file */ 1700 FI_VOLATILE_FILE, /* indicate volatile file */
1632 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ 1701 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
1633 FI_DROP_CACHE, /* drop dirty page cache */ 1702 FI_DROP_CACHE, /* drop dirty page cache */
@@ -1635,6 +1704,7 @@ enum {
1635 FI_INLINE_DOTS, /* indicate inline dot dentries */ 1704 FI_INLINE_DOTS, /* indicate inline dot dentries */
1636 FI_DO_DEFRAG, /* indicate defragment is running */ 1705 FI_DO_DEFRAG, /* indicate defragment is running */
1637 FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ 1706 FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
1707 FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
1638}; 1708};
1639 1709
1640static inline void __mark_inode_dirty_flag(struct inode *inode, 1710static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -1779,6 +1849,7 @@ static inline unsigned int addrs_per_inode(struct inode *inode)
1779static inline void *inline_xattr_addr(struct page *page) 1849static inline void *inline_xattr_addr(struct page *page)
1780{ 1850{
1781 struct f2fs_inode *ri = F2FS_INODE(page); 1851 struct f2fs_inode *ri = F2FS_INODE(page);
1852
1782 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1853 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
1783 F2FS_INLINE_XATTR_ADDRS]); 1854 F2FS_INLINE_XATTR_ADDRS]);
1784} 1855}
@@ -1817,6 +1888,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
1817 return is_inode_flag_set(inode, FI_ATOMIC_FILE); 1888 return is_inode_flag_set(inode, FI_ATOMIC_FILE);
1818} 1889}
1819 1890
1891static inline bool f2fs_is_commit_atomic_write(struct inode *inode)
1892{
1893 return is_inode_flag_set(inode, FI_ATOMIC_COMMIT);
1894}
1895
1820static inline bool f2fs_is_volatile_file(struct inode *inode) 1896static inline bool f2fs_is_volatile_file(struct inode *inode)
1821{ 1897{
1822 return is_inode_flag_set(inode, FI_VOLATILE_FILE); 1898 return is_inode_flag_set(inode, FI_VOLATILE_FILE);
@@ -1835,6 +1911,7 @@ static inline bool f2fs_is_drop_cache(struct inode *inode)
1835static inline void *inline_data_addr(struct page *page) 1911static inline void *inline_data_addr(struct page *page)
1836{ 1912{
1837 struct f2fs_inode *ri = F2FS_INODE(page); 1913 struct f2fs_inode *ri = F2FS_INODE(page);
1914
1838 return (void *)&(ri->i_addr[1]); 1915 return (void *)&(ri->i_addr[1]);
1839} 1916}
1840 1917
@@ -1918,8 +1995,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
1918 size_t size, gfp_t flags) 1995 size_t size, gfp_t flags)
1919{ 1996{
1920#ifdef CONFIG_F2FS_FAULT_INJECTION 1997#ifdef CONFIG_F2FS_FAULT_INJECTION
1921 if (time_to_inject(sbi, FAULT_KMALLOC)) 1998 if (time_to_inject(sbi, FAULT_KMALLOC)) {
1999 f2fs_show_injection_info(FAULT_KMALLOC);
1922 return NULL; 2000 return NULL;
2001 }
1923#endif 2002#endif
1924 return kmalloc(size, flags); 2003 return kmalloc(size, flags);
1925} 2004}
@@ -1957,29 +2036,30 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
1957/* 2036/*
1958 * file.c 2037 * file.c
1959 */ 2038 */
1960int f2fs_sync_file(struct file *, loff_t, loff_t, int); 2039int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
1961void truncate_data_blocks(struct dnode_of_data *); 2040void truncate_data_blocks(struct dnode_of_data *dn);
1962int truncate_blocks(struct inode *, u64, bool); 2041int truncate_blocks(struct inode *inode, u64 from, bool lock);
1963int f2fs_truncate(struct inode *); 2042int f2fs_truncate(struct inode *inode);
1964int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2043int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1965int f2fs_setattr(struct dentry *, struct iattr *); 2044 struct kstat *stat);
1966int truncate_hole(struct inode *, pgoff_t, pgoff_t); 2045int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
1967int truncate_data_blocks_range(struct dnode_of_data *, int); 2046int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
1968long f2fs_ioctl(struct file *, unsigned int, unsigned long); 2047int truncate_data_blocks_range(struct dnode_of_data *dn, int count);
1969long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); 2048long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
2049long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1970 2050
1971/* 2051/*
1972 * inode.c 2052 * inode.c
1973 */ 2053 */
1974void f2fs_set_inode_flags(struct inode *); 2054void f2fs_set_inode_flags(struct inode *inode);
1975struct inode *f2fs_iget(struct super_block *, unsigned long); 2055struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
1976struct inode *f2fs_iget_retry(struct super_block *, unsigned long); 2056struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
1977int try_to_free_nats(struct f2fs_sb_info *, int); 2057int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
1978int update_inode(struct inode *, struct page *); 2058int update_inode(struct inode *inode, struct page *node_page);
1979int update_inode_page(struct inode *); 2059int update_inode_page(struct inode *inode);
1980int f2fs_write_inode(struct inode *, struct writeback_control *); 2060int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
1981void f2fs_evict_inode(struct inode *); 2061void f2fs_evict_inode(struct inode *inode);
1982void handle_failed_inode(struct inode *); 2062void handle_failed_inode(struct inode *inode);
1983 2063
1984/* 2064/*
1985 * namei.c 2065 * namei.c
@@ -1989,40 +2069,47 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1989/* 2069/*
1990 * dir.c 2070 * dir.c
1991 */ 2071 */
1992void set_de_type(struct f2fs_dir_entry *, umode_t); 2072void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
1993unsigned char get_de_type(struct f2fs_dir_entry *); 2073unsigned char get_de_type(struct f2fs_dir_entry *de);
1994struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, 2074struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
1995 f2fs_hash_t, int *, struct f2fs_dentry_ptr *); 2075 f2fs_hash_t namehash, int *max_slots,
1996int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 2076 struct f2fs_dentry_ptr *d);
1997 unsigned int, struct fscrypt_str *); 2077int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
1998void do_make_empty_dir(struct inode *, struct inode *, 2078 unsigned int start_pos, struct fscrypt_str *fstr);
1999 struct f2fs_dentry_ptr *); 2079void do_make_empty_dir(struct inode *inode, struct inode *parent,
2000struct page *init_inode_metadata(struct inode *, struct inode *, 2080 struct f2fs_dentry_ptr *d);
2001 const struct qstr *, const struct qstr *, struct page *); 2081struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
2002void update_parent_metadata(struct inode *, struct inode *, unsigned int); 2082 const struct qstr *new_name,
2003int room_for_filename(const void *, int, int); 2083 const struct qstr *orig_name, struct page *dpage);
2004void f2fs_drop_nlink(struct inode *, struct inode *); 2084void update_parent_metadata(struct inode *dir, struct inode *inode,
2005struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *, 2085 unsigned int current_depth);
2006 struct page **); 2086int room_for_filename(const void *bitmap, int slots, int max_slots);
2007struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *, 2087void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
2008 struct page **); 2088struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
2009struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); 2089 struct fscrypt_name *fname, struct page **res_page);
2010ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **); 2090struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
2011void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 2091 const struct qstr *child, struct page **res_page);
2012 struct page *, struct inode *); 2092struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
2013int update_dent_inode(struct inode *, struct inode *, const struct qstr *); 2093ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
2014void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, 2094 struct page **page);
2015 const struct qstr *, f2fs_hash_t , unsigned int); 2095void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
2016int f2fs_add_regular_entry(struct inode *, const struct qstr *, 2096 struct page *page, struct inode *inode);
2017 const struct qstr *, struct inode *, nid_t, umode_t); 2097int update_dent_inode(struct inode *inode, struct inode *to,
2018int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, 2098 const struct qstr *name);
2019 nid_t, umode_t); 2099void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
2020int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, 2100 const struct qstr *name, f2fs_hash_t name_hash,
2021 umode_t); 2101 unsigned int bit_pos);
2022void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 2102int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
2023 struct inode *); 2103 const struct qstr *orig_name,
2024int f2fs_do_tmpfile(struct inode *, struct inode *); 2104 struct inode *inode, nid_t ino, umode_t mode);
2025bool f2fs_empty_dir(struct inode *); 2105int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
2106 struct inode *inode, nid_t ino, umode_t mode);
2107int __f2fs_add_link(struct inode *dir, const struct qstr *name,
2108 struct inode *inode, nid_t ino, umode_t mode);
2109void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
2110 struct inode *dir, struct inode *inode);
2111int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
2112bool f2fs_empty_dir(struct inode *dir);
2026 2113
2027static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) 2114static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
2028{ 2115{
@@ -2033,18 +2120,18 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
2033/* 2120/*
2034 * super.c 2121 * super.c
2035 */ 2122 */
2036int f2fs_inode_dirtied(struct inode *, bool); 2123int f2fs_inode_dirtied(struct inode *inode, bool sync);
2037void f2fs_inode_synced(struct inode *); 2124void f2fs_inode_synced(struct inode *inode);
2038int f2fs_commit_super(struct f2fs_sb_info *, bool); 2125int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
2039int f2fs_sync_fs(struct super_block *, int); 2126int f2fs_sync_fs(struct super_block *sb, int sync);
2040extern __printf(3, 4) 2127extern __printf(3, 4)
2041void f2fs_msg(struct super_block *, const char *, const char *, ...); 2128void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
2042int sanity_check_ckpt(struct f2fs_sb_info *sbi); 2129int sanity_check_ckpt(struct f2fs_sb_info *sbi);
2043 2130
2044/* 2131/*
2045 * hash.c 2132 * hash.c
2046 */ 2133 */
2047f2fs_hash_t f2fs_dentry_hash(const struct qstr *); 2134f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info);
2048 2135
2049/* 2136/*
2050 * node.c 2137 * node.c
@@ -2052,163 +2139,183 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
2052struct dnode_of_data; 2139struct dnode_of_data;
2053struct node_info; 2140struct node_info;
2054 2141
2055bool available_free_memory(struct f2fs_sb_info *, int); 2142bool available_free_memory(struct f2fs_sb_info *sbi, int type);
2056int need_dentry_mark(struct f2fs_sb_info *, nid_t); 2143int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
2057bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); 2144bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
2058bool need_inode_block_update(struct f2fs_sb_info *, nid_t); 2145bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
2059void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 2146void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
2060pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t); 2147pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
2061int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 2148int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
2062int truncate_inode_blocks(struct inode *, pgoff_t); 2149int truncate_inode_blocks(struct inode *inode, pgoff_t from);
2063int truncate_xattr_node(struct inode *, struct page *); 2150int truncate_xattr_node(struct inode *inode, struct page *page);
2064int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 2151int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
2065int remove_inode_page(struct inode *); 2152int remove_inode_page(struct inode *inode);
2066struct page *new_inode_page(struct inode *); 2153struct page *new_inode_page(struct inode *inode);
2067struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 2154struct page *new_node_page(struct dnode_of_data *dn,
2068void ra_node_page(struct f2fs_sb_info *, nid_t); 2155 unsigned int ofs, struct page *ipage);
2069struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 2156void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
2070struct page *get_node_page_ra(struct page *, int); 2157struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
2071void move_node_page(struct page *, int); 2158struct page *get_node_page_ra(struct page *parent, int start);
2072int fsync_node_pages(struct f2fs_sb_info *, struct inode *, 2159void move_node_page(struct page *node_page, int gc_type);
2073 struct writeback_control *, bool); 2160int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
2074int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); 2161 struct writeback_control *wbc, bool atomic);
2075void build_free_nids(struct f2fs_sb_info *, bool); 2162int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
2076bool alloc_nid(struct f2fs_sb_info *, nid_t *); 2163void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
2077void alloc_nid_done(struct f2fs_sb_info *, nid_t); 2164bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
2078void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 2165void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
2079int try_to_free_nids(struct f2fs_sb_info *, int); 2166void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
2080void recover_inline_xattr(struct inode *, struct page *); 2167int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
2081void recover_xattr_data(struct inode *, struct page *, block_t); 2168void recover_inline_xattr(struct inode *inode, struct page *page);
2082int recover_inode_page(struct f2fs_sb_info *, struct page *); 2169int recover_xattr_data(struct inode *inode, struct page *page,
2083int restore_node_summary(struct f2fs_sb_info *, unsigned int, 2170 block_t blkaddr);
2084 struct f2fs_summary_block *); 2171int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
2085void flush_nat_entries(struct f2fs_sb_info *); 2172int restore_node_summary(struct f2fs_sb_info *sbi,
2086int build_node_manager(struct f2fs_sb_info *); 2173 unsigned int segno, struct f2fs_summary_block *sum);
2087void destroy_node_manager(struct f2fs_sb_info *); 2174void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2175int build_node_manager(struct f2fs_sb_info *sbi);
2176void destroy_node_manager(struct f2fs_sb_info *sbi);
2088int __init create_node_manager_caches(void); 2177int __init create_node_manager_caches(void);
2089void destroy_node_manager_caches(void); 2178void destroy_node_manager_caches(void);
2090 2179
2091/* 2180/*
2092 * segment.c 2181 * segment.c
2093 */ 2182 */
2094void register_inmem_page(struct inode *, struct page *); 2183void register_inmem_page(struct inode *inode, struct page *page);
2095void drop_inmem_pages(struct inode *); 2184void drop_inmem_pages(struct inode *inode);
2096int commit_inmem_pages(struct inode *); 2185int commit_inmem_pages(struct inode *inode);
2097void f2fs_balance_fs(struct f2fs_sb_info *, bool); 2186void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
2098void f2fs_balance_fs_bg(struct f2fs_sb_info *); 2187void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
2099int f2fs_issue_flush(struct f2fs_sb_info *); 2188int f2fs_issue_flush(struct f2fs_sb_info *sbi);
2100int create_flush_cmd_control(struct f2fs_sb_info *); 2189int create_flush_cmd_control(struct f2fs_sb_info *sbi);
2101void destroy_flush_cmd_control(struct f2fs_sb_info *, bool); 2190void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
2102void invalidate_blocks(struct f2fs_sb_info *, block_t); 2191void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
2103bool is_checkpointed_data(struct f2fs_sb_info *, block_t); 2192bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
2104void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 2193void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
2105void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); 2194void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
2106void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 2195void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2107void release_discard_addrs(struct f2fs_sb_info *); 2196void release_discard_addrs(struct f2fs_sb_info *sbi);
2108int npages_for_summary_flush(struct f2fs_sb_info *, bool); 2197int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
2109void allocate_new_segments(struct f2fs_sb_info *); 2198void allocate_new_segments(struct f2fs_sb_info *sbi);
2110int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 2199int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
2111struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 2200bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2112void update_meta_page(struct f2fs_sb_info *, void *, block_t); 2201struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
2113void write_meta_page(struct f2fs_sb_info *, struct page *); 2202void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
2114void write_node_page(unsigned int, struct f2fs_io_info *); 2203void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
2115void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); 2204void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
2116void rewrite_data_page(struct f2fs_io_info *); 2205void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
2117void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, 2206void rewrite_data_page(struct f2fs_io_info *fio);
2118 block_t, block_t, bool, bool); 2207void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2119void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, 2208 block_t old_blkaddr, block_t new_blkaddr,
2120 block_t, block_t, unsigned char, bool, bool); 2209 bool recover_curseg, bool recover_newaddr);
2121void allocate_data_block(struct f2fs_sb_info *, struct page *, 2210void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
2122 block_t, block_t *, struct f2fs_summary *, int); 2211 block_t old_addr, block_t new_addr,
2123void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); 2212 unsigned char version, bool recover_curseg,
2124void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); 2213 bool recover_newaddr);
2125void write_data_summaries(struct f2fs_sb_info *, block_t); 2214void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2126void write_node_summaries(struct f2fs_sb_info *, block_t); 2215 block_t old_blkaddr, block_t *new_blkaddr,
2127int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int); 2216 struct f2fs_summary *sum, int type);
2128void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); 2217void f2fs_wait_on_page_writeback(struct page *page,
2129int build_segment_manager(struct f2fs_sb_info *); 2218 enum page_type type, bool ordered);
2130void destroy_segment_manager(struct f2fs_sb_info *); 2219void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
2220 block_t blkaddr);
2221void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
2222void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
2223int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
2224 unsigned int val, int alloc);
2225void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2226int build_segment_manager(struct f2fs_sb_info *sbi);
2227void destroy_segment_manager(struct f2fs_sb_info *sbi);
2131int __init create_segment_manager_caches(void); 2228int __init create_segment_manager_caches(void);
2132void destroy_segment_manager_caches(void); 2229void destroy_segment_manager_caches(void);
2133 2230
2134/* 2231/*
2135 * checkpoint.c 2232 * checkpoint.c
2136 */ 2233 */
2137void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); 2234void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
2138struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 2235struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
2139struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 2236struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
2140struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); 2237struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
2141bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); 2238bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
2142int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); 2239int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
2143void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); 2240 int type, bool sync);
2144long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 2241void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
2145void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); 2242long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
2146void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); 2243 long nr_to_write);
2147void release_ino_entry(struct f2fs_sb_info *, bool); 2244void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2148bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 2245void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2149int f2fs_sync_inode_meta(struct f2fs_sb_info *); 2246void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
2150int acquire_orphan_inode(struct f2fs_sb_info *); 2247bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
2151void release_orphan_inode(struct f2fs_sb_info *); 2248int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
2152void add_orphan_inode(struct inode *); 2249int acquire_orphan_inode(struct f2fs_sb_info *sbi);
2153void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 2250void release_orphan_inode(struct f2fs_sb_info *sbi);
2154int recover_orphan_inodes(struct f2fs_sb_info *); 2251void add_orphan_inode(struct inode *inode);
2155int get_valid_checkpoint(struct f2fs_sb_info *); 2252void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
2156void update_dirty_page(struct inode *, struct page *); 2253int recover_orphan_inodes(struct f2fs_sb_info *sbi);
2157void remove_dirty_inode(struct inode *); 2254int get_valid_checkpoint(struct f2fs_sb_info *sbi);
2158int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); 2255void update_dirty_page(struct inode *inode, struct page *page);
2159int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); 2256void remove_dirty_inode(struct inode *inode);
2160void init_ino_entry_info(struct f2fs_sb_info *); 2257int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
2258int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2259void init_ino_entry_info(struct f2fs_sb_info *sbi);
2161int __init create_checkpoint_caches(void); 2260int __init create_checkpoint_caches(void);
2162void destroy_checkpoint_caches(void); 2261void destroy_checkpoint_caches(void);
2163 2262
2164/* 2263/*
2165 * data.c 2264 * data.c
2166 */ 2265 */
2167void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); 2266void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
2168void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, 2267 int rw);
2169 struct page *, nid_t, enum page_type, int); 2268void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
2170void f2fs_flush_merged_bios(struct f2fs_sb_info *); 2269 struct inode *inode, nid_t ino, pgoff_t idx,
2171int f2fs_submit_page_bio(struct f2fs_io_info *); 2270 enum page_type type, int rw);
2172void f2fs_submit_page_mbio(struct f2fs_io_info *); 2271void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
2173struct block_device *f2fs_target_device(struct f2fs_sb_info *, 2272int f2fs_submit_page_bio(struct f2fs_io_info *fio);
2174 block_t, struct bio *); 2273int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
2175int f2fs_target_device_index(struct f2fs_sb_info *, block_t); 2274struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
2176void set_data_blkaddr(struct dnode_of_data *); 2275 block_t blk_addr, struct bio *bio);
2177void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 2276int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
2178int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); 2277void set_data_blkaddr(struct dnode_of_data *dn);
2179int reserve_new_block(struct dnode_of_data *); 2278void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
2180int f2fs_get_block(struct dnode_of_data *, pgoff_t); 2279int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
2181int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); 2280int reserve_new_block(struct dnode_of_data *dn);
2182int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 2281int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
2183struct page *get_read_data_page(struct inode *, pgoff_t, int, bool); 2282int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
2184struct page *find_data_page(struct inode *, pgoff_t); 2283int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
2185struct page *get_lock_data_page(struct inode *, pgoff_t, bool); 2284struct page *get_read_data_page(struct inode *inode, pgoff_t index,
2186struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 2285 int op_flags, bool for_write);
2187int do_write_data_page(struct f2fs_io_info *); 2286struct page *find_data_page(struct inode *inode, pgoff_t index);
2188int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); 2287struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
2189int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 2288 bool for_write);
2190void f2fs_set_page_dirty_nobuffers(struct page *); 2289struct page *get_new_data_page(struct inode *inode,
2191void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 2290 struct page *ipage, pgoff_t index, bool new_i_size);
2192int f2fs_release_page(struct page *, gfp_t); 2291int do_write_data_page(struct f2fs_io_info *fio);
2292int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
2293 int create, int flag);
2294int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2295 u64 start, u64 len);
2296void f2fs_set_page_dirty_nobuffers(struct page *page);
2297void f2fs_invalidate_page(struct page *page, unsigned int offset,
2298 unsigned int length);
2299int f2fs_release_page(struct page *page, gfp_t wait);
2193#ifdef CONFIG_MIGRATION 2300#ifdef CONFIG_MIGRATION
2194int f2fs_migrate_page(struct address_space *, struct page *, struct page *, 2301int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
2195 enum migrate_mode); 2302 struct page *page, enum migrate_mode mode);
2196#endif 2303#endif
2197 2304
2198/* 2305/*
2199 * gc.c 2306 * gc.c
2200 */ 2307 */
2201int start_gc_thread(struct f2fs_sb_info *); 2308int start_gc_thread(struct f2fs_sb_info *sbi);
2202void stop_gc_thread(struct f2fs_sb_info *); 2309void stop_gc_thread(struct f2fs_sb_info *sbi);
2203block_t start_bidx_of_node(unsigned int, struct inode *); 2310block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
2204int f2fs_gc(struct f2fs_sb_info *, bool, bool); 2311int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
2205void build_gc_manager(struct f2fs_sb_info *); 2312void build_gc_manager(struct f2fs_sb_info *sbi);
2206 2313
2207/* 2314/*
2208 * recovery.c 2315 * recovery.c
2209 */ 2316 */
2210int recover_fsync_data(struct f2fs_sb_info *, bool); 2317int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
2211bool space_for_roll_forward(struct f2fs_sb_info *); 2318bool space_for_roll_forward(struct f2fs_sb_info *sbi);
2212 2319
2213/* 2320/*
2214 * debug.c 2321 * debug.c
@@ -2227,8 +2334,9 @@ struct f2fs_stat_info {
2227 unsigned int ndirty_dirs, ndirty_files, ndirty_all; 2334 unsigned int ndirty_dirs, ndirty_files, ndirty_all;
2228 int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; 2335 int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
2229 int total_count, utilization; 2336 int total_count, utilization;
2230 int bg_gc, nr_wb_cp_data, nr_wb_data; 2337 int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
2231 int inline_xattr, inline_inode, inline_dir, orphans; 2338 int inline_xattr, inline_inode, inline_dir, append, update, orphans;
2339 int aw_cnt, max_aw_cnt;
2232 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; 2340 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
2233 unsigned int bimodal, avg_vblocks; 2341 unsigned int bimodal, avg_vblocks;
2234 int util_free, util_valid, util_invalid; 2342 int util_free, util_valid, util_invalid;
@@ -2300,6 +2408,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
2300 ((sbi)->block_count[(curseg)->alloc_type]++) 2408 ((sbi)->block_count[(curseg)->alloc_type]++)
2301#define stat_inc_inplace_blocks(sbi) \ 2409#define stat_inc_inplace_blocks(sbi) \
2302 (atomic_inc(&(sbi)->inplace_count)) 2410 (atomic_inc(&(sbi)->inplace_count))
2411#define stat_inc_atomic_write(inode) \
2412 (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
2413#define stat_dec_atomic_write(inode) \
2414 (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
2415#define stat_update_max_atomic_write(inode) \
2416 do { \
2417 int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \
2418 int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
2419 if (cur > max) \
2420 atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
2421 } while (0)
2303#define stat_inc_seg_count(sbi, type, gc_type) \ 2422#define stat_inc_seg_count(sbi, type, gc_type) \
2304 do { \ 2423 do { \
2305 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 2424 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -2332,8 +2451,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
2332 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ 2451 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
2333 } while (0) 2452 } while (0)
2334 2453
2335int f2fs_build_stats(struct f2fs_sb_info *); 2454int f2fs_build_stats(struct f2fs_sb_info *sbi);
2336void f2fs_destroy_stats(struct f2fs_sb_info *); 2455void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
2337int __init f2fs_create_root_stats(void); 2456int __init f2fs_create_root_stats(void);
2338void f2fs_destroy_root_stats(void); 2457void f2fs_destroy_root_stats(void);
2339#else 2458#else
@@ -2353,6 +2472,9 @@ void f2fs_destroy_root_stats(void);
2353#define stat_dec_inline_inode(inode) 2472#define stat_dec_inline_inode(inode)
2354#define stat_inc_inline_dir(inode) 2473#define stat_inc_inline_dir(inode)
2355#define stat_dec_inline_dir(inode) 2474#define stat_dec_inline_dir(inode)
2475#define stat_inc_atomic_write(inode)
2476#define stat_dec_atomic_write(inode)
2477#define stat_update_max_atomic_write(inode)
2356#define stat_inc_seg_type(sbi, curseg) 2478#define stat_inc_seg_type(sbi, curseg)
2357#define stat_inc_block_count(sbi, curseg) 2479#define stat_inc_block_count(sbi, curseg)
2358#define stat_inc_inplace_blocks(sbi) 2480#define stat_inc_inplace_blocks(sbi)
@@ -2382,49 +2504,55 @@ extern struct kmem_cache *inode_entry_slab;
2382/* 2504/*
2383 * inline.c 2505 * inline.c
2384 */ 2506 */
2385bool f2fs_may_inline_data(struct inode *); 2507bool f2fs_may_inline_data(struct inode *inode);
2386bool f2fs_may_inline_dentry(struct inode *); 2508bool f2fs_may_inline_dentry(struct inode *inode);
2387void read_inline_data(struct page *, struct page *); 2509void read_inline_data(struct page *page, struct page *ipage);
2388bool truncate_inline_inode(struct page *, u64); 2510bool truncate_inline_inode(struct page *ipage, u64 from);
2389int f2fs_read_inline_data(struct inode *, struct page *); 2511int f2fs_read_inline_data(struct inode *inode, struct page *page);
2390int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 2512int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
2391int f2fs_convert_inline_inode(struct inode *); 2513int f2fs_convert_inline_inode(struct inode *inode);
2392int f2fs_write_inline_data(struct inode *, struct page *); 2514int f2fs_write_inline_data(struct inode *inode, struct page *page);
2393bool recover_inline_data(struct inode *, struct page *); 2515bool recover_inline_data(struct inode *inode, struct page *npage);
2394struct f2fs_dir_entry *find_in_inline_dir(struct inode *, 2516struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
2395 struct fscrypt_name *, struct page **); 2517 struct fscrypt_name *fname, struct page **res_page);
2396int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); 2518int make_empty_inline_dir(struct inode *inode, struct inode *parent,
2397int f2fs_add_inline_entry(struct inode *, const struct qstr *, 2519 struct page *ipage);
2398 const struct qstr *, struct inode *, nid_t, umode_t); 2520int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
2399void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, 2521 const struct qstr *orig_name,
2400 struct inode *, struct inode *); 2522 struct inode *inode, nid_t ino, umode_t mode);
2401bool f2fs_empty_inline_dir(struct inode *); 2523void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
2402int f2fs_read_inline_dir(struct file *, struct dir_context *, 2524 struct inode *dir, struct inode *inode);
2403 struct fscrypt_str *); 2525bool f2fs_empty_inline_dir(struct inode *dir);
2404int f2fs_inline_data_fiemap(struct inode *, 2526int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
2405 struct fiemap_extent_info *, __u64, __u64); 2527 struct fscrypt_str *fstr);
2528int f2fs_inline_data_fiemap(struct inode *inode,
2529 struct fiemap_extent_info *fieinfo,
2530 __u64 start, __u64 len);
2406 2531
2407/* 2532/*
2408 * shrinker.c 2533 * shrinker.c
2409 */ 2534 */
2410unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); 2535unsigned long f2fs_shrink_count(struct shrinker *shrink,
2411unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); 2536 struct shrink_control *sc);
2412void f2fs_join_shrinker(struct f2fs_sb_info *); 2537unsigned long f2fs_shrink_scan(struct shrinker *shrink,
2413void f2fs_leave_shrinker(struct f2fs_sb_info *); 2538 struct shrink_control *sc);
2539void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
2540void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
2414 2541
2415/* 2542/*
2416 * extent_cache.c 2543 * extent_cache.c
2417 */ 2544 */
2418unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); 2545unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
2419bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); 2546bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
2420void f2fs_drop_extent_tree(struct inode *); 2547void f2fs_drop_extent_tree(struct inode *inode);
2421unsigned int f2fs_destroy_extent_node(struct inode *); 2548unsigned int f2fs_destroy_extent_node(struct inode *inode);
2422void f2fs_destroy_extent_tree(struct inode *); 2549void f2fs_destroy_extent_tree(struct inode *inode);
2423bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); 2550bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
2424void f2fs_update_extent_cache(struct dnode_of_data *); 2551 struct extent_info *ei);
2552void f2fs_update_extent_cache(struct dnode_of_data *dn);
2425void f2fs_update_extent_cache_range(struct dnode_of_data *dn, 2553void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
2426 pgoff_t, block_t, unsigned int); 2554 pgoff_t fofs, block_t blkaddr, unsigned int len);
2427void init_extent_cache_info(struct f2fs_sb_info *); 2555void init_extent_cache_info(struct f2fs_sb_info *sbi);
2428int __init create_extent_cache(void); 2556int __init create_extent_cache(void);
2429void destroy_extent_cache(void); 2557void destroy_extent_cache(void);
2430 2558
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1edc86e874e3..78e65288f2b2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/pagevec.h> 22#include <linux/pagevec.h>
23#include <linux/uio.h>
23#include <linux/uuid.h> 24#include <linux/uuid.h>
24#include <linux/file.h> 25#include <linux/file.h>
25 26
@@ -140,8 +141,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
140 need_cp = true; 141 need_cp = true;
141 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 142 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
142 need_cp = true; 143 need_cp = true;
143 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
144 need_cp = true;
145 else if (test_opt(sbi, FASTBOOT)) 144 else if (test_opt(sbi, FASTBOOT))
146 need_cp = true; 145 need_cp = true;
147 else if (sbi->active_logs == 2) 146 else if (sbi->active_logs == 2)
@@ -167,7 +166,6 @@ static void try_to_fix_pino(struct inode *inode)
167 nid_t pino; 166 nid_t pino;
168 167
169 down_write(&fi->i_sem); 168 down_write(&fi->i_sem);
170 fi->xattr_ver = 0;
171 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 169 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
172 get_parent_ino(inode, &pino)) { 170 get_parent_ino(inode, &pino)) {
173 f2fs_i_pino_write(inode, pino); 171 f2fs_i_pino_write(inode, pino);
@@ -276,7 +274,8 @@ sync_nodes:
276flush_out: 274flush_out:
277 remove_ino_entry(sbi, ino, UPDATE_INO); 275 remove_ino_entry(sbi, ino, UPDATE_INO);
278 clear_inode_flag(inode, FI_UPDATE_WRITE); 276 clear_inode_flag(inode, FI_UPDATE_WRITE);
279 ret = f2fs_issue_flush(sbi); 277 if (!atomic)
278 ret = f2fs_issue_flush(sbi);
280 f2fs_update_time(sbi, REQ_TIME); 279 f2fs_update_time(sbi, REQ_TIME);
281out: 280out:
282 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 281 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -567,8 +566,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
567 } 566 }
568 567
569 if (f2fs_has_inline_data(inode)) { 568 if (f2fs_has_inline_data(inode)) {
570 if (truncate_inline_inode(ipage, from)) 569 truncate_inline_inode(ipage, from);
571 set_page_dirty(ipage); 570 if (from == 0)
571 clear_inode_flag(inode, FI_DATA_EXIST);
572 f2fs_put_page(ipage, 1); 572 f2fs_put_page(ipage, 1);
573 truncate_page = true; 573 truncate_page = true;
574 goto out; 574 goto out;
@@ -1541,6 +1541,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1541 if (ret) 1541 if (ret)
1542 clear_inode_flag(inode, FI_ATOMIC_FILE); 1542 clear_inode_flag(inode, FI_ATOMIC_FILE);
1543out: 1543out:
1544 stat_inc_atomic_write(inode);
1545 stat_update_max_atomic_write(inode);
1544 inode_unlock(inode); 1546 inode_unlock(inode);
1545 mnt_drop_write_file(filp); 1547 mnt_drop_write_file(filp);
1546 return ret; 1548 return ret;
@@ -1564,15 +1566,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1564 goto err_out; 1566 goto err_out;
1565 1567
1566 if (f2fs_is_atomic_file(inode)) { 1568 if (f2fs_is_atomic_file(inode)) {
1567 clear_inode_flag(inode, FI_ATOMIC_FILE);
1568 ret = commit_inmem_pages(inode); 1569 ret = commit_inmem_pages(inode);
1569 if (ret) { 1570 if (ret)
1570 set_inode_flag(inode, FI_ATOMIC_FILE);
1571 goto err_out; 1571 goto err_out;
1572
1573 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1574 if (!ret) {
1575 clear_inode_flag(inode, FI_ATOMIC_FILE);
1576 stat_dec_atomic_write(inode);
1572 } 1577 }
1578 } else {
1579 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1573 } 1580 }
1574
1575 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1576err_out: 1581err_out:
1577 inode_unlock(inode); 1582 inode_unlock(inode);
1578 mnt_drop_write_file(filp); 1583 mnt_drop_write_file(filp);
@@ -1870,7 +1875,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
1870{ 1875{
1871 struct inode *inode = file_inode(filp); 1876 struct inode *inode = file_inode(filp);
1872 struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; 1877 struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
1873 struct extent_info ei; 1878 struct extent_info ei = {0,0,0};
1874 pgoff_t pg_start, pg_end; 1879 pgoff_t pg_start, pg_end;
1875 unsigned int blk_per_seg = sbi->blocks_per_seg; 1880 unsigned int blk_per_seg = sbi->blocks_per_seg;
1876 unsigned int total = 0, sec_num; 1881 unsigned int total = 0, sec_num;
@@ -2250,8 +2255,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2250 inode_lock(inode); 2255 inode_lock(inode);
2251 ret = generic_write_checks(iocb, from); 2256 ret = generic_write_checks(iocb, from);
2252 if (ret > 0) { 2257 if (ret > 0) {
2253 int err = f2fs_preallocate_blocks(iocb, from); 2258 int err;
2259
2260 if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
2261 set_inode_flag(inode, FI_NO_PREALLOC);
2254 2262
2263 err = f2fs_preallocate_blocks(iocb, from);
2255 if (err) { 2264 if (err) {
2256 inode_unlock(inode); 2265 inode_unlock(inode);
2257 return err; 2266 return err;
@@ -2259,6 +2268,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2259 blk_start_plug(&plug); 2268 blk_start_plug(&plug);
2260 ret = __generic_file_write_iter(iocb, from); 2269 ret = __generic_file_write_iter(iocb, from);
2261 blk_finish_plug(&plug); 2270 blk_finish_plug(&plug);
2271 clear_inode_flag(inode, FI_NO_PREALLOC);
2262 } 2272 }
2263 inode_unlock(inode); 2273 inode_unlock(inode);
2264 2274
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 88bfc3dff496..418fd9881646 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
48 } 48 }
49 49
50#ifdef CONFIG_F2FS_FAULT_INJECTION 50#ifdef CONFIG_F2FS_FAULT_INJECTION
51 if (time_to_inject(sbi, FAULT_CHECKPOINT)) 51 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
52 f2fs_show_injection_info(FAULT_CHECKPOINT);
52 f2fs_stop_checkpoint(sbi, false); 53 f2fs_stop_checkpoint(sbi, false);
54 }
53#endif 55#endif
54 56
55 /* 57 /*
@@ -166,7 +168,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
166 p->ofs_unit = sbi->segs_per_sec; 168 p->ofs_unit = sbi->segs_per_sec;
167 } 169 }
168 170
169 if (p->max_search > sbi->max_victim_search) 171 /* we need to check every dirty segments in the FG_GC case */
172 if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
170 p->max_search = sbi->max_victim_search; 173 p->max_search = sbi->max_victim_search;
171 174
172 p->offset = sbi->last_victim[p->gc_mode]; 175 p->offset = sbi->last_victim[p->gc_mode];
@@ -199,6 +202,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
199 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { 202 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 203 if (sec_usage_check(sbi, secno))
201 continue; 204 continue;
205
206 if (no_fggc_candidate(sbi, secno))
207 continue;
208
202 clear_bit(secno, dirty_i->victim_secmap); 209 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 210 return secno * sbi->segs_per_sec;
204 } 211 }
@@ -237,6 +244,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
237 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); 244 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
238} 245}
239 246
247static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
248 unsigned int segno)
249{
250 unsigned int valid_blocks =
251 get_valid_blocks(sbi, segno, sbi->segs_per_sec);
252
253 return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
254 valid_blocks * 2 : valid_blocks;
255}
256
240static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, 257static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
241 unsigned int segno, struct victim_sel_policy *p) 258 unsigned int segno, struct victim_sel_policy *p)
242{ 259{
@@ -245,7 +262,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
245 262
246 /* alloc_mode == LFS */ 263 /* alloc_mode == LFS */
247 if (p->gc_mode == GC_GREEDY) 264 if (p->gc_mode == GC_GREEDY)
248 return get_valid_blocks(sbi, segno, sbi->segs_per_sec); 265 return get_greedy_cost(sbi, segno);
249 else 266 else
250 return get_cb_cost(sbi, segno); 267 return get_cb_cost(sbi, segno);
251} 268}
@@ -322,13 +339,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
322 nsearched++; 339 nsearched++;
323 } 340 }
324 341
325
326 secno = GET_SECNO(sbi, segno); 342 secno = GET_SECNO(sbi, segno);
327 343
328 if (sec_usage_check(sbi, secno)) 344 if (sec_usage_check(sbi, secno))
329 goto next; 345 goto next;
330 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) 346 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
331 goto next; 347 goto next;
348 if (gc_type == FG_GC && p.alloc_mode == LFS &&
349 no_fggc_candidate(sbi, secno))
350 goto next;
332 351
333 cost = get_gc_cost(sbi, segno, &p); 352 cost = get_gc_cost(sbi, segno, &p);
334 353
@@ -569,6 +588,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
569 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) 588 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
570 goto out; 589 goto out;
571 590
591 if (f2fs_is_atomic_file(inode))
592 goto out;
593
572 set_new_dnode(&dn, inode, NULL, NULL, 0); 594 set_new_dnode(&dn, inode, NULL, NULL, 0);
573 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); 595 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
574 if (err) 596 if (err)
@@ -661,6 +683,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
661 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) 683 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
662 goto out; 684 goto out;
663 685
686 if (f2fs_is_atomic_file(inode))
687 goto out;
688
664 if (gc_type == BG_GC) { 689 if (gc_type == BG_GC) {
665 if (PageWriteback(page)) 690 if (PageWriteback(page))
666 goto out; 691 goto out;
@@ -921,8 +946,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
921 946
922 cpc.reason = __get_cp_reason(sbi); 947 cpc.reason = __get_cp_reason(sbi);
923gc_more: 948gc_more:
924 segno = NULL_SEGNO;
925
926 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 949 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
927 goto stop; 950 goto stop;
928 if (unlikely(f2fs_cp_error(sbi))) { 951 if (unlikely(f2fs_cp_error(sbi))) {
@@ -930,30 +953,23 @@ gc_more:
930 goto stop; 953 goto stop;
931 } 954 }
932 955
933 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) { 956 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
934 gc_type = FG_GC;
935 /* 957 /*
936 * If there is no victim and no prefree segment but still not 958 * For example, if there are many prefree_segments below given
937 * enough free sections, we should flush dent/node blocks and do 959 * threshold, we can make them free by checkpoint. Then, we
938 * garbage collections. 960 * secure free segments which doesn't need fggc any more.
939 */ 961 */
940 if (__get_victim(sbi, &segno, gc_type) || 962 ret = write_checkpoint(sbi, &cpc);
941 prefree_segments(sbi)) { 963 if (ret)
942 ret = write_checkpoint(sbi, &cpc); 964 goto stop;
943 if (ret) 965 if (has_not_enough_free_secs(sbi, 0, 0))
944 goto stop; 966 gc_type = FG_GC;
945 segno = NULL_SEGNO;
946 } else if (has_not_enough_free_secs(sbi, 0, 0)) {
947 ret = write_checkpoint(sbi, &cpc);
948 if (ret)
949 goto stop;
950 }
951 } else if (gc_type == BG_GC && !background) {
952 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
953 goto stop;
954 } 967 }
955 968
956 if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) 969 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
970 if (gc_type == BG_GC && !background)
971 goto stop;
972 if (!__get_victim(sbi, &segno, gc_type))
957 goto stop; 973 goto stop;
958 ret = 0; 974 ret = 0;
959 975
@@ -983,5 +999,16 @@ stop:
983 999
984void build_gc_manager(struct f2fs_sb_info *sbi) 1000void build_gc_manager(struct f2fs_sb_info *sbi)
985{ 1001{
1002 u64 main_count, resv_count, ovp_count, blocks_per_sec;
1003
986 DIRTY_I(sbi)->v_ops = &default_v_ops; 1004 DIRTY_I(sbi)->v_ops = &default_v_ops;
1005
1006 /* threshold of # of valid blocks in a section for victims of FG_GC */
1007 main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
1008 resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
1009 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
1010 blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
1011
1012 sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec,
1013 (main_count - resv_count));
987} 1014}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index af06bda51a54..24bb8213d974 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode)
373 goto no_delete; 373 goto no_delete;
374 374
375#ifdef CONFIG_F2FS_FAULT_INJECTION 375#ifdef CONFIG_F2FS_FAULT_INJECTION
376 if (time_to_inject(sbi, FAULT_EVICT_INODE)) 376 if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
377 f2fs_show_injection_info(FAULT_EVICT_INODE);
377 goto no_delete; 378 goto no_delete;
379 }
378#endif 380#endif
379 381
380 remove_ino_entry(sbi, inode->i_ino, APPEND_INO); 382 remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 11cabcadb1a3..98f00a3a7f50 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
321 if (err) 321 if (err)
322 goto err_out; 322 goto err_out;
323 } 323 }
324 if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) && 324 if (f2fs_encrypted_inode(dir) &&
325 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && 325 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
326 !fscrypt_has_permitted_context(dir, inode)) { 326 !fscrypt_has_permitted_context(dir, inode)) {
327 bool nokey = f2fs_encrypted_inode(inode) && 327 bool nokey = f2fs_encrypted_inode(inode) &&
328 !fscrypt_has_encryption_key(inode); 328 !fscrypt_has_encryption_key(inode);
329 err = nokey ? -ENOKEY : -EPERM; 329 err = nokey ? -ENOKEY : -EPERM;
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
663 bool is_old_inline = f2fs_has_inline_dentry(old_dir); 663 bool is_old_inline = f2fs_has_inline_dentry(old_dir);
664 int err = -ENOENT; 664 int err = -ENOENT;
665 665
666 if ((f2fs_encrypted_inode(old_dir) &&
667 !fscrypt_has_encryption_key(old_dir)) ||
668 (f2fs_encrypted_inode(new_dir) &&
669 !fscrypt_has_encryption_key(new_dir)))
670 return -ENOKEY;
671
666 if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && 672 if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
667 !fscrypt_has_permitted_context(new_dir, old_inode)) { 673 !fscrypt_has_permitted_context(new_dir, old_inode)) {
668 err = -EPERM; 674 err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
843 int old_nlink = 0, new_nlink = 0; 849 int old_nlink = 0, new_nlink = 0;
844 int err = -ENOENT; 850 int err = -ENOENT;
845 851
852 if ((f2fs_encrypted_inode(old_dir) &&
853 !fscrypt_has_encryption_key(old_dir)) ||
854 (f2fs_encrypted_inode(new_dir) &&
855 !fscrypt_has_encryption_key(new_dir)))
856 return -ENOKEY;
857
846 if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && 858 if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
847 (old_dir != new_dir) && 859 (old_dir != new_dir) &&
848 (!fscrypt_has_permitted_context(new_dir, old_inode) || 860 (!fscrypt_has_permitted_context(new_dir, old_inode) ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9078fdb3743..94967171dee8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -245,12 +245,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
245 return need_update; 245 return need_update;
246} 246}
247 247
248static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 248static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
249 bool no_fail)
249{ 250{
250 struct nat_entry *new; 251 struct nat_entry *new;
251 252
252 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); 253 if (no_fail) {
253 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 254 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
255 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
256 } else {
257 new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
258 if (!new)
259 return NULL;
260 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
261 kmem_cache_free(nat_entry_slab, new);
262 return NULL;
263 }
264 }
265
254 memset(new, 0, sizeof(struct nat_entry)); 266 memset(new, 0, sizeof(struct nat_entry));
255 nat_set_nid(new, nid); 267 nat_set_nid(new, nid);
256 nat_reset_flag(new); 268 nat_reset_flag(new);
@@ -267,8 +279,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
267 279
268 e = __lookup_nat_cache(nm_i, nid); 280 e = __lookup_nat_cache(nm_i, nid);
269 if (!e) { 281 if (!e) {
270 e = grab_nat_entry(nm_i, nid); 282 e = grab_nat_entry(nm_i, nid, false);
271 node_info_from_raw_nat(&e->ni, ne); 283 if (e)
284 node_info_from_raw_nat(&e->ni, ne);
272 } else { 285 } else {
273 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || 286 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
274 nat_get_blkaddr(e) != 287 nat_get_blkaddr(e) !=
@@ -286,7 +299,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
286 down_write(&nm_i->nat_tree_lock); 299 down_write(&nm_i->nat_tree_lock);
287 e = __lookup_nat_cache(nm_i, ni->nid); 300 e = __lookup_nat_cache(nm_i, ni->nid);
288 if (!e) { 301 if (!e) {
289 e = grab_nat_entry(nm_i, ni->nid); 302 e = grab_nat_entry(nm_i, ni->nid, true);
290 copy_node_info(&e->ni, ni); 303 copy_node_info(&e->ni, ni);
291 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 304 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
292 } else if (new_blkaddr == NEW_ADDR) { 305 } else if (new_blkaddr == NEW_ADDR) {
@@ -325,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
325 set_nat_flag(e, IS_CHECKPOINTED, false); 338 set_nat_flag(e, IS_CHECKPOINTED, false);
326 __set_nat_cache_dirty(nm_i, e); 339 __set_nat_cache_dirty(nm_i, e);
327 340
341 if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
342 clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
343
328 /* update fsync_mark if its inode nat entry is still alive */ 344 /* update fsync_mark if its inode nat entry is still alive */
329 if (ni->nid != ni->ino) 345 if (ni->nid != ni->ino)
330 e = __lookup_nat_cache(nm_i, ni->ino); 346 e = __lookup_nat_cache(nm_i, ni->ino);
@@ -958,9 +974,6 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
958 974
959 f2fs_i_xnid_write(inode, 0); 975 f2fs_i_xnid_write(inode, 0);
960 976
961 /* need to do checkpoint during fsync */
962 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
963
964 set_new_dnode(&dn, inode, page, npage, nid); 977 set_new_dnode(&dn, inode, page, npage, nid);
965 978
966 if (page) 979 if (page)
@@ -1018,7 +1031,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
1018 unsigned int ofs, struct page *ipage) 1031 unsigned int ofs, struct page *ipage)
1019{ 1032{
1020 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1033 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1021 struct node_info old_ni, new_ni; 1034 struct node_info new_ni;
1022 struct page *page; 1035 struct page *page;
1023 int err; 1036 int err;
1024 1037
@@ -1033,13 +1046,15 @@ struct page *new_node_page(struct dnode_of_data *dn,
1033 err = -ENOSPC; 1046 err = -ENOSPC;
1034 goto fail; 1047 goto fail;
1035 } 1048 }
1036 1049#ifdef CONFIG_F2FS_CHECK_FS
1037 get_node_info(sbi, dn->nid, &old_ni); 1050 get_node_info(sbi, dn->nid, &new_ni);
1038 1051 f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
1039 /* Reinitialize old_ni with new node page */ 1052#endif
1040 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); 1053 new_ni.nid = dn->nid;
1041 new_ni = old_ni;
1042 new_ni.ino = dn->inode->i_ino; 1054 new_ni.ino = dn->inode->i_ino;
1055 new_ni.blk_addr = NULL_ADDR;
1056 new_ni.flag = 0;
1057 new_ni.version = 0;
1043 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1058 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1044 1059
1045 f2fs_wait_on_page_writeback(page, NODE, true); 1060 f2fs_wait_on_page_writeback(page, NODE, true);
@@ -1305,16 +1320,99 @@ continue_unlock:
1305 return last_page; 1320 return last_page;
1306} 1321}
1307 1322
1323static int __write_node_page(struct page *page, bool atomic, bool *submitted,
1324 struct writeback_control *wbc)
1325{
1326 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1327 nid_t nid;
1328 struct node_info ni;
1329 struct f2fs_io_info fio = {
1330 .sbi = sbi,
1331 .type = NODE,
1332 .op = REQ_OP_WRITE,
1333 .op_flags = wbc_to_write_flags(wbc),
1334 .page = page,
1335 .encrypted_page = NULL,
1336 .submitted = false,
1337 };
1338
1339 trace_f2fs_writepage(page, NODE);
1340
1341 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1342 goto redirty_out;
1343 if (unlikely(f2fs_cp_error(sbi)))
1344 goto redirty_out;
1345
1346 /* get old block addr of this node page */
1347 nid = nid_of_node(page);
1348 f2fs_bug_on(sbi, page->index != nid);
1349
1350 if (wbc->for_reclaim) {
1351 if (!down_read_trylock(&sbi->node_write))
1352 goto redirty_out;
1353 } else {
1354 down_read(&sbi->node_write);
1355 }
1356
1357 get_node_info(sbi, nid, &ni);
1358
1359 /* This page is already truncated */
1360 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1361 ClearPageUptodate(page);
1362 dec_page_count(sbi, F2FS_DIRTY_NODES);
1363 up_read(&sbi->node_write);
1364 unlock_page(page);
1365 return 0;
1366 }
1367
1368 if (atomic && !test_opt(sbi, NOBARRIER))
1369 fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
1370
1371 set_page_writeback(page);
1372 fio.old_blkaddr = ni.blk_addr;
1373 write_node_page(nid, &fio);
1374 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1375 dec_page_count(sbi, F2FS_DIRTY_NODES);
1376 up_read(&sbi->node_write);
1377
1378 if (wbc->for_reclaim) {
1379 f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
1380 page->index, NODE, WRITE);
1381 submitted = NULL;
1382 }
1383
1384 unlock_page(page);
1385
1386 if (unlikely(f2fs_cp_error(sbi))) {
1387 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1388 submitted = NULL;
1389 }
1390 if (submitted)
1391 *submitted = fio.submitted;
1392
1393 return 0;
1394
1395redirty_out:
1396 redirty_page_for_writepage(wbc, page);
1397 return AOP_WRITEPAGE_ACTIVATE;
1398}
1399
1400static int f2fs_write_node_page(struct page *page,
1401 struct writeback_control *wbc)
1402{
1403 return __write_node_page(page, false, NULL, wbc);
1404}
1405
1308int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, 1406int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
1309 struct writeback_control *wbc, bool atomic) 1407 struct writeback_control *wbc, bool atomic)
1310{ 1408{
1311 pgoff_t index, end; 1409 pgoff_t index, end;
1410 pgoff_t last_idx = ULONG_MAX;
1312 struct pagevec pvec; 1411 struct pagevec pvec;
1313 int ret = 0; 1412 int ret = 0;
1314 struct page *last_page = NULL; 1413 struct page *last_page = NULL;
1315 bool marked = false; 1414 bool marked = false;
1316 nid_t ino = inode->i_ino; 1415 nid_t ino = inode->i_ino;
1317 int nwritten = 0;
1318 1416
1319 if (atomic) { 1417 if (atomic) {
1320 last_page = last_fsync_dnode(sbi, ino); 1418 last_page = last_fsync_dnode(sbi, ino);
@@ -1336,6 +1434,7 @@ retry:
1336 1434
1337 for (i = 0; i < nr_pages; i++) { 1435 for (i = 0; i < nr_pages; i++) {
1338 struct page *page = pvec.pages[i]; 1436 struct page *page = pvec.pages[i];
1437 bool submitted = false;
1339 1438
1340 if (unlikely(f2fs_cp_error(sbi))) { 1439 if (unlikely(f2fs_cp_error(sbi))) {
1341 f2fs_put_page(last_page, 0); 1440 f2fs_put_page(last_page, 0);
@@ -1384,13 +1483,15 @@ continue_unlock:
1384 if (!clear_page_dirty_for_io(page)) 1483 if (!clear_page_dirty_for_io(page))
1385 goto continue_unlock; 1484 goto continue_unlock;
1386 1485
1387 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1486 ret = __write_node_page(page, atomic &&
1487 page == last_page,
1488 &submitted, wbc);
1388 if (ret) { 1489 if (ret) {
1389 unlock_page(page); 1490 unlock_page(page);
1390 f2fs_put_page(last_page, 0); 1491 f2fs_put_page(last_page, 0);
1391 break; 1492 break;
1392 } else { 1493 } else if (submitted) {
1393 nwritten++; 1494 last_idx = page->index;
1394 } 1495 }
1395 1496
1396 if (page == last_page) { 1497 if (page == last_page) {
@@ -1416,8 +1517,9 @@ continue_unlock:
1416 goto retry; 1517 goto retry;
1417 } 1518 }
1418out: 1519out:
1419 if (nwritten) 1520 if (last_idx != ULONG_MAX)
1420 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); 1521 f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
1522 NODE, WRITE);
1421 return ret ? -EIO: 0; 1523 return ret ? -EIO: 0;
1422} 1524}
1423 1525
@@ -1445,6 +1547,7 @@ next_step:
1445 1547
1446 for (i = 0; i < nr_pages; i++) { 1548 for (i = 0; i < nr_pages; i++) {
1447 struct page *page = pvec.pages[i]; 1549 struct page *page = pvec.pages[i];
1550 bool submitted = false;
1448 1551
1449 if (unlikely(f2fs_cp_error(sbi))) { 1552 if (unlikely(f2fs_cp_error(sbi))) {
1450 pagevec_release(&pvec); 1553 pagevec_release(&pvec);
@@ -1498,9 +1601,10 @@ continue_unlock:
1498 set_fsync_mark(page, 0); 1601 set_fsync_mark(page, 0);
1499 set_dentry_mark(page, 0); 1602 set_dentry_mark(page, 0);
1500 1603
1501 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1604 ret = __write_node_page(page, false, &submitted, wbc);
1605 if (ret)
1502 unlock_page(page); 1606 unlock_page(page);
1503 else 1607 else if (submitted)
1504 nwritten++; 1608 nwritten++;
1505 1609
1506 if (--wbc->nr_to_write == 0) 1610 if (--wbc->nr_to_write == 0)
@@ -1564,72 +1668,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1564 return ret; 1668 return ret;
1565} 1669}
1566 1670
1567static int f2fs_write_node_page(struct page *page,
1568 struct writeback_control *wbc)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1571 nid_t nid;
1572 struct node_info ni;
1573 struct f2fs_io_info fio = {
1574 .sbi = sbi,
1575 .type = NODE,
1576 .op = REQ_OP_WRITE,
1577 .op_flags = wbc_to_write_flags(wbc),
1578 .page = page,
1579 .encrypted_page = NULL,
1580 };
1581
1582 trace_f2fs_writepage(page, NODE);
1583
1584 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1585 goto redirty_out;
1586 if (unlikely(f2fs_cp_error(sbi)))
1587 goto redirty_out;
1588
1589 /* get old block addr of this node page */
1590 nid = nid_of_node(page);
1591 f2fs_bug_on(sbi, page->index != nid);
1592
1593 if (wbc->for_reclaim) {
1594 if (!down_read_trylock(&sbi->node_write))
1595 goto redirty_out;
1596 } else {
1597 down_read(&sbi->node_write);
1598 }
1599
1600 get_node_info(sbi, nid, &ni);
1601
1602 /* This page is already truncated */
1603 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1604 ClearPageUptodate(page);
1605 dec_page_count(sbi, F2FS_DIRTY_NODES);
1606 up_read(&sbi->node_write);
1607 unlock_page(page);
1608 return 0;
1609 }
1610
1611 set_page_writeback(page);
1612 fio.old_blkaddr = ni.blk_addr;
1613 write_node_page(nid, &fio);
1614 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1615 dec_page_count(sbi, F2FS_DIRTY_NODES);
1616 up_read(&sbi->node_write);
1617
1618 if (wbc->for_reclaim)
1619 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
1620
1621 unlock_page(page);
1622
1623 if (unlikely(f2fs_cp_error(sbi)))
1624 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1625
1626 return 0;
1627
1628redirty_out:
1629 redirty_page_for_writepage(wbc, page);
1630 return AOP_WRITEPAGE_ACTIVATE;
1631}
1632
1633static int f2fs_write_node_pages(struct address_space *mapping, 1671static int f2fs_write_node_pages(struct address_space *mapping,
1634 struct writeback_control *wbc) 1672 struct writeback_control *wbc)
1635{ 1673{
@@ -1727,7 +1765,8 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
1727 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1765 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1728} 1766}
1729 1767
1730static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1768/* return if the nid is recognized as free */
1769static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1731{ 1770{
1732 struct f2fs_nm_info *nm_i = NM_I(sbi); 1771 struct f2fs_nm_info *nm_i = NM_I(sbi);
1733 struct free_nid *i; 1772 struct free_nid *i;
@@ -1736,14 +1775,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1736 1775
1737 /* 0 nid should not be used */ 1776 /* 0 nid should not be used */
1738 if (unlikely(nid == 0)) 1777 if (unlikely(nid == 0))
1739 return 0; 1778 return false;
1740 1779
1741 if (build) { 1780 if (build) {
1742 /* do not add allocated nids */ 1781 /* do not add allocated nids */
1743 ne = __lookup_nat_cache(nm_i, nid); 1782 ne = __lookup_nat_cache(nm_i, nid);
1744 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1783 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1745 nat_get_blkaddr(ne) != NULL_ADDR)) 1784 nat_get_blkaddr(ne) != NULL_ADDR))
1746 return 0; 1785 return false;
1747 } 1786 }
1748 1787
1749 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1788 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1752,7 +1791,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1752 1791
1753 if (radix_tree_preload(GFP_NOFS)) { 1792 if (radix_tree_preload(GFP_NOFS)) {
1754 kmem_cache_free(free_nid_slab, i); 1793 kmem_cache_free(free_nid_slab, i);
1755 return 0; 1794 return true;
1756 } 1795 }
1757 1796
1758 spin_lock(&nm_i->nid_list_lock); 1797 spin_lock(&nm_i->nid_list_lock);
@@ -1761,9 +1800,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1761 radix_tree_preload_end(); 1800 radix_tree_preload_end();
1762 if (err) { 1801 if (err) {
1763 kmem_cache_free(free_nid_slab, i); 1802 kmem_cache_free(free_nid_slab, i);
1764 return 0; 1803 return true;
1765 } 1804 }
1766 return 1; 1805 return true;
1767} 1806}
1768 1807
1769static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) 1808static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1784,17 +1823,36 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1784 kmem_cache_free(free_nid_slab, i); 1823 kmem_cache_free(free_nid_slab, i);
1785} 1824}
1786 1825
1826void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
1827{
1828 struct f2fs_nm_info *nm_i = NM_I(sbi);
1829 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
1830 unsigned int nid_ofs = nid - START_NID(nid);
1831
1832 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1833 return;
1834
1835 if (set)
1836 set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1837 else
1838 clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1839}
1840
1787static void scan_nat_page(struct f2fs_sb_info *sbi, 1841static void scan_nat_page(struct f2fs_sb_info *sbi,
1788 struct page *nat_page, nid_t start_nid) 1842 struct page *nat_page, nid_t start_nid)
1789{ 1843{
1790 struct f2fs_nm_info *nm_i = NM_I(sbi); 1844 struct f2fs_nm_info *nm_i = NM_I(sbi);
1791 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1845 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1792 block_t blk_addr; 1846 block_t blk_addr;
1847 unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
1793 int i; 1848 int i;
1794 1849
1850 set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
1851
1795 i = start_nid % NAT_ENTRY_PER_BLOCK; 1852 i = start_nid % NAT_ENTRY_PER_BLOCK;
1796 1853
1797 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1854 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1855 bool freed = false;
1798 1856
1799 if (unlikely(start_nid >= nm_i->max_nid)) 1857 if (unlikely(start_nid >= nm_i->max_nid))
1800 break; 1858 break;
@@ -1802,11 +1860,106 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1802 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1860 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1803 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1861 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1804 if (blk_addr == NULL_ADDR) 1862 if (blk_addr == NULL_ADDR)
1805 add_free_nid(sbi, start_nid, true); 1863 freed = add_free_nid(sbi, start_nid, true);
1864 update_free_nid_bitmap(sbi, start_nid, freed);
1865 }
1866}
1867
1868static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1869{
1870 struct f2fs_nm_info *nm_i = NM_I(sbi);
1871 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1872 struct f2fs_journal *journal = curseg->journal;
1873 unsigned int i, idx;
1874
1875 down_read(&nm_i->nat_tree_lock);
1876
1877 for (i = 0; i < nm_i->nat_blocks; i++) {
1878 if (!test_bit_le(i, nm_i->nat_block_bitmap))
1879 continue;
1880 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
1881 nid_t nid;
1882
1883 if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
1884 continue;
1885
1886 nid = i * NAT_ENTRY_PER_BLOCK + idx;
1887 add_free_nid(sbi, nid, true);
1888
1889 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1890 goto out;
1891 }
1892 }
1893out:
1894 down_read(&curseg->journal_rwsem);
1895 for (i = 0; i < nats_in_cursum(journal); i++) {
1896 block_t addr;
1897 nid_t nid;
1898
1899 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1900 nid = le32_to_cpu(nid_in_journal(journal, i));
1901 if (addr == NULL_ADDR)
1902 add_free_nid(sbi, nid, true);
1903 else
1904 remove_free_nid(sbi, nid);
1806 } 1905 }
1906 up_read(&curseg->journal_rwsem);
1907 up_read(&nm_i->nat_tree_lock);
1807} 1908}
1808 1909
1809static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) 1910static int scan_nat_bits(struct f2fs_sb_info *sbi)
1911{
1912 struct f2fs_nm_info *nm_i = NM_I(sbi);
1913 struct page *page;
1914 unsigned int i = 0;
1915 nid_t nid;
1916
1917 if (!enabled_nat_bits(sbi, NULL))
1918 return -EAGAIN;
1919
1920 down_read(&nm_i->nat_tree_lock);
1921check_empty:
1922 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
1923 if (i >= nm_i->nat_blocks) {
1924 i = 0;
1925 goto check_partial;
1926 }
1927
1928 for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
1929 nid++) {
1930 if (unlikely(nid >= nm_i->max_nid))
1931 break;
1932 add_free_nid(sbi, nid, true);
1933 }
1934
1935 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1936 goto out;
1937 i++;
1938 goto check_empty;
1939
1940check_partial:
1941 i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
1942 if (i >= nm_i->nat_blocks) {
1943 disable_nat_bits(sbi, true);
1944 up_read(&nm_i->nat_tree_lock);
1945 return -EINVAL;
1946 }
1947
1948 nid = i * NAT_ENTRY_PER_BLOCK;
1949 page = get_current_nat_page(sbi, nid);
1950 scan_nat_page(sbi, page, nid);
1951 f2fs_put_page(page, 1);
1952
1953 if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
1954 i++;
1955 goto check_partial;
1956 }
1957out:
1958 up_read(&nm_i->nat_tree_lock);
1959 return 0;
1960}
1961
1962static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1810{ 1963{
1811 struct f2fs_nm_info *nm_i = NM_I(sbi); 1964 struct f2fs_nm_info *nm_i = NM_I(sbi);
1812 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1965 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1821,6 +1974,29 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1821 if (!sync && !available_free_memory(sbi, FREE_NIDS)) 1974 if (!sync && !available_free_memory(sbi, FREE_NIDS))
1822 return; 1975 return;
1823 1976
1977 if (!mount) {
1978 /* try to find free nids in free_nid_bitmap */
1979 scan_free_nid_bits(sbi);
1980
1981 if (nm_i->nid_cnt[FREE_NID_LIST])
1982 return;
1983
1984 /* try to find free nids with nat_bits */
1985 if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
1986 return;
1987 }
1988
1989 /* find next valid candidate */
1990 if (enabled_nat_bits(sbi, NULL)) {
1991 int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
1992 nm_i->nat_blocks, 0);
1993
1994 if (idx >= nm_i->nat_blocks)
1995 set_sbi_flag(sbi, SBI_NEED_FSCK);
1996 else
1997 nid = idx * NAT_ENTRY_PER_BLOCK;
1998 }
1999
1824 /* readahead nat pages to be scanned */ 2000 /* readahead nat pages to be scanned */
1825 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 2001 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
1826 META_NAT, true); 2002 META_NAT, true);
@@ -1863,10 +2039,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1863 nm_i->ra_nid_pages, META_NAT, false); 2039 nm_i->ra_nid_pages, META_NAT, false);
1864} 2040}
1865 2041
1866void build_free_nids(struct f2fs_sb_info *sbi, bool sync) 2042void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1867{ 2043{
1868 mutex_lock(&NM_I(sbi)->build_lock); 2044 mutex_lock(&NM_I(sbi)->build_lock);
1869 __build_free_nids(sbi, sync); 2045 __build_free_nids(sbi, sync, mount);
1870 mutex_unlock(&NM_I(sbi)->build_lock); 2046 mutex_unlock(&NM_I(sbi)->build_lock);
1871} 2047}
1872 2048
@@ -1881,8 +2057,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1881 struct free_nid *i = NULL; 2057 struct free_nid *i = NULL;
1882retry: 2058retry:
1883#ifdef CONFIG_F2FS_FAULT_INJECTION 2059#ifdef CONFIG_F2FS_FAULT_INJECTION
1884 if (time_to_inject(sbi, FAULT_ALLOC_NID)) 2060 if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
2061 f2fs_show_injection_info(FAULT_ALLOC_NID);
1885 return false; 2062 return false;
2063 }
1886#endif 2064#endif
1887 spin_lock(&nm_i->nid_list_lock); 2065 spin_lock(&nm_i->nid_list_lock);
1888 2066
@@ -1902,13 +2080,16 @@ retry:
1902 i->state = NID_ALLOC; 2080 i->state = NID_ALLOC;
1903 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); 2081 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
1904 nm_i->available_nids--; 2082 nm_i->available_nids--;
2083
2084 update_free_nid_bitmap(sbi, *nid, false);
2085
1905 spin_unlock(&nm_i->nid_list_lock); 2086 spin_unlock(&nm_i->nid_list_lock);
1906 return true; 2087 return true;
1907 } 2088 }
1908 spin_unlock(&nm_i->nid_list_lock); 2089 spin_unlock(&nm_i->nid_list_lock);
1909 2090
1910 /* Let's scan nat pages and its caches to get free nids */ 2091 /* Let's scan nat pages and its caches to get free nids */
1911 build_free_nids(sbi, true); 2092 build_free_nids(sbi, true, false);
1912 goto retry; 2093 goto retry;
1913} 2094}
1914 2095
@@ -1956,6 +2137,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1956 2137
1957 nm_i->available_nids++; 2138 nm_i->available_nids++;
1958 2139
2140 update_free_nid_bitmap(sbi, nid, true);
2141
1959 spin_unlock(&nm_i->nid_list_lock); 2142 spin_unlock(&nm_i->nid_list_lock);
1960 2143
1961 if (need_free) 2144 if (need_free)
@@ -2018,18 +2201,18 @@ update_inode:
2018 f2fs_put_page(ipage, 1); 2201 f2fs_put_page(ipage, 1);
2019} 2202}
2020 2203
2021void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 2204int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
2022{ 2205{
2023 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2206 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2024 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 2207 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
2025 nid_t new_xnid = nid_of_node(page); 2208 nid_t new_xnid = nid_of_node(page);
2026 struct node_info ni; 2209 struct node_info ni;
2210 struct page *xpage;
2027 2211
2028 /* 1: invalidate the previous xattr nid */
2029 if (!prev_xnid) 2212 if (!prev_xnid)
2030 goto recover_xnid; 2213 goto recover_xnid;
2031 2214
2032 /* Deallocate node address */ 2215 /* 1: invalidate the previous xattr nid */
2033 get_node_info(sbi, prev_xnid, &ni); 2216 get_node_info(sbi, prev_xnid, &ni);
2034 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 2217 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
2035 invalidate_blocks(sbi, ni.blk_addr); 2218 invalidate_blocks(sbi, ni.blk_addr);
@@ -2037,19 +2220,27 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
2037 set_node_addr(sbi, &ni, NULL_ADDR, false); 2220 set_node_addr(sbi, &ni, NULL_ADDR, false);
2038 2221
2039recover_xnid: 2222recover_xnid:
2040 /* 2: allocate new xattr nid */ 2223 /* 2: update xattr nid in inode */
2224 remove_free_nid(sbi, new_xnid);
2225 f2fs_i_xnid_write(inode, new_xnid);
2041 if (unlikely(!inc_valid_node_count(sbi, inode))) 2226 if (unlikely(!inc_valid_node_count(sbi, inode)))
2042 f2fs_bug_on(sbi, 1); 2227 f2fs_bug_on(sbi, 1);
2228 update_inode_page(inode);
2229
2230 /* 3: update and set xattr node page dirty */
2231 xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid);
2232 if (!xpage)
2233 return -ENOMEM;
2234
2235 memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE);
2043 2236
2044 remove_free_nid(sbi, new_xnid);
2045 get_node_info(sbi, new_xnid, &ni); 2237 get_node_info(sbi, new_xnid, &ni);
2046 ni.ino = inode->i_ino; 2238 ni.ino = inode->i_ino;
2047 set_node_addr(sbi, &ni, NEW_ADDR, false); 2239 set_node_addr(sbi, &ni, NEW_ADDR, false);
2048 f2fs_i_xnid_write(inode, new_xnid); 2240 set_page_dirty(xpage);
2241 f2fs_put_page(xpage, 1);
2049 2242
2050 /* 3: update xattr blkaddr */ 2243 return 0;
2051 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
2052 set_node_addr(sbi, &ni, blkaddr, false);
2053} 2244}
2054 2245
2055int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 2246int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2152,7 +2343,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2152 2343
2153 ne = __lookup_nat_cache(nm_i, nid); 2344 ne = __lookup_nat_cache(nm_i, nid);
2154 if (!ne) { 2345 if (!ne) {
2155 ne = grab_nat_entry(nm_i, nid); 2346 ne = grab_nat_entry(nm_i, nid, true);
2156 node_info_from_raw_nat(&ne->ni, &raw_ne); 2347 node_info_from_raw_nat(&ne->ni, &raw_ne);
2157 } 2348 }
2158 2349
@@ -2192,8 +2383,39 @@ add_out:
2192 list_add_tail(&nes->set_list, head); 2383 list_add_tail(&nes->set_list, head);
2193} 2384}
2194 2385
2386void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2387 struct page *page)
2388{
2389 struct f2fs_nm_info *nm_i = NM_I(sbi);
2390 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2391 struct f2fs_nat_block *nat_blk = page_address(page);
2392 int valid = 0;
2393 int i;
2394
2395 if (!enabled_nat_bits(sbi, NULL))
2396 return;
2397
2398 for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
2399 if (start_nid == 0 && i == 0)
2400 valid++;
2401 if (nat_blk->entries[i].block_addr)
2402 valid++;
2403 }
2404 if (valid == 0) {
2405 set_bit_le(nat_index, nm_i->empty_nat_bits);
2406 clear_bit_le(nat_index, nm_i->full_nat_bits);
2407 return;
2408 }
2409
2410 clear_bit_le(nat_index, nm_i->empty_nat_bits);
2411 if (valid == NAT_ENTRY_PER_BLOCK)
2412 set_bit_le(nat_index, nm_i->full_nat_bits);
2413 else
2414 clear_bit_le(nat_index, nm_i->full_nat_bits);
2415}
2416
2195static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2417static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2196 struct nat_entry_set *set) 2418 struct nat_entry_set *set, struct cp_control *cpc)
2197{ 2419{
2198 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2420 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2199 struct f2fs_journal *journal = curseg->journal; 2421 struct f2fs_journal *journal = curseg->journal;
@@ -2208,7 +2430,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2208 * #1, flush nat entries to journal in current hot data summary block. 2430 * #1, flush nat entries to journal in current hot data summary block.
2209 * #2, flush nat entries to nat page. 2431 * #2, flush nat entries to nat page.
2210 */ 2432 */
2211 if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 2433 if (enabled_nat_bits(sbi, cpc) ||
2434 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
2212 to_journal = false; 2435 to_journal = false;
2213 2436
2214 if (to_journal) { 2437 if (to_journal) {
@@ -2244,14 +2467,21 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2244 add_free_nid(sbi, nid, false); 2467 add_free_nid(sbi, nid, false);
2245 spin_lock(&NM_I(sbi)->nid_list_lock); 2468 spin_lock(&NM_I(sbi)->nid_list_lock);
2246 NM_I(sbi)->available_nids++; 2469 NM_I(sbi)->available_nids++;
2470 update_free_nid_bitmap(sbi, nid, true);
2471 spin_unlock(&NM_I(sbi)->nid_list_lock);
2472 } else {
2473 spin_lock(&NM_I(sbi)->nid_list_lock);
2474 update_free_nid_bitmap(sbi, nid, false);
2247 spin_unlock(&NM_I(sbi)->nid_list_lock); 2475 spin_unlock(&NM_I(sbi)->nid_list_lock);
2248 } 2476 }
2249 } 2477 }
2250 2478
2251 if (to_journal) 2479 if (to_journal) {
2252 up_write(&curseg->journal_rwsem); 2480 up_write(&curseg->journal_rwsem);
2253 else 2481 } else {
2482 __update_nat_bits(sbi, start_nid, page);
2254 f2fs_put_page(page, 1); 2483 f2fs_put_page(page, 1);
2484 }
2255 2485
2256 f2fs_bug_on(sbi, set->entry_cnt); 2486 f2fs_bug_on(sbi, set->entry_cnt);
2257 2487
@@ -2262,7 +2492,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2262/* 2492/*
2263 * This function is called during the checkpointing process. 2493 * This function is called during the checkpointing process.
2264 */ 2494 */
2265void flush_nat_entries(struct f2fs_sb_info *sbi) 2495void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2266{ 2496{
2267 struct f2fs_nm_info *nm_i = NM_I(sbi); 2497 struct f2fs_nm_info *nm_i = NM_I(sbi);
2268 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2498 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2283,7 +2513,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2283 * entries, remove all entries from journal and merge them 2513 * entries, remove all entries from journal and merge them
2284 * into nat entry set. 2514 * into nat entry set.
2285 */ 2515 */
2286 if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 2516 if (enabled_nat_bits(sbi, cpc) ||
2517 !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
2287 remove_nats_in_journal(sbi); 2518 remove_nats_in_journal(sbi);
2288 2519
2289 while ((found = __gang_lookup_nat_set(nm_i, 2520 while ((found = __gang_lookup_nat_set(nm_i,
@@ -2297,27 +2528,69 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2297 2528
2298 /* flush dirty nats in nat entry set */ 2529 /* flush dirty nats in nat entry set */
2299 list_for_each_entry_safe(set, tmp, &sets, set_list) 2530 list_for_each_entry_safe(set, tmp, &sets, set_list)
2300 __flush_nat_entry_set(sbi, set); 2531 __flush_nat_entry_set(sbi, set, cpc);
2301 2532
2302 up_write(&nm_i->nat_tree_lock); 2533 up_write(&nm_i->nat_tree_lock);
2303 2534
2304 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2535 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
2305} 2536}
2306 2537
2538static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
2539{
2540 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2541 struct f2fs_nm_info *nm_i = NM_I(sbi);
2542 unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
2543 unsigned int i;
2544 __u64 cp_ver = cur_cp_version(ckpt);
2545 block_t nat_bits_addr;
2546
2547 if (!enabled_nat_bits(sbi, NULL))
2548 return 0;
2549
2550 nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
2551 F2FS_BLKSIZE - 1);
2552 nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
2553 GFP_KERNEL);
2554 if (!nm_i->nat_bits)
2555 return -ENOMEM;
2556
2557 nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
2558 nm_i->nat_bits_blocks;
2559 for (i = 0; i < nm_i->nat_bits_blocks; i++) {
2560 struct page *page = get_meta_page(sbi, nat_bits_addr++);
2561
2562 memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
2563 page_address(page), F2FS_BLKSIZE);
2564 f2fs_put_page(page, 1);
2565 }
2566
2567 cp_ver |= (cur_cp_crc(ckpt) << 32);
2568 if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
2569 disable_nat_bits(sbi, true);
2570 return 0;
2571 }
2572
2573 nm_i->full_nat_bits = nm_i->nat_bits + 8;
2574 nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
2575
2576 f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
2577 return 0;
2578}
2579
2307static int init_node_manager(struct f2fs_sb_info *sbi) 2580static int init_node_manager(struct f2fs_sb_info *sbi)
2308{ 2581{
2309 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2582 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
2310 struct f2fs_nm_info *nm_i = NM_I(sbi); 2583 struct f2fs_nm_info *nm_i = NM_I(sbi);
2311 unsigned char *version_bitmap; 2584 unsigned char *version_bitmap;
2312 unsigned int nat_segs, nat_blocks; 2585 unsigned int nat_segs;
2586 int err;
2313 2587
2314 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 2588 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
2315 2589
2316 /* segment_count_nat includes pair segment so divide to 2. */ 2590 /* segment_count_nat includes pair segment so divide to 2. */
2317 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 2591 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
2318 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 2592 nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
2319 2593 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
2320 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
2321 2594
2322 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2595 /* not used nids: 0, node, meta, (and root counted as valid node) */
2323 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - 2596 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2350,6 +2623,34 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2350 GFP_KERNEL); 2623 GFP_KERNEL);
2351 if (!nm_i->nat_bitmap) 2624 if (!nm_i->nat_bitmap)
2352 return -ENOMEM; 2625 return -ENOMEM;
2626
2627 err = __get_nat_bitmaps(sbi);
2628 if (err)
2629 return err;
2630
2631#ifdef CONFIG_F2FS_CHECK_FS
2632 nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
2633 GFP_KERNEL);
2634 if (!nm_i->nat_bitmap_mir)
2635 return -ENOMEM;
2636#endif
2637
2638 return 0;
2639}
2640
2641int init_free_nid_cache(struct f2fs_sb_info *sbi)
2642{
2643 struct f2fs_nm_info *nm_i = NM_I(sbi);
2644
2645 nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
2646 NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
2647 if (!nm_i->free_nid_bitmap)
2648 return -ENOMEM;
2649
2650 nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
2651 GFP_KERNEL);
2652 if (!nm_i->nat_block_bitmap)
2653 return -ENOMEM;
2353 return 0; 2654 return 0;
2354} 2655}
2355 2656
@@ -2365,7 +2666,11 @@ int build_node_manager(struct f2fs_sb_info *sbi)
2365 if (err) 2666 if (err)
2366 return err; 2667 return err;
2367 2668
2368 build_free_nids(sbi, true); 2669 err = init_free_nid_cache(sbi);
2670 if (err)
2671 return err;
2672
2673 build_free_nids(sbi, true, true);
2369 return 0; 2674 return 0;
2370} 2675}
2371 2676
@@ -2423,7 +2728,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2423 } 2728 }
2424 up_write(&nm_i->nat_tree_lock); 2729 up_write(&nm_i->nat_tree_lock);
2425 2730
2731 kvfree(nm_i->nat_block_bitmap);
2732 kvfree(nm_i->free_nid_bitmap);
2733
2426 kfree(nm_i->nat_bitmap); 2734 kfree(nm_i->nat_bitmap);
2735 kfree(nm_i->nat_bits);
2736#ifdef CONFIG_F2FS_CHECK_FS
2737 kfree(nm_i->nat_bitmap_mir);
2738#endif
2427 sbi->nm_info = NULL; 2739 sbi->nm_info = NULL;
2428 kfree(nm_i); 2740 kfree(nm_i);
2429} 2741}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e7997e240366..2f9603fa85a5 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
174 spin_unlock(&nm_i->nid_list_lock); 174 spin_unlock(&nm_i->nid_list_lock);
175 return; 175 return;
176 } 176 }
177 fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next, 177 fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
178 struct free_nid, list); 178 struct free_nid, list);
179 *nid = fnid->nid; 179 *nid = fnid->nid;
180 spin_unlock(&nm_i->nid_list_lock); 180 spin_unlock(&nm_i->nid_list_lock);
@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
186static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) 186static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
187{ 187{
188 struct f2fs_nm_info *nm_i = NM_I(sbi); 188 struct f2fs_nm_info *nm_i = NM_I(sbi);
189
190#ifdef CONFIG_F2FS_CHECK_FS
191 if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
192 nm_i->bitmap_size))
193 f2fs_bug_on(sbi, 1);
194#endif
189 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); 195 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
190} 196}
191 197
@@ -228,6 +234,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
228 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); 234 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
229 235
230 f2fs_change_bit(block_off, nm_i->nat_bitmap); 236 f2fs_change_bit(block_off, nm_i->nat_bitmap);
237#ifdef CONFIG_F2FS_CHECK_FS
238 f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
239#endif
231} 240}
232 241
233static inline nid_t ino_of_node(struct page *node_page) 242static inline nid_t ino_of_node(struct page *node_page)
@@ -291,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
291{ 300{
292 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 301 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
293 struct f2fs_node *rn = F2FS_NODE(page); 302 struct f2fs_node *rn = F2FS_NODE(page);
294 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); 303 __u64 cp_ver = cur_cp_version(ckpt);
295 __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); 304
305 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
306 cp_ver |= (cur_cp_crc(ckpt) << 32);
296 307
297 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
298 __u64 crc = le32_to_cpu(*((__le32 *)
299 ((unsigned char *)ckpt + crc_offset)));
300 cp_ver |= (crc << 32);
301 }
302 rn->footer.cp_ver = cpu_to_le64(cp_ver); 308 rn->footer.cp_ver = cpu_to_le64(cp_ver);
303 rn->footer.next_blkaddr = cpu_to_le32(blkaddr); 309 rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
304} 310}
@@ -306,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
306static inline bool is_recoverable_dnode(struct page *page) 312static inline bool is_recoverable_dnode(struct page *page)
307{ 313{
308 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 314 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
309 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
310 __u64 cp_ver = cur_cp_version(ckpt); 315 __u64 cp_ver = cur_cp_version(ckpt);
311 316
312 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { 317 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
313 __u64 crc = le32_to_cpu(*((__le32 *) 318 cp_ver |= (cur_cp_crc(ckpt) << 32);
314 ((unsigned char *)ckpt + crc_offset))); 319
315 cp_ver |= (crc << 32);
316 }
317 return cp_ver == cpver_of_node(page); 320 return cp_ver == cpver_of_node(page);
318} 321}
319 322
@@ -343,7 +346,7 @@ static inline bool IS_DNODE(struct page *node_page)
343 unsigned int ofs = ofs_of_node(node_page); 346 unsigned int ofs = ofs_of_node(node_page);
344 347
345 if (f2fs_has_xattr_block(ofs)) 348 if (f2fs_has_xattr_block(ofs))
346 return false; 349 return true;
347 350
348 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 351 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
349 ofs == 5 + 2 * NIDS_PER_BLOCK) 352 ofs == 5 + 2 * NIDS_PER_BLOCK)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 981a9584b62f..d025aa83fb5b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
378 if (IS_INODE(page)) { 378 if (IS_INODE(page)) {
379 recover_inline_xattr(inode, page); 379 recover_inline_xattr(inode, page);
380 } else if (f2fs_has_xattr_block(ofs_of_node(page))) { 380 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
381 /* 381 err = recover_xattr_data(inode, page, blkaddr);
382 * Deprecated; xattr blocks should be found from cold log. 382 if (!err)
383 * But, we should remain this for backward compatibility. 383 recovered++;
384 */
385 recover_xattr_data(inode, page, blkaddr);
386 goto out; 384 goto out;
387 } 385 }
388 386
@@ -428,8 +426,9 @@ retry_dn:
428 } 426 }
429 427
430 if (!file_keep_isize(inode) && 428 if (!file_keep_isize(inode) &&
431 (i_size_read(inode) <= (start << PAGE_SHIFT))) 429 (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
432 f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); 430 f2fs_i_size_write(inode,
431 (loff_t)(start + 1) << PAGE_SHIFT);
433 432
434 /* 433 /*
435 * dest is reserved block, invalidate src block 434 * dest is reserved block, invalidate src block
@@ -552,10 +551,8 @@ next:
552 551
553int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) 552int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
554{ 553{
555 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
556 struct list_head inode_list; 554 struct list_head inode_list;
557 struct list_head dir_list; 555 struct list_head dir_list;
558 block_t blkaddr;
559 int err; 556 int err;
560 int ret = 0; 557 int ret = 0;
561 bool need_writecp = false; 558 bool need_writecp = false;
@@ -571,8 +568,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
571 /* prevent checkpoint */ 568 /* prevent checkpoint */
572 mutex_lock(&sbi->cp_mutex); 569 mutex_lock(&sbi->cp_mutex);
573 570
574 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
575
576 /* step #1: find fsynced inode numbers */ 571 /* step #1: find fsynced inode numbers */
577 err = find_fsync_dnodes(sbi, &inode_list); 572 err = find_fsync_dnodes(sbi, &inode_list);
578 if (err || list_empty(&inode_list)) 573 if (err || list_empty(&inode_list))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0d8802453758..4bd7a8b19332 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,7 +26,7 @@
26#define __reverse_ffz(x) __reverse_ffs(~(x)) 26#define __reverse_ffz(x) __reverse_ffs(~(x))
27 27
28static struct kmem_cache *discard_entry_slab; 28static struct kmem_cache *discard_entry_slab;
29static struct kmem_cache *bio_entry_slab; 29static struct kmem_cache *discard_cmd_slab;
30static struct kmem_cache *sit_entry_set_slab; 30static struct kmem_cache *sit_entry_set_slab;
31static struct kmem_cache *inmem_entry_slab; 31static struct kmem_cache *inmem_entry_slab;
32 32
@@ -242,11 +242,12 @@ void drop_inmem_pages(struct inode *inode)
242{ 242{
243 struct f2fs_inode_info *fi = F2FS_I(inode); 243 struct f2fs_inode_info *fi = F2FS_I(inode);
244 244
245 clear_inode_flag(inode, FI_ATOMIC_FILE);
246
247 mutex_lock(&fi->inmem_lock); 245 mutex_lock(&fi->inmem_lock);
248 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); 246 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
249 mutex_unlock(&fi->inmem_lock); 247 mutex_unlock(&fi->inmem_lock);
248
249 clear_inode_flag(inode, FI_ATOMIC_FILE);
250 stat_dec_atomic_write(inode);
250} 251}
251 252
252static int __commit_inmem_pages(struct inode *inode, 253static int __commit_inmem_pages(struct inode *inode,
@@ -262,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode,
262 .op_flags = REQ_SYNC | REQ_PRIO, 263 .op_flags = REQ_SYNC | REQ_PRIO,
263 .encrypted_page = NULL, 264 .encrypted_page = NULL,
264 }; 265 };
265 bool submit_bio = false; 266 pgoff_t last_idx = ULONG_MAX;
266 int err = 0; 267 int err = 0;
267 268
268 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 269 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
@@ -288,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode,
288 289
289 /* record old blkaddr for revoking */ 290 /* record old blkaddr for revoking */
290 cur->old_addr = fio.old_blkaddr; 291 cur->old_addr = fio.old_blkaddr;
291 292 last_idx = page->index;
292 submit_bio = true;
293 } 293 }
294 unlock_page(page); 294 unlock_page(page);
295 list_move_tail(&cur->list, revoke_list); 295 list_move_tail(&cur->list, revoke_list);
296 } 296 }
297 297
298 if (submit_bio) 298 if (last_idx != ULONG_MAX)
299 f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); 299 f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
300 DATA, WRITE);
300 301
301 if (!err) 302 if (!err)
302 __revoke_inmem_pages(inode, revoke_list, false, false); 303 __revoke_inmem_pages(inode, revoke_list, false, false);
@@ -315,6 +316,8 @@ int commit_inmem_pages(struct inode *inode)
315 f2fs_balance_fs(sbi, true); 316 f2fs_balance_fs(sbi, true);
316 f2fs_lock_op(sbi); 317 f2fs_lock_op(sbi);
317 318
319 set_inode_flag(inode, FI_ATOMIC_COMMIT);
320
318 mutex_lock(&fi->inmem_lock); 321 mutex_lock(&fi->inmem_lock);
319 err = __commit_inmem_pages(inode, &revoke_list); 322 err = __commit_inmem_pages(inode, &revoke_list);
320 if (err) { 323 if (err) {
@@ -336,6 +339,8 @@ int commit_inmem_pages(struct inode *inode)
336 } 339 }
337 mutex_unlock(&fi->inmem_lock); 340 mutex_unlock(&fi->inmem_lock);
338 341
342 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
343
339 f2fs_unlock_op(sbi); 344 f2fs_unlock_op(sbi);
340 return err; 345 return err;
341} 346}
@@ -347,8 +352,10 @@ int commit_inmem_pages(struct inode *inode)
347void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) 352void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
348{ 353{
349#ifdef CONFIG_F2FS_FAULT_INJECTION 354#ifdef CONFIG_F2FS_FAULT_INJECTION
350 if (time_to_inject(sbi, FAULT_CHECKPOINT)) 355 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
356 f2fs_show_injection_info(FAULT_CHECKPOINT);
351 f2fs_stop_checkpoint(sbi, false); 357 f2fs_stop_checkpoint(sbi, false);
358 }
352#endif 359#endif
353 360
354 if (!need) 361 if (!need)
@@ -381,7 +388,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
381 if (!available_free_memory(sbi, FREE_NIDS)) 388 if (!available_free_memory(sbi, FREE_NIDS))
382 try_to_free_nids(sbi, MAX_FREE_NIDS); 389 try_to_free_nids(sbi, MAX_FREE_NIDS);
383 else 390 else
384 build_free_nids(sbi, false); 391 build_free_nids(sbi, false, false);
385 392
386 if (!is_idle(sbi)) 393 if (!is_idle(sbi))
387 return; 394 return;
@@ -423,6 +430,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
423 430
424 if (sbi->s_ndevs && !ret) { 431 if (sbi->s_ndevs && !ret) {
425 for (i = 1; i < sbi->s_ndevs; i++) { 432 for (i = 1; i < sbi->s_ndevs; i++) {
433 trace_f2fs_issue_flush(FDEV(i).bdev,
434 test_opt(sbi, NOBARRIER),
435 test_opt(sbi, FLUSH_MERGE));
426 ret = __submit_flush_wait(FDEV(i).bdev); 436 ret = __submit_flush_wait(FDEV(i).bdev);
427 if (ret) 437 if (ret)
428 break; 438 break;
@@ -434,7 +444,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
434static int issue_flush_thread(void *data) 444static int issue_flush_thread(void *data)
435{ 445{
436 struct f2fs_sb_info *sbi = data; 446 struct f2fs_sb_info *sbi = data;
437 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 447 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
438 wait_queue_head_t *q = &fcc->flush_wait_queue; 448 wait_queue_head_t *q = &fcc->flush_wait_queue;
439repeat: 449repeat:
440 if (kthread_should_stop()) 450 if (kthread_should_stop())
@@ -463,16 +473,16 @@ repeat:
463 473
464int f2fs_issue_flush(struct f2fs_sb_info *sbi) 474int f2fs_issue_flush(struct f2fs_sb_info *sbi)
465{ 475{
466 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 476 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
467 struct flush_cmd cmd; 477 struct flush_cmd cmd;
468 478
469 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
470 test_opt(sbi, FLUSH_MERGE));
471
472 if (test_opt(sbi, NOBARRIER)) 479 if (test_opt(sbi, NOBARRIER))
473 return 0; 480 return 0;
474 481
475 if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { 482 if (!test_opt(sbi, FLUSH_MERGE))
483 return submit_flush_wait(sbi);
484
485 if (!atomic_read(&fcc->submit_flush)) {
476 int ret; 486 int ret;
477 487
478 atomic_inc(&fcc->submit_flush); 488 atomic_inc(&fcc->submit_flush);
@@ -506,8 +516,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
506 struct flush_cmd_control *fcc; 516 struct flush_cmd_control *fcc;
507 int err = 0; 517 int err = 0;
508 518
509 if (SM_I(sbi)->cmd_control_info) { 519 if (SM_I(sbi)->fcc_info) {
510 fcc = SM_I(sbi)->cmd_control_info; 520 fcc = SM_I(sbi)->fcc_info;
511 goto init_thread; 521 goto init_thread;
512 } 522 }
513 523
@@ -517,14 +527,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
517 atomic_set(&fcc->submit_flush, 0); 527 atomic_set(&fcc->submit_flush, 0);
518 init_waitqueue_head(&fcc->flush_wait_queue); 528 init_waitqueue_head(&fcc->flush_wait_queue);
519 init_llist_head(&fcc->issue_list); 529 init_llist_head(&fcc->issue_list);
520 SM_I(sbi)->cmd_control_info = fcc; 530 SM_I(sbi)->fcc_info = fcc;
521init_thread: 531init_thread:
522 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 532 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
523 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 533 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
524 if (IS_ERR(fcc->f2fs_issue_flush)) { 534 if (IS_ERR(fcc->f2fs_issue_flush)) {
525 err = PTR_ERR(fcc->f2fs_issue_flush); 535 err = PTR_ERR(fcc->f2fs_issue_flush);
526 kfree(fcc); 536 kfree(fcc);
527 SM_I(sbi)->cmd_control_info = NULL; 537 SM_I(sbi)->fcc_info = NULL;
528 return err; 538 return err;
529 } 539 }
530 540
@@ -533,7 +543,7 @@ init_thread:
533 543
534void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) 544void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
535{ 545{
536 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 546 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
537 547
538 if (fcc && fcc->f2fs_issue_flush) { 548 if (fcc && fcc->f2fs_issue_flush) {
539 struct task_struct *flush_thread = fcc->f2fs_issue_flush; 549 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
@@ -543,7 +553,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
543 } 553 }
544 if (free) { 554 if (free) {
545 kfree(fcc); 555 kfree(fcc);
546 SM_I(sbi)->cmd_control_info = NULL; 556 SM_I(sbi)->fcc_info = NULL;
547 } 557 }
548} 558}
549 559
@@ -623,60 +633,144 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
623 mutex_unlock(&dirty_i->seglist_lock); 633 mutex_unlock(&dirty_i->seglist_lock);
624} 634}
625 635
626static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, 636static void __add_discard_cmd(struct f2fs_sb_info *sbi,
627 struct bio *bio) 637 struct bio *bio, block_t lstart, block_t len)
628{ 638{
629 struct list_head *wait_list = &(SM_I(sbi)->wait_list); 639 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
630 struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); 640 struct list_head *cmd_list = &(dcc->discard_cmd_list);
641 struct discard_cmd *dc;
631 642
632 INIT_LIST_HEAD(&be->list); 643 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
633 be->bio = bio; 644 INIT_LIST_HEAD(&dc->list);
634 init_completion(&be->event); 645 dc->bio = bio;
635 list_add_tail(&be->list, wait_list); 646 bio->bi_private = dc;
647 dc->lstart = lstart;
648 dc->len = len;
649 dc->state = D_PREP;
650 init_completion(&dc->wait);
636 651
637 return be; 652 mutex_lock(&dcc->cmd_lock);
653 list_add_tail(&dc->list, cmd_list);
654 mutex_unlock(&dcc->cmd_lock);
638} 655}
639 656
640void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) 657static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
641{ 658{
642 struct list_head *wait_list = &(SM_I(sbi)->wait_list); 659 int err = dc->bio->bi_error;
643 struct bio_entry *be, *tmp;
644 660
645 list_for_each_entry_safe(be, tmp, wait_list, list) { 661 if (dc->state == D_DONE)
646 struct bio *bio = be->bio; 662 atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
647 int err;
648 663
649 wait_for_completion_io(&be->event); 664 if (err == -EOPNOTSUPP)
650 err = be->error; 665 err = 0;
651 if (err == -EOPNOTSUPP)
652 err = 0;
653 666
654 if (err) 667 if (err)
655 f2fs_msg(sbi->sb, KERN_INFO, 668 f2fs_msg(sbi->sb, KERN_INFO,
656 "Issue discard failed, ret: %d", err); 669 "Issue discard failed, ret: %d", err);
670 bio_put(dc->bio);
671 list_del(&dc->list);
672 kmem_cache_free(discard_cmd_slab, dc);
673}
674
675/* This should be covered by global mutex, &sit_i->sentry_lock */
676void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
677{
678 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
679 struct list_head *wait_list = &(dcc->discard_cmd_list);
680 struct discard_cmd *dc, *tmp;
681 struct blk_plug plug;
682
683 mutex_lock(&dcc->cmd_lock);
657 684
658 bio_put(bio); 685 blk_start_plug(&plug);
659 list_del(&be->list); 686
660 kmem_cache_free(bio_entry_slab, be); 687 list_for_each_entry_safe(dc, tmp, wait_list, list) {
688
689 if (blkaddr == NULL_ADDR) {
690 if (dc->state == D_PREP) {
691 dc->state = D_SUBMIT;
692 submit_bio(dc->bio);
693 atomic_inc(&dcc->submit_discard);
694 }
695 continue;
696 }
697
698 if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
699 if (dc->state == D_SUBMIT)
700 wait_for_completion_io(&dc->wait);
701 else
702 __remove_discard_cmd(sbi, dc);
703 }
704 }
705 blk_finish_plug(&plug);
706
707 /* this comes from f2fs_put_super */
708 if (blkaddr == NULL_ADDR) {
709 list_for_each_entry_safe(dc, tmp, wait_list, list) {
710 wait_for_completion_io(&dc->wait);
711 __remove_discard_cmd(sbi, dc);
712 }
661 } 713 }
714 mutex_unlock(&dcc->cmd_lock);
715}
716
717static void f2fs_submit_discard_endio(struct bio *bio)
718{
719 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
720
721 complete(&dc->wait);
722 dc->state = D_DONE;
662} 723}
663 724
664static void f2fs_submit_bio_wait_endio(struct bio *bio) 725static int issue_discard_thread(void *data)
665{ 726{
666 struct bio_entry *be = (struct bio_entry *)bio->bi_private; 727 struct f2fs_sb_info *sbi = data;
728 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
729 wait_queue_head_t *q = &dcc->discard_wait_queue;
730 struct list_head *cmd_list = &dcc->discard_cmd_list;
731 struct discard_cmd *dc, *tmp;
732 struct blk_plug plug;
733 int iter = 0;
734repeat:
735 if (kthread_should_stop())
736 return 0;
737
738 blk_start_plug(&plug);
739
740 mutex_lock(&dcc->cmd_lock);
741 list_for_each_entry_safe(dc, tmp, cmd_list, list) {
742 if (dc->state == D_PREP) {
743 dc->state = D_SUBMIT;
744 submit_bio(dc->bio);
745 atomic_inc(&dcc->submit_discard);
746 if (iter++ > DISCARD_ISSUE_RATE)
747 break;
748 } else if (dc->state == D_DONE) {
749 __remove_discard_cmd(sbi, dc);
750 }
751 }
752 mutex_unlock(&dcc->cmd_lock);
753
754 blk_finish_plug(&plug);
755
756 iter = 0;
757 congestion_wait(BLK_RW_SYNC, HZ/50);
667 758
668 be->error = bio->bi_error; 759 wait_event_interruptible(*q,
669 complete(&be->event); 760 kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
761 goto repeat;
670} 762}
671 763
764
672/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 765/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
673static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, 766static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
674 struct block_device *bdev, block_t blkstart, block_t blklen) 767 struct block_device *bdev, block_t blkstart, block_t blklen)
675{ 768{
676 struct bio *bio = NULL; 769 struct bio *bio = NULL;
770 block_t lblkstart = blkstart;
677 int err; 771 int err;
678 772
679 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 773 trace_f2fs_issue_discard(bdev, blkstart, blklen);
680 774
681 if (sbi->s_ndevs) { 775 if (sbi->s_ndevs) {
682 int devi = f2fs_target_device_index(sbi, blkstart); 776 int devi = f2fs_target_device_index(sbi, blkstart);
@@ -688,14 +782,12 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
688 SECTOR_FROM_BLOCK(blklen), 782 SECTOR_FROM_BLOCK(blklen),
689 GFP_NOFS, 0, &bio); 783 GFP_NOFS, 0, &bio);
690 if (!err && bio) { 784 if (!err && bio) {
691 struct bio_entry *be = __add_bio_entry(sbi, bio); 785 bio->bi_end_io = f2fs_submit_discard_endio;
692
693 bio->bi_private = be;
694 bio->bi_end_io = f2fs_submit_bio_wait_endio;
695 bio->bi_opf |= REQ_SYNC; 786 bio->bi_opf |= REQ_SYNC;
696 submit_bio(bio);
697 }
698 787
788 __add_discard_cmd(sbi, bio, lblkstart, blklen);
789 wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
790 }
699 return err; 791 return err;
700} 792}
701 793
@@ -703,24 +795,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
703static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, 795static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
704 struct block_device *bdev, block_t blkstart, block_t blklen) 796 struct block_device *bdev, block_t blkstart, block_t blklen)
705{ 797{
706 sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); 798 sector_t sector, nr_sects;
707 sector_t sector;
708 int devi = 0; 799 int devi = 0;
709 800
710 if (sbi->s_ndevs) { 801 if (sbi->s_ndevs) {
711 devi = f2fs_target_device_index(sbi, blkstart); 802 devi = f2fs_target_device_index(sbi, blkstart);
712 blkstart -= FDEV(devi).start_blk; 803 blkstart -= FDEV(devi).start_blk;
713 } 804 }
714 sector = SECTOR_FROM_BLOCK(blkstart);
715
716 if (sector & (bdev_zone_sectors(bdev) - 1) ||
717 nr_sects != bdev_zone_sectors(bdev)) {
718 f2fs_msg(sbi->sb, KERN_INFO,
719 "(%d) %s: Unaligned discard attempted (block %x + %x)",
720 devi, sbi->s_ndevs ? FDEV(devi).path: "",
721 blkstart, blklen);
722 return -EIO;
723 }
724 805
725 /* 806 /*
726 * We need to know the type of the zone: for conventional zones, 807 * We need to know the type of the zone: for conventional zones,
@@ -735,7 +816,18 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
735 return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); 816 return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
736 case BLK_ZONE_TYPE_SEQWRITE_REQ: 817 case BLK_ZONE_TYPE_SEQWRITE_REQ:
737 case BLK_ZONE_TYPE_SEQWRITE_PREF: 818 case BLK_ZONE_TYPE_SEQWRITE_PREF:
738 trace_f2fs_issue_reset_zone(sbi->sb, blkstart); 819 sector = SECTOR_FROM_BLOCK(blkstart);
820 nr_sects = SECTOR_FROM_BLOCK(blklen);
821
822 if (sector & (bdev_zone_sectors(bdev) - 1) ||
823 nr_sects != bdev_zone_sectors(bdev)) {
824 f2fs_msg(sbi->sb, KERN_INFO,
825 "(%d) %s: Unaligned discard attempted (block %x + %x)",
826 devi, sbi->s_ndevs ? FDEV(devi).path: "",
827 blkstart, blklen);
828 return -EIO;
829 }
830 trace_f2fs_issue_reset_zone(bdev, blkstart);
739 return blkdev_reset_zones(bdev, sector, 831 return blkdev_reset_zones(bdev, sector,
740 nr_sects, GFP_NOFS); 832 nr_sects, GFP_NOFS);
741 default: 833 default:
@@ -800,13 +892,14 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
800 struct cp_control *cpc, struct seg_entry *se, 892 struct cp_control *cpc, struct seg_entry *se,
801 unsigned int start, unsigned int end) 893 unsigned int start, unsigned int end)
802{ 894{
803 struct list_head *head = &SM_I(sbi)->discard_list; 895 struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
804 struct discard_entry *new, *last; 896 struct discard_entry *new, *last;
805 897
806 if (!list_empty(head)) { 898 if (!list_empty(head)) {
807 last = list_last_entry(head, struct discard_entry, list); 899 last = list_last_entry(head, struct discard_entry, list);
808 if (START_BLOCK(sbi, cpc->trim_start) + start == 900 if (START_BLOCK(sbi, cpc->trim_start) + start ==
809 last->blkaddr + last->len) { 901 last->blkaddr + last->len &&
902 last->len < MAX_DISCARD_BLOCKS(sbi)) {
810 last->len += end - start; 903 last->len += end - start;
811 goto done; 904 goto done;
812 } 905 }
@@ -818,10 +911,11 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
818 new->len = end - start; 911 new->len = end - start;
819 list_add_tail(&new->list, head); 912 list_add_tail(&new->list, head);
820done: 913done:
821 SM_I(sbi)->nr_discards += end - start; 914 SM_I(sbi)->dcc_info->nr_discards += end - start;
822} 915}
823 916
824static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) 917static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
918 bool check_only)
825{ 919{
826 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 920 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
827 int max_blocks = sbi->blocks_per_seg; 921 int max_blocks = sbi->blocks_per_seg;
@@ -835,12 +929,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
835 int i; 929 int i;
836 930
837 if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) 931 if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
838 return; 932 return false;
839 933
840 if (!force) { 934 if (!force) {
841 if (!test_opt(sbi, DISCARD) || !se->valid_blocks || 935 if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
842 SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) 936 SM_I(sbi)->dcc_info->nr_discards >=
843 return; 937 SM_I(sbi)->dcc_info->max_discards)
938 return false;
844 } 939 }
845 940
846 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ 941 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
@@ -848,7 +943,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
848 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : 943 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
849 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 944 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
850 945
851 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 946 while (force || SM_I(sbi)->dcc_info->nr_discards <=
947 SM_I(sbi)->dcc_info->max_discards) {
852 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 948 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
853 if (start >= max_blocks) 949 if (start >= max_blocks)
854 break; 950 break;
@@ -858,13 +954,17 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
858 && (end - start) < cpc->trim_minlen) 954 && (end - start) < cpc->trim_minlen)
859 continue; 955 continue;
860 956
957 if (check_only)
958 return true;
959
861 __add_discard_entry(sbi, cpc, se, start, end); 960 __add_discard_entry(sbi, cpc, se, start, end);
862 } 961 }
962 return false;
863} 963}
864 964
865void release_discard_addrs(struct f2fs_sb_info *sbi) 965void release_discard_addrs(struct f2fs_sb_info *sbi)
866{ 966{
867 struct list_head *head = &(SM_I(sbi)->discard_list); 967 struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
868 struct discard_entry *entry, *this; 968 struct discard_entry *entry, *this;
869 969
870 /* drop caches */ 970 /* drop caches */
@@ -890,17 +990,14 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
890 990
891void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) 991void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
892{ 992{
893 struct list_head *head = &(SM_I(sbi)->discard_list); 993 struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
894 struct discard_entry *entry, *this; 994 struct discard_entry *entry, *this;
895 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 995 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
896 struct blk_plug plug;
897 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 996 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
898 unsigned int start = 0, end = -1; 997 unsigned int start = 0, end = -1;
899 unsigned int secno, start_segno; 998 unsigned int secno, start_segno;
900 bool force = (cpc->reason == CP_DISCARD); 999 bool force = (cpc->reason == CP_DISCARD);
901 1000
902 blk_start_plug(&plug);
903
904 mutex_lock(&dirty_i->seglist_lock); 1001 mutex_lock(&dirty_i->seglist_lock);
905 1002
906 while (1) { 1003 while (1) {
@@ -916,9 +1013,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
916 1013
917 dirty_i->nr_dirty[PRE] -= end - start; 1014 dirty_i->nr_dirty[PRE] -= end - start;
918 1015
919 if (force || !test_opt(sbi, DISCARD)) 1016 if (!test_opt(sbi, DISCARD))
920 continue; 1017 continue;
921 1018
1019 if (force && start >= cpc->trim_start &&
1020 (end - 1) <= cpc->trim_end)
1021 continue;
1022
922 if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { 1023 if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
923 f2fs_issue_discard(sbi, START_BLOCK(sbi, start), 1024 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
924 (end - start) << sbi->log_blocks_per_seg); 1025 (end - start) << sbi->log_blocks_per_seg);
@@ -935,6 +1036,8 @@ next:
935 start = start_segno + sbi->segs_per_sec; 1036 start = start_segno + sbi->segs_per_sec;
936 if (start < end) 1037 if (start < end)
937 goto next; 1038 goto next;
1039 else
1040 end = start - 1;
938 } 1041 }
939 mutex_unlock(&dirty_i->seglist_lock); 1042 mutex_unlock(&dirty_i->seglist_lock);
940 1043
@@ -946,11 +1049,62 @@ next:
946 cpc->trimmed += entry->len; 1049 cpc->trimmed += entry->len;
947skip: 1050skip:
948 list_del(&entry->list); 1051 list_del(&entry->list);
949 SM_I(sbi)->nr_discards -= entry->len; 1052 SM_I(sbi)->dcc_info->nr_discards -= entry->len;
950 kmem_cache_free(discard_entry_slab, entry); 1053 kmem_cache_free(discard_entry_slab, entry);
951 } 1054 }
1055}
952 1056
953 blk_finish_plug(&plug); 1057static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1058{
1059 dev_t dev = sbi->sb->s_bdev->bd_dev;
1060 struct discard_cmd_control *dcc;
1061 int err = 0;
1062
1063 if (SM_I(sbi)->dcc_info) {
1064 dcc = SM_I(sbi)->dcc_info;
1065 goto init_thread;
1066 }
1067
1068 dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
1069 if (!dcc)
1070 return -ENOMEM;
1071
1072 INIT_LIST_HEAD(&dcc->discard_entry_list);
1073 INIT_LIST_HEAD(&dcc->discard_cmd_list);
1074 mutex_init(&dcc->cmd_lock);
1075 atomic_set(&dcc->submit_discard, 0);
1076 dcc->nr_discards = 0;
1077 dcc->max_discards = 0;
1078
1079 init_waitqueue_head(&dcc->discard_wait_queue);
1080 SM_I(sbi)->dcc_info = dcc;
1081init_thread:
1082 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
1083 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
1084 if (IS_ERR(dcc->f2fs_issue_discard)) {
1085 err = PTR_ERR(dcc->f2fs_issue_discard);
1086 kfree(dcc);
1087 SM_I(sbi)->dcc_info = NULL;
1088 return err;
1089 }
1090
1091 return err;
1092}
1093
1094static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
1095{
1096 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1097
1098 if (dcc && dcc->f2fs_issue_discard) {
1099 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1100
1101 dcc->f2fs_issue_discard = NULL;
1102 kthread_stop(discard_thread);
1103 }
1104 if (free) {
1105 kfree(dcc);
1106 SM_I(sbi)->dcc_info = NULL;
1107 }
954} 1108}
955 1109
956static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 1110static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -995,14 +1149,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
995 1149
996 /* Update valid block bitmap */ 1150 /* Update valid block bitmap */
997 if (del > 0) { 1151 if (del > 0) {
998 if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) 1152 if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
1153#ifdef CONFIG_F2FS_CHECK_FS
1154 if (f2fs_test_and_set_bit(offset,
1155 se->cur_valid_map_mir))
1156 f2fs_bug_on(sbi, 1);
1157 else
1158 WARN_ON(1);
1159#else
999 f2fs_bug_on(sbi, 1); 1160 f2fs_bug_on(sbi, 1);
1161#endif
1162 }
1000 if (f2fs_discard_en(sbi) && 1163 if (f2fs_discard_en(sbi) &&
1001 !f2fs_test_and_set_bit(offset, se->discard_map)) 1164 !f2fs_test_and_set_bit(offset, se->discard_map))
1002 sbi->discard_blks--; 1165 sbi->discard_blks--;
1003 } else { 1166 } else {
1004 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) 1167 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
1168#ifdef CONFIG_F2FS_CHECK_FS
1169 if (!f2fs_test_and_clear_bit(offset,
1170 se->cur_valid_map_mir))
1171 f2fs_bug_on(sbi, 1);
1172 else
1173 WARN_ON(1);
1174#else
1005 f2fs_bug_on(sbi, 1); 1175 f2fs_bug_on(sbi, 1);
1176#endif
1177 }
1006 if (f2fs_discard_en(sbi) && 1178 if (f2fs_discard_en(sbi) &&
1007 f2fs_test_and_clear_bit(offset, se->discard_map)) 1179 f2fs_test_and_clear_bit(offset, se->discard_map))
1008 sbi->discard_blks++; 1180 sbi->discard_blks++;
@@ -1167,17 +1339,6 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
1167 f2fs_put_page(page, 1); 1339 f2fs_put_page(page, 1);
1168} 1340}
1169 1341
1170static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
1171{
1172 struct curseg_info *curseg = CURSEG_I(sbi, type);
1173 unsigned int segno = curseg->segno + 1;
1174 struct free_segmap_info *free_i = FREE_I(sbi);
1175
1176 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
1177 return !test_bit(segno, free_i->free_segmap);
1178 return 0;
1179}
1180
1181/* 1342/*
1182 * Find a new segment from the free segments bitmap to right order 1343 * Find a new segment from the free segments bitmap to right order
1183 * This function should be returned with success, otherwise BUG 1344 * This function should be returned with success, otherwise BUG
@@ -1382,16 +1543,39 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1382{ 1543{
1383 struct curseg_info *curseg = CURSEG_I(sbi, type); 1544 struct curseg_info *curseg = CURSEG_I(sbi, type);
1384 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; 1545 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
1546 int i, cnt;
1547 bool reversed = false;
1548
1549 /* need_SSR() already forces to do this */
1550 if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
1551 return 1;
1385 1552
1386 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) 1553 /* For node segments, let's do SSR more intensively */
1387 return v_ops->get_victim(sbi, 1554 if (IS_NODESEG(type)) {
1388 &(curseg)->next_segno, BG_GC, type, SSR); 1555 if (type >= CURSEG_WARM_NODE) {
1556 reversed = true;
1557 i = CURSEG_COLD_NODE;
1558 } else {
1559 i = CURSEG_HOT_NODE;
1560 }
1561 cnt = NR_CURSEG_NODE_TYPE;
1562 } else {
1563 if (type >= CURSEG_WARM_DATA) {
1564 reversed = true;
1565 i = CURSEG_COLD_DATA;
1566 } else {
1567 i = CURSEG_HOT_DATA;
1568 }
1569 cnt = NR_CURSEG_DATA_TYPE;
1570 }
1389 1571
1390 /* For data segments, let's do SSR more intensively */ 1572 for (; cnt-- > 0; reversed ? i-- : i++) {
1391 for (; type >= CURSEG_HOT_DATA; type--) 1573 if (i == type)
1574 continue;
1392 if (v_ops->get_victim(sbi, &(curseg)->next_segno, 1575 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1393 BG_GC, type, SSR)) 1576 BG_GC, i, SSR))
1394 return 1; 1577 return 1;
1578 }
1395 return 0; 1579 return 0;
1396} 1580}
1397 1581
@@ -1402,20 +1586,17 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1402static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 1586static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1403 int type, bool force) 1587 int type, bool force)
1404{ 1588{
1405 struct curseg_info *curseg = CURSEG_I(sbi, type);
1406
1407 if (force) 1589 if (force)
1408 new_curseg(sbi, type, true); 1590 new_curseg(sbi, type, true);
1409 else if (type == CURSEG_WARM_NODE) 1591 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
1410 new_curseg(sbi, type, false); 1592 type == CURSEG_WARM_NODE)
1411 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1412 new_curseg(sbi, type, false); 1593 new_curseg(sbi, type, false);
1413 else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) 1594 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1414 change_curseg(sbi, type, true); 1595 change_curseg(sbi, type, true);
1415 else 1596 else
1416 new_curseg(sbi, type, false); 1597 new_curseg(sbi, type, false);
1417 1598
1418 stat_inc_seg_type(sbi, curseg); 1599 stat_inc_seg_type(sbi, CURSEG_I(sbi, type));
1419} 1600}
1420 1601
1421void allocate_new_segments(struct f2fs_sb_info *sbi) 1602void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -1424,9 +1605,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
1424 unsigned int old_segno; 1605 unsigned int old_segno;
1425 int i; 1606 int i;
1426 1607
1427 if (test_opt(sbi, LFS))
1428 return;
1429
1430 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 1608 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1431 curseg = CURSEG_I(sbi, i); 1609 curseg = CURSEG_I(sbi, i);
1432 old_segno = curseg->segno; 1610 old_segno = curseg->segno;
@@ -1439,6 +1617,24 @@ static const struct segment_allocation default_salloc_ops = {
1439 .allocate_segment = allocate_segment_by_default, 1617 .allocate_segment = allocate_segment_by_default,
1440}; 1618};
1441 1619
1620bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1621{
1622 __u64 trim_start = cpc->trim_start;
1623 bool has_candidate = false;
1624
1625 mutex_lock(&SIT_I(sbi)->sentry_lock);
1626 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
1627 if (add_discard_addrs(sbi, cpc, true)) {
1628 has_candidate = true;
1629 break;
1630 }
1631 }
1632 mutex_unlock(&SIT_I(sbi)->sentry_lock);
1633
1634 cpc->trim_start = trim_start;
1635 return has_candidate;
1636}
1637
1442int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 1638int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1443{ 1639{
1444 __u64 start = F2FS_BYTES_TO_BLK(range->start); 1640 __u64 start = F2FS_BYTES_TO_BLK(range->start);
@@ -1573,6 +1769,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1573 1769
1574 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 1770 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1575 1771
1772 f2fs_wait_discard_bio(sbi, *new_blkaddr);
1773
1576 /* 1774 /*
1577 * __add_sum_entry should be resided under the curseg_mutex 1775 * __add_sum_entry should be resided under the curseg_mutex
1578 * because, this function updates a summary entry in the 1776 * because, this function updates a summary entry in the
@@ -1584,14 +1782,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1584 1782
1585 stat_inc_block_count(sbi, curseg); 1783 stat_inc_block_count(sbi, curseg);
1586 1784
1587 if (!__has_curseg_space(sbi, type))
1588 sit_i->s_ops->allocate_segment(sbi, type, false);
1589 /* 1785 /*
1590 * SIT information should be updated before segment allocation, 1786 * SIT information should be updated before segment allocation,
1591 * since SSR needs latest valid block information. 1787 * since SSR needs latest valid block information.
1592 */ 1788 */
1593 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 1789 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1594 1790
1791 if (!__has_curseg_space(sbi, type))
1792 sit_i->s_ops->allocate_segment(sbi, type, false);
1793
1595 mutex_unlock(&sit_i->sentry_lock); 1794 mutex_unlock(&sit_i->sentry_lock);
1596 1795
1597 if (page && IS_NODESEG(type)) 1796 if (page && IS_NODESEG(type))
@@ -1603,15 +1802,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1603static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 1802static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
1604{ 1803{
1605 int type = __get_segment_type(fio->page, fio->type); 1804 int type = __get_segment_type(fio->page, fio->type);
1805 int err;
1606 1806
1607 if (fio->type == NODE || fio->type == DATA) 1807 if (fio->type == NODE || fio->type == DATA)
1608 mutex_lock(&fio->sbi->wio_mutex[fio->type]); 1808 mutex_lock(&fio->sbi->wio_mutex[fio->type]);
1609 1809reallocate:
1610 allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 1810 allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
1611 &fio->new_blkaddr, sum, type); 1811 &fio->new_blkaddr, sum, type);
1612 1812
1613 /* writeout dirty page into bdev */ 1813 /* writeout dirty page into bdev */
1614 f2fs_submit_page_mbio(fio); 1814 err = f2fs_submit_page_mbio(fio);
1815 if (err == -EAGAIN) {
1816 fio->old_blkaddr = fio->new_blkaddr;
1817 goto reallocate;
1818 }
1615 1819
1616 if (fio->type == NODE || fio->type == DATA) 1820 if (fio->type == NODE || fio->type == DATA)
1617 mutex_unlock(&fio->sbi->wio_mutex[fio->type]); 1821 mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
@@ -1753,7 +1957,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
1753 if (PageWriteback(page)) { 1957 if (PageWriteback(page)) {
1754 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1958 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1755 1959
1756 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); 1960 f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
1961 0, page->index, type, WRITE);
1757 if (ordered) 1962 if (ordered)
1758 wait_on_page_writeback(page); 1963 wait_on_page_writeback(page);
1759 else 1964 else
@@ -2228,7 +2433,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2228 /* add discard candidates */ 2433 /* add discard candidates */
2229 if (cpc->reason != CP_DISCARD) { 2434 if (cpc->reason != CP_DISCARD) {
2230 cpc->trim_start = segno; 2435 cpc->trim_start = segno;
2231 add_discard_addrs(sbi, cpc); 2436 add_discard_addrs(sbi, cpc, false);
2232 } 2437 }
2233 2438
2234 if (to_journal) { 2439 if (to_journal) {
@@ -2263,8 +2468,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2263 f2fs_bug_on(sbi, sit_i->dirty_sentries); 2468 f2fs_bug_on(sbi, sit_i->dirty_sentries);
2264out: 2469out:
2265 if (cpc->reason == CP_DISCARD) { 2470 if (cpc->reason == CP_DISCARD) {
2471 __u64 trim_start = cpc->trim_start;
2472
2266 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) 2473 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
2267 add_discard_addrs(sbi, cpc); 2474 add_discard_addrs(sbi, cpc, false);
2475
2476 cpc->trim_start = trim_start;
2268 } 2477 }
2269 mutex_unlock(&sit_i->sentry_lock); 2478 mutex_unlock(&sit_i->sentry_lock);
2270 2479
@@ -2276,7 +2485,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2276 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 2485 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2277 struct sit_info *sit_i; 2486 struct sit_info *sit_i;
2278 unsigned int sit_segs, start; 2487 unsigned int sit_segs, start;
2279 char *src_bitmap, *dst_bitmap; 2488 char *src_bitmap;
2280 unsigned int bitmap_size; 2489 unsigned int bitmap_size;
2281 2490
2282 /* allocate memory for SIT information */ 2491 /* allocate memory for SIT information */
@@ -2305,6 +2514,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2305 !sit_i->sentries[start].ckpt_valid_map) 2514 !sit_i->sentries[start].ckpt_valid_map)
2306 return -ENOMEM; 2515 return -ENOMEM;
2307 2516
2517#ifdef CONFIG_F2FS_CHECK_FS
2518 sit_i->sentries[start].cur_valid_map_mir
2519 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2520 if (!sit_i->sentries[start].cur_valid_map_mir)
2521 return -ENOMEM;
2522#endif
2523
2308 if (f2fs_discard_en(sbi)) { 2524 if (f2fs_discard_en(sbi)) {
2309 sit_i->sentries[start].discard_map 2525 sit_i->sentries[start].discard_map
2310 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 2526 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -2331,17 +2547,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2331 bitmap_size = __bitmap_size(sbi, SIT_BITMAP); 2547 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
2332 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); 2548 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
2333 2549
2334 dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); 2550 sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2335 if (!dst_bitmap) 2551 if (!sit_i->sit_bitmap)
2336 return -ENOMEM; 2552 return -ENOMEM;
2337 2553
2554#ifdef CONFIG_F2FS_CHECK_FS
2555 sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2556 if (!sit_i->sit_bitmap_mir)
2557 return -ENOMEM;
2558#endif
2559
2338 /* init SIT information */ 2560 /* init SIT information */
2339 sit_i->s_ops = &default_salloc_ops; 2561 sit_i->s_ops = &default_salloc_ops;
2340 2562
2341 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); 2563 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
2342 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; 2564 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
2343 sit_i->written_valid_blocks = 0; 2565 sit_i->written_valid_blocks = 0;
2344 sit_i->sit_bitmap = dst_bitmap;
2345 sit_i->bitmap_size = bitmap_size; 2566 sit_i->bitmap_size = bitmap_size;
2346 sit_i->dirty_sentries = 0; 2567 sit_i->dirty_sentries = 0;
2347 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; 2568 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
@@ -2626,11 +2847,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
2626 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 2847 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2627 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 2848 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2628 2849
2629 INIT_LIST_HEAD(&sm_info->discard_list);
2630 INIT_LIST_HEAD(&sm_info->wait_list);
2631 sm_info->nr_discards = 0;
2632 sm_info->max_discards = 0;
2633
2634 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; 2850 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2635 2851
2636 INIT_LIST_HEAD(&sm_info->sit_entry_set); 2852 INIT_LIST_HEAD(&sm_info->sit_entry_set);
@@ -2641,6 +2857,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
2641 return err; 2857 return err;
2642 } 2858 }
2643 2859
2860 err = create_discard_cmd_control(sbi);
2861 if (err)
2862 return err;
2863
2644 err = build_sit_info(sbi); 2864 err = build_sit_info(sbi);
2645 if (err) 2865 if (err)
2646 return err; 2866 return err;
@@ -2734,6 +2954,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
2734 if (sit_i->sentries) { 2954 if (sit_i->sentries) {
2735 for (start = 0; start < MAIN_SEGS(sbi); start++) { 2955 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2736 kfree(sit_i->sentries[start].cur_valid_map); 2956 kfree(sit_i->sentries[start].cur_valid_map);
2957#ifdef CONFIG_F2FS_CHECK_FS
2958 kfree(sit_i->sentries[start].cur_valid_map_mir);
2959#endif
2737 kfree(sit_i->sentries[start].ckpt_valid_map); 2960 kfree(sit_i->sentries[start].ckpt_valid_map);
2738 kfree(sit_i->sentries[start].discard_map); 2961 kfree(sit_i->sentries[start].discard_map);
2739 } 2962 }
@@ -2746,6 +2969,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
2746 2969
2747 SM_I(sbi)->sit_info = NULL; 2970 SM_I(sbi)->sit_info = NULL;
2748 kfree(sit_i->sit_bitmap); 2971 kfree(sit_i->sit_bitmap);
2972#ifdef CONFIG_F2FS_CHECK_FS
2973 kfree(sit_i->sit_bitmap_mir);
2974#endif
2749 kfree(sit_i); 2975 kfree(sit_i);
2750} 2976}
2751 2977
@@ -2756,6 +2982,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
2756 if (!sm_info) 2982 if (!sm_info)
2757 return; 2983 return;
2758 destroy_flush_cmd_control(sbi, true); 2984 destroy_flush_cmd_control(sbi, true);
2985 destroy_discard_cmd_control(sbi, true);
2759 destroy_dirty_segmap(sbi); 2986 destroy_dirty_segmap(sbi);
2760 destroy_curseg(sbi); 2987 destroy_curseg(sbi);
2761 destroy_free_segmap(sbi); 2988 destroy_free_segmap(sbi);
@@ -2771,15 +2998,15 @@ int __init create_segment_manager_caches(void)
2771 if (!discard_entry_slab) 2998 if (!discard_entry_slab)
2772 goto fail; 2999 goto fail;
2773 3000
2774 bio_entry_slab = f2fs_kmem_cache_create("bio_entry", 3001 discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
2775 sizeof(struct bio_entry)); 3002 sizeof(struct discard_cmd));
2776 if (!bio_entry_slab) 3003 if (!discard_cmd_slab)
2777 goto destroy_discard_entry; 3004 goto destroy_discard_entry;
2778 3005
2779 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", 3006 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2780 sizeof(struct sit_entry_set)); 3007 sizeof(struct sit_entry_set));
2781 if (!sit_entry_set_slab) 3008 if (!sit_entry_set_slab)
2782 goto destroy_bio_entry; 3009 goto destroy_discard_cmd;
2783 3010
2784 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", 3011 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2785 sizeof(struct inmem_pages)); 3012 sizeof(struct inmem_pages));
@@ -2789,8 +3016,8 @@ int __init create_segment_manager_caches(void)
2789 3016
2790destroy_sit_entry_set: 3017destroy_sit_entry_set:
2791 kmem_cache_destroy(sit_entry_set_slab); 3018 kmem_cache_destroy(sit_entry_set_slab);
2792destroy_bio_entry: 3019destroy_discard_cmd:
2793 kmem_cache_destroy(bio_entry_slab); 3020 kmem_cache_destroy(discard_cmd_slab);
2794destroy_discard_entry: 3021destroy_discard_entry:
2795 kmem_cache_destroy(discard_entry_slab); 3022 kmem_cache_destroy(discard_entry_slab);
2796fail: 3023fail:
@@ -2800,7 +3027,7 @@ fail:
2800void destroy_segment_manager_caches(void) 3027void destroy_segment_manager_caches(void)
2801{ 3028{
2802 kmem_cache_destroy(sit_entry_set_slab); 3029 kmem_cache_destroy(sit_entry_set_slab);
2803 kmem_cache_destroy(bio_entry_slab); 3030 kmem_cache_destroy(discard_cmd_slab);
2804 kmem_cache_destroy(discard_entry_slab); 3031 kmem_cache_destroy(discard_entry_slab);
2805 kmem_cache_destroy(inmem_entry_slab); 3032 kmem_cache_destroy(inmem_entry_slab);
2806} 3033}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 9d44ce83acb2..5e8ad4280a50 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -164,6 +164,9 @@ struct seg_entry {
164 unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ 164 unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
165 unsigned int padding:6; /* padding */ 165 unsigned int padding:6; /* padding */
166 unsigned char *cur_valid_map; /* validity bitmap of blocks */ 166 unsigned char *cur_valid_map; /* validity bitmap of blocks */
167#ifdef CONFIG_F2FS_CHECK_FS
168 unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
169#endif
167 /* 170 /*
168 * # of valid blocks and the validity bitmap stored in the the last 171 * # of valid blocks and the validity bitmap stored in the the last
169 * checkpoint pack. This information is used by the SSR mode. 172 * checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +189,12 @@ struct segment_allocation {
186 * the page is atomically written, and it is in inmem_pages list. 189 * the page is atomically written, and it is in inmem_pages list.
187 */ 190 */
188#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) 191#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
192#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
189 193
190#define IS_ATOMIC_WRITTEN_PAGE(page) \ 194#define IS_ATOMIC_WRITTEN_PAGE(page) \
191 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) 195 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
196#define IS_DUMMY_WRITTEN_PAGE(page) \
197 (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
192 198
193struct inmem_pages { 199struct inmem_pages {
194 struct list_head list; 200 struct list_head list;
@@ -203,6 +209,9 @@ struct sit_info {
203 block_t sit_blocks; /* # of blocks used by SIT area */ 209 block_t sit_blocks; /* # of blocks used by SIT area */
204 block_t written_valid_blocks; /* # of valid blocks in main area */ 210 block_t written_valid_blocks; /* # of valid blocks in main area */
205 char *sit_bitmap; /* SIT bitmap pointer */ 211 char *sit_bitmap; /* SIT bitmap pointer */
212#ifdef CONFIG_F2FS_CHECK_FS
213 char *sit_bitmap_mir; /* SIT bitmap mirror */
214#endif
206 unsigned int bitmap_size; /* SIT bitmap size */ 215 unsigned int bitmap_size; /* SIT bitmap size */
207 216
208 unsigned long *tmp_map; /* bitmap for temporal use */ 217 unsigned long *tmp_map; /* bitmap for temporal use */
@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
317 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); 326 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
318 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); 327 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
319 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); 328 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
329#ifdef CONFIG_F2FS_CHECK_FS
330 memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
331#endif
320 se->type = GET_SIT_TYPE(rs); 332 se->type = GET_SIT_TYPE(rs);
321 se->mtime = le64_to_cpu(rs->mtime); 333 se->mtime = le64_to_cpu(rs->mtime);
322} 334}
@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
414 void *dst_addr) 426 void *dst_addr)
415{ 427{
416 struct sit_info *sit_i = SIT_I(sbi); 428 struct sit_info *sit_i = SIT_I(sbi);
429
430#ifdef CONFIG_F2FS_CHECK_FS
431 if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
432 sit_i->bitmap_size))
433 f2fs_bug_on(sbi, 1);
434#endif
417 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); 435 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
418} 436}
419 437
@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
634 652
635 check_seg_range(sbi, start); 653 check_seg_range(sbi, start);
636 654
655#ifdef CONFIG_F2FS_CHECK_FS
656 if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
657 f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
658 f2fs_bug_on(sbi, 1);
659#endif
660
637 /* calculate sit block address */ 661 /* calculate sit block address */
638 if (f2fs_test_bit(offset, sit_i->sit_bitmap)) 662 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
639 blk_addr += sit_i->sit_blocks; 663 blk_addr += sit_i->sit_blocks;
@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
659 unsigned int block_off = SIT_BLOCK_OFFSET(start); 683 unsigned int block_off = SIT_BLOCK_OFFSET(start);
660 684
661 f2fs_change_bit(block_off, sit_i->sit_bitmap); 685 f2fs_change_bit(block_off, sit_i->sit_bitmap);
686#ifdef CONFIG_F2FS_CHECK_FS
687 f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
688#endif
662} 689}
663 690
664static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) 691static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -689,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
689 - (base + 1) + type; 716 - (base + 1) + type;
690} 717}
691 718
719static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
720 unsigned int secno)
721{
722 if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
723 sbi->fggc_threshold)
724 return true;
725 return false;
726}
727
692static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) 728static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
693{ 729{
694 if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) 730 if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -700,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
700 * It is very important to gather dirty pages and write at once, so that we can 736 * It is very important to gather dirty pages and write at once, so that we can
701 * submit a big bio without interfering other data writes. 737 * submit a big bio without interfering other data writes.
702 * By default, 512 pages for directory data, 738 * By default, 512 pages for directory data,
703 * 512 pages (2MB) * 3 for three types of nodes, and 739 * 512 pages (2MB) * 8 for nodes, and
704 * max_bio_blocks for meta are set. 740 * 256 pages * 8 for meta are set.
705 */ 741 */
706static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) 742static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
707{ 743{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a831303bb777..96fe8ed73100 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,6 +89,7 @@ enum {
89 Opt_active_logs, 89 Opt_active_logs,
90 Opt_disable_ext_identify, 90 Opt_disable_ext_identify,
91 Opt_inline_xattr, 91 Opt_inline_xattr,
92 Opt_noinline_xattr,
92 Opt_inline_data, 93 Opt_inline_data,
93 Opt_inline_dentry, 94 Opt_inline_dentry,
94 Opt_noinline_dentry, 95 Opt_noinline_dentry,
@@ -101,6 +102,7 @@ enum {
101 Opt_noinline_data, 102 Opt_noinline_data,
102 Opt_data_flush, 103 Opt_data_flush,
103 Opt_mode, 104 Opt_mode,
105 Opt_io_size_bits,
104 Opt_fault_injection, 106 Opt_fault_injection,
105 Opt_lazytime, 107 Opt_lazytime,
106 Opt_nolazytime, 108 Opt_nolazytime,
@@ -121,6 +123,7 @@ static match_table_t f2fs_tokens = {
121 {Opt_active_logs, "active_logs=%u"}, 123 {Opt_active_logs, "active_logs=%u"},
122 {Opt_disable_ext_identify, "disable_ext_identify"}, 124 {Opt_disable_ext_identify, "disable_ext_identify"},
123 {Opt_inline_xattr, "inline_xattr"}, 125 {Opt_inline_xattr, "inline_xattr"},
126 {Opt_noinline_xattr, "noinline_xattr"},
124 {Opt_inline_data, "inline_data"}, 127 {Opt_inline_data, "inline_data"},
125 {Opt_inline_dentry, "inline_dentry"}, 128 {Opt_inline_dentry, "inline_dentry"},
126 {Opt_noinline_dentry, "noinline_dentry"}, 129 {Opt_noinline_dentry, "noinline_dentry"},
@@ -133,6 +136,7 @@ static match_table_t f2fs_tokens = {
133 {Opt_noinline_data, "noinline_data"}, 136 {Opt_noinline_data, "noinline_data"},
134 {Opt_data_flush, "data_flush"}, 137 {Opt_data_flush, "data_flush"},
135 {Opt_mode, "mode=%s"}, 138 {Opt_mode, "mode=%s"},
139 {Opt_io_size_bits, "io_bits=%u"},
136 {Opt_fault_injection, "fault_injection=%u"}, 140 {Opt_fault_injection, "fault_injection=%u"},
137 {Opt_lazytime, "lazytime"}, 141 {Opt_lazytime, "lazytime"},
138 {Opt_nolazytime, "nolazytime"}, 142 {Opt_nolazytime, "nolazytime"},
@@ -143,6 +147,7 @@ static match_table_t f2fs_tokens = {
143enum { 147enum {
144 GC_THREAD, /* struct f2fs_gc_thread */ 148 GC_THREAD, /* struct f2fs_gc_thread */
145 SM_INFO, /* struct f2fs_sm_info */ 149 SM_INFO, /* struct f2fs_sm_info */
150 DCC_INFO, /* struct discard_cmd_control */
146 NM_INFO, /* struct f2fs_nm_info */ 151 NM_INFO, /* struct f2fs_nm_info */
147 F2FS_SBI, /* struct f2fs_sb_info */ 152 F2FS_SBI, /* struct f2fs_sb_info */
148#ifdef CONFIG_F2FS_FAULT_INJECTION 153#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +171,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
166 return (unsigned char *)sbi->gc_thread; 171 return (unsigned char *)sbi->gc_thread;
167 else if (struct_type == SM_INFO) 172 else if (struct_type == SM_INFO)
168 return (unsigned char *)SM_I(sbi); 173 return (unsigned char *)SM_I(sbi);
174 else if (struct_type == DCC_INFO)
175 return (unsigned char *)SM_I(sbi)->dcc_info;
169 else if (struct_type == NM_INFO) 176 else if (struct_type == NM_INFO)
170 return (unsigned char *)NM_I(sbi); 177 return (unsigned char *)NM_I(sbi);
171 else if (struct_type == F2FS_SBI) 178 else if (struct_type == F2FS_SBI)
@@ -281,7 +288,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
281F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); 288F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
282F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); 289F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
283F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 290F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
284F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 291F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
285F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); 292F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
286F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 293F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
287F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 294F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -439,6 +446,9 @@ static int parse_options(struct super_block *sb, char *options)
439 case Opt_inline_xattr: 446 case Opt_inline_xattr:
440 set_opt(sbi, INLINE_XATTR); 447 set_opt(sbi, INLINE_XATTR);
441 break; 448 break;
449 case Opt_noinline_xattr:
450 clear_opt(sbi, INLINE_XATTR);
451 break;
442#else 452#else
443 case Opt_user_xattr: 453 case Opt_user_xattr:
444 f2fs_msg(sb, KERN_INFO, 454 f2fs_msg(sb, KERN_INFO,
@@ -452,6 +462,10 @@ static int parse_options(struct super_block *sb, char *options)
452 f2fs_msg(sb, KERN_INFO, 462 f2fs_msg(sb, KERN_INFO,
453 "inline_xattr options not supported"); 463 "inline_xattr options not supported");
454 break; 464 break;
465 case Opt_noinline_xattr:
466 f2fs_msg(sb, KERN_INFO,
467 "noinline_xattr options not supported");
468 break;
455#endif 469#endif
456#ifdef CONFIG_F2FS_FS_POSIX_ACL 470#ifdef CONFIG_F2FS_FS_POSIX_ACL
457 case Opt_acl: 471 case Opt_acl:
@@ -535,11 +549,23 @@ static int parse_options(struct super_block *sb, char *options)
535 } 549 }
536 kfree(name); 550 kfree(name);
537 break; 551 break;
552 case Opt_io_size_bits:
553 if (args->from && match_int(args, &arg))
554 return -EINVAL;
555 if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
556 f2fs_msg(sb, KERN_WARNING,
557 "Not support %d, larger than %d",
558 1 << arg, BIO_MAX_PAGES);
559 return -EINVAL;
560 }
561 sbi->write_io_size_bits = arg;
562 break;
538 case Opt_fault_injection: 563 case Opt_fault_injection:
539 if (args->from && match_int(args, &arg)) 564 if (args->from && match_int(args, &arg))
540 return -EINVAL; 565 return -EINVAL;
541#ifdef CONFIG_F2FS_FAULT_INJECTION 566#ifdef CONFIG_F2FS_FAULT_INJECTION
542 f2fs_build_fault_attr(sbi, arg); 567 f2fs_build_fault_attr(sbi, arg);
568 set_opt(sbi, FAULT_INJECTION);
543#else 569#else
544 f2fs_msg(sb, KERN_INFO, 570 f2fs_msg(sb, KERN_INFO,
545 "FAULT_INJECTION was not selected"); 571 "FAULT_INJECTION was not selected");
@@ -558,6 +584,13 @@ static int parse_options(struct super_block *sb, char *options)
558 return -EINVAL; 584 return -EINVAL;
559 } 585 }
560 } 586 }
587
588 if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
589 f2fs_msg(sb, KERN_ERR,
590 "Should set mode=lfs with %uKB-sized IO",
591 F2FS_IO_SIZE_KB(sbi));
592 return -EINVAL;
593 }
561 return 0; 594 return 0;
562} 595}
563 596
@@ -591,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
591 624
592static int f2fs_drop_inode(struct inode *inode) 625static int f2fs_drop_inode(struct inode *inode)
593{ 626{
627 int ret;
594 /* 628 /*
595 * This is to avoid a deadlock condition like below. 629 * This is to avoid a deadlock condition like below.
596 * writeback_single_inode(inode) 630 * writeback_single_inode(inode)
@@ -623,10 +657,12 @@ static int f2fs_drop_inode(struct inode *inode)
623 spin_lock(&inode->i_lock); 657 spin_lock(&inode->i_lock);
624 atomic_dec(&inode->i_count); 658 atomic_dec(&inode->i_count);
625 } 659 }
660 trace_f2fs_drop_inode(inode, 0);
626 return 0; 661 return 0;
627 } 662 }
628 663 ret = generic_drop_inode(inode);
629 return generic_drop_inode(inode); 664 trace_f2fs_drop_inode(inode, ret);
665 return ret;
630} 666}
631 667
632int f2fs_inode_dirtied(struct inode *inode, bool sync) 668int f2fs_inode_dirtied(struct inode *inode, bool sync)
@@ -750,6 +786,9 @@ static void f2fs_put_super(struct super_block *sb)
750 write_checkpoint(sbi, &cpc); 786 write_checkpoint(sbi, &cpc);
751 } 787 }
752 788
789 /* be sure to wait for any on-going discard commands */
790 f2fs_wait_discard_bio(sbi, NULL_ADDR);
791
753 /* write_checkpoint can update stat informaion */ 792 /* write_checkpoint can update stat informaion */
754 f2fs_destroy_stats(sbi); 793 f2fs_destroy_stats(sbi);
755 794
@@ -782,7 +821,7 @@ static void f2fs_put_super(struct super_block *sb)
782 kfree(sbi->raw_super); 821 kfree(sbi->raw_super);
783 822
784 destroy_device_list(sbi); 823 destroy_device_list(sbi);
785 824 mempool_destroy(sbi->write_io_dummy);
786 destroy_percpu_info(sbi); 825 destroy_percpu_info(sbi);
787 kfree(sbi); 826 kfree(sbi);
788} 827}
@@ -882,6 +921,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
882 seq_puts(seq, ",nouser_xattr"); 921 seq_puts(seq, ",nouser_xattr");
883 if (test_opt(sbi, INLINE_XATTR)) 922 if (test_opt(sbi, INLINE_XATTR))
884 seq_puts(seq, ",inline_xattr"); 923 seq_puts(seq, ",inline_xattr");
924 else
925 seq_puts(seq, ",noinline_xattr");
885#endif 926#endif
886#ifdef CONFIG_F2FS_FS_POSIX_ACL 927#ifdef CONFIG_F2FS_FS_POSIX_ACL
887 if (test_opt(sbi, POSIX_ACL)) 928 if (test_opt(sbi, POSIX_ACL))
@@ -918,6 +959,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
918 else if (test_opt(sbi, LFS)) 959 else if (test_opt(sbi, LFS))
919 seq_puts(seq, "lfs"); 960 seq_puts(seq, "lfs");
920 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 961 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
962 if (F2FS_IO_SIZE_BITS(sbi))
963 seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
964#ifdef CONFIG_F2FS_FAULT_INJECTION
965 if (test_opt(sbi, FAULT_INJECTION))
966 seq_puts(seq, ",fault_injection");
967#endif
921 968
922 return 0; 969 return 0;
923} 970}
@@ -995,6 +1042,7 @@ static void default_options(struct f2fs_sb_info *sbi)
995 sbi->active_logs = NR_CURSEG_TYPE; 1042 sbi->active_logs = NR_CURSEG_TYPE;
996 1043
997 set_opt(sbi, BG_GC); 1044 set_opt(sbi, BG_GC);
1045 set_opt(sbi, INLINE_XATTR);
998 set_opt(sbi, INLINE_DATA); 1046 set_opt(sbi, INLINE_DATA);
999 set_opt(sbi, INLINE_DENTRY); 1047 set_opt(sbi, INLINE_DENTRY);
1000 set_opt(sbi, EXTENT_CACHE); 1048 set_opt(sbi, EXTENT_CACHE);
@@ -1686,36 +1734,55 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1686static int f2fs_scan_devices(struct f2fs_sb_info *sbi) 1734static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1687{ 1735{
1688 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1736 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1737 unsigned int max_devices = MAX_DEVICES;
1689 int i; 1738 int i;
1690 1739
1691 for (i = 0; i < MAX_DEVICES; i++) { 1740 /* Initialize single device information */
1692 if (!RDEV(i).path[0]) 1741 if (!RDEV(0).path[0]) {
1742 if (!bdev_is_zoned(sbi->sb->s_bdev))
1693 return 0; 1743 return 0;
1744 max_devices = 1;
1745 }
1694 1746
1695 if (i == 0) { 1747 /*
1696 sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) * 1748 * Initialize multiple devices information, or single
1697 MAX_DEVICES, GFP_KERNEL); 1749 * zoned block device information.
1698 if (!sbi->devs) 1750 */
1699 return -ENOMEM; 1751 sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
1700 } 1752 GFP_KERNEL);
1753 if (!sbi->devs)
1754 return -ENOMEM;
1701 1755
1702 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); 1756 for (i = 0; i < max_devices; i++) {
1703 FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
1704 if (i == 0) {
1705 FDEV(i).start_blk = 0;
1706 FDEV(i).end_blk = FDEV(i).start_blk +
1707 (FDEV(i).total_segments <<
1708 sbi->log_blocks_per_seg) - 1 +
1709 le32_to_cpu(raw_super->segment0_blkaddr);
1710 } else {
1711 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1712 FDEV(i).end_blk = FDEV(i).start_blk +
1713 (FDEV(i).total_segments <<
1714 sbi->log_blocks_per_seg) - 1;
1715 }
1716 1757
1717 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, 1758 if (i > 0 && !RDEV(i).path[0])
1759 break;
1760
1761 if (max_devices == 1) {
1762 /* Single zoned block device mount */
1763 FDEV(0).bdev =
1764 blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
1765 sbi->sb->s_mode, sbi->sb->s_type);
1766 } else {
1767 /* Multi-device mount */
1768 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1769 FDEV(i).total_segments =
1770 le32_to_cpu(RDEV(i).total_segments);
1771 if (i == 0) {
1772 FDEV(i).start_blk = 0;
1773 FDEV(i).end_blk = FDEV(i).start_blk +
1774 (FDEV(i).total_segments <<
1775 sbi->log_blocks_per_seg) - 1 +
1776 le32_to_cpu(raw_super->segment0_blkaddr);
1777 } else {
1778 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1779 FDEV(i).end_blk = FDEV(i).start_blk +
1780 (FDEV(i).total_segments <<
1781 sbi->log_blocks_per_seg) - 1;
1782 }
1783 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1718 sbi->sb->s_mode, sbi->sb->s_type); 1784 sbi->sb->s_mode, sbi->sb->s_type);
1785 }
1719 if (IS_ERR(FDEV(i).bdev)) 1786 if (IS_ERR(FDEV(i).bdev))
1720 return PTR_ERR(FDEV(i).bdev); 1787 return PTR_ERR(FDEV(i).bdev);
1721 1788
@@ -1735,6 +1802,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1735 "Failed to initialize F2FS blkzone information"); 1802 "Failed to initialize F2FS blkzone information");
1736 return -EINVAL; 1803 return -EINVAL;
1737 } 1804 }
1805 if (max_devices == 1)
1806 break;
1738 f2fs_msg(sbi->sb, KERN_INFO, 1807 f2fs_msg(sbi->sb, KERN_INFO,
1739 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", 1808 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1740 i, FDEV(i).path, 1809 i, FDEV(i).path,
@@ -1751,6 +1820,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1751 FDEV(i).total_segments, 1820 FDEV(i).total_segments,
1752 FDEV(i).start_blk, FDEV(i).end_blk); 1821 FDEV(i).start_blk, FDEV(i).end_blk);
1753 } 1822 }
1823 f2fs_msg(sbi->sb, KERN_INFO,
1824 "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1754 return 0; 1825 return 0;
1755} 1826}
1756 1827
@@ -1868,12 +1939,19 @@ try_onemore:
1868 if (err) 1939 if (err)
1869 goto free_options; 1940 goto free_options;
1870 1941
1942 if (F2FS_IO_SIZE(sbi) > 1) {
1943 sbi->write_io_dummy =
1944 mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
1945 if (!sbi->write_io_dummy)
1946 goto free_options;
1947 }
1948
1871 /* get an inode for meta space */ 1949 /* get an inode for meta space */
1872 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 1950 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1873 if (IS_ERR(sbi->meta_inode)) { 1951 if (IS_ERR(sbi->meta_inode)) {
1874 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); 1952 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1875 err = PTR_ERR(sbi->meta_inode); 1953 err = PTR_ERR(sbi->meta_inode);
1876 goto free_options; 1954 goto free_io_dummy;
1877 } 1955 }
1878 1956
1879 err = get_valid_checkpoint(sbi); 1957 err = get_valid_checkpoint(sbi);
@@ -2048,6 +2126,8 @@ skip_recovery:
2048 sbi->valid_super_block ? 1 : 2, err); 2126 sbi->valid_super_block ? 1 : 2, err);
2049 } 2127 }
2050 2128
2129 f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
2130 cur_cp_version(F2FS_CKPT(sbi)));
2051 f2fs_update_time(sbi, CP_TIME); 2131 f2fs_update_time(sbi, CP_TIME);
2052 f2fs_update_time(sbi, REQ_TIME); 2132 f2fs_update_time(sbi, REQ_TIME);
2053 return 0; 2133 return 0;
@@ -2091,6 +2171,8 @@ free_devices:
2091free_meta_inode: 2171free_meta_inode:
2092 make_bad_inode(sbi->meta_inode); 2172 make_bad_inode(sbi->meta_inode);
2093 iput(sbi->meta_inode); 2173 iput(sbi->meta_inode);
2174free_io_dummy:
2175 mempool_destroy(sbi->write_io_dummy);
2094free_options: 2176free_options:
2095 destroy_percpu_info(sbi); 2177 destroy_percpu_info(sbi);
2096 kfree(options); 2178 kfree(options);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c47ce2f330a1..7298a4488f7f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
217 return entry; 217 return entry;
218} 218}
219 219
220static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
221 void **last_addr, int index,
222 size_t len, const char *name)
223{
224 struct f2fs_xattr_entry *entry;
225 unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
226
227 list_for_each_xattr(entry, base_addr) {
228 if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
229 (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
230 base_addr + inline_size) {
231 *last_addr = entry;
232 return NULL;
233 }
234 if (entry->e_name_index != index)
235 continue;
236 if (entry->e_name_len != len)
237 continue;
238 if (!memcmp(entry->e_name, name, len))
239 break;
240 }
241 return entry;
242}
243
244static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
245 unsigned int index, unsigned int len,
246 const char *name, struct f2fs_xattr_entry **xe,
247 void **base_addr)
248{
249 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
250 void *cur_addr, *txattr_addr, *last_addr = NULL;
251 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
252 unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
253 unsigned int inline_size = 0;
254 int err = 0;
255
256 inline_size = inline_xattr_size(inode);
257
258 if (!size && !inline_size)
259 return -ENODATA;
260
261 txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
262 GFP_F2FS_ZERO);
263 if (!txattr_addr)
264 return -ENOMEM;
265
266 /* read from inline xattr */
267 if (inline_size) {
268 struct page *page = NULL;
269 void *inline_addr;
270
271 if (ipage) {
272 inline_addr = inline_xattr_addr(ipage);
273 } else {
274 page = get_node_page(sbi, inode->i_ino);
275 if (IS_ERR(page)) {
276 err = PTR_ERR(page);
277 goto out;
278 }
279 inline_addr = inline_xattr_addr(page);
280 }
281 memcpy(txattr_addr, inline_addr, inline_size);
282 f2fs_put_page(page, 1);
283
284 *xe = __find_inline_xattr(txattr_addr, &last_addr,
285 index, len, name);
286 if (*xe)
287 goto check;
288 }
289
290 /* read from xattr node block */
291 if (xnid) {
292 struct page *xpage;
293 void *xattr_addr;
294
295 /* The inode already has an extended attribute block. */
296 xpage = get_node_page(sbi, xnid);
297 if (IS_ERR(xpage)) {
298 err = PTR_ERR(xpage);
299 goto out;
300 }
301
302 xattr_addr = page_address(xpage);
303 memcpy(txattr_addr + inline_size, xattr_addr, size);
304 f2fs_put_page(xpage, 1);
305 }
306
307 if (last_addr)
308 cur_addr = XATTR_HDR(last_addr) - 1;
309 else
310 cur_addr = txattr_addr;
311
312 *xe = __find_xattr(cur_addr, index, len, name);
313check:
314 if (IS_XATTR_LAST_ENTRY(*xe)) {
315 err = -ENODATA;
316 goto out;
317 }
318
319 *base_addr = txattr_addr;
320 return 0;
321out:
322 kzfree(txattr_addr);
323 return err;
324}
325
220static int read_all_xattrs(struct inode *inode, struct page *ipage, 326static int read_all_xattrs(struct inode *inode, struct page *ipage,
221 void **base_addr) 327 void **base_addr)
222{ 328{
@@ -348,23 +454,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
348 } 454 }
349 455
350 xattr_addr = page_address(xpage); 456 xattr_addr = page_address(xpage);
351 memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - 457 memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
352 sizeof(struct node_footer));
353 set_page_dirty(xpage); 458 set_page_dirty(xpage);
354 f2fs_put_page(xpage, 1); 459 f2fs_put_page(xpage, 1);
355 460
356 /* need to checkpoint during fsync */
357 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
358 return 0; 461 return 0;
359} 462}
360 463
361int f2fs_getxattr(struct inode *inode, int index, const char *name, 464int f2fs_getxattr(struct inode *inode, int index, const char *name,
362 void *buffer, size_t buffer_size, struct page *ipage) 465 void *buffer, size_t buffer_size, struct page *ipage)
363{ 466{
364 struct f2fs_xattr_entry *entry; 467 struct f2fs_xattr_entry *entry = NULL;
365 void *base_addr;
366 int error = 0; 468 int error = 0;
367 size_t size, len; 469 unsigned int size, len;
470 void *base_addr = NULL;
368 471
369 if (name == NULL) 472 if (name == NULL)
370 return -EINVAL; 473 return -EINVAL;
@@ -373,21 +476,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
373 if (len > F2FS_NAME_LEN) 476 if (len > F2FS_NAME_LEN)
374 return -ERANGE; 477 return -ERANGE;
375 478
376 error = read_all_xattrs(inode, ipage, &base_addr); 479 error = lookup_all_xattrs(inode, ipage, index, len, name,
480 &entry, &base_addr);
377 if (error) 481 if (error)
378 return error; 482 return error;
379 483
380 entry = __find_xattr(base_addr, index, len, name);
381 if (IS_XATTR_LAST_ENTRY(entry)) {
382 error = -ENODATA;
383 goto cleanup;
384 }
385
386 size = le16_to_cpu(entry->e_value_size); 484 size = le16_to_cpu(entry->e_value_size);
387 485
388 if (buffer && size > buffer_size) { 486 if (buffer && size > buffer_size) {
389 error = -ERANGE; 487 error = -ERANGE;
390 goto cleanup; 488 goto out;
391 } 489 }
392 490
393 if (buffer) { 491 if (buffer) {
@@ -395,8 +493,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
395 memcpy(buffer, pval, size); 493 memcpy(buffer, pval, size);
396 } 494 }
397 error = size; 495 error = size;
398 496out:
399cleanup:
400 kzfree(base_addr); 497 kzfree(base_addr);
401 return error; 498 return error;
402} 499}
@@ -445,6 +542,13 @@ cleanup:
445 return error; 542 return error;
446} 543}
447 544
545static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
546 const void *value, size_t size)
547{
548 void *pval = entry->e_name + entry->e_name_len;
549 return (entry->e_value_size == size) && !memcmp(pval, value, size);
550}
551
448static int __f2fs_setxattr(struct inode *inode, int index, 552static int __f2fs_setxattr(struct inode *inode, int index,
449 const char *name, const void *value, size_t size, 553 const char *name, const void *value, size_t size,
450 struct page *ipage, int flags) 554 struct page *ipage, int flags)
@@ -479,12 +583,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
479 583
480 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; 584 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
481 585
482 if ((flags & XATTR_REPLACE) && !found) { 586 if (found) {
587 if ((flags & XATTR_CREATE)) {
588 error = -EEXIST;
589 goto exit;
590 }
591
592 if (f2fs_xattr_value_same(here, value, size))
593 goto exit;
594 } else if ((flags & XATTR_REPLACE)) {
483 error = -ENODATA; 595 error = -ENODATA;
484 goto exit; 596 goto exit;
485 } else if ((flags & XATTR_CREATE) && found) {
486 error = -EEXIST;
487 goto exit;
488 } 597 }
489 598
490 last = here; 599 last = here;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index f990de20cdcd..d5a94928c116 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
72 for (entry = XATTR_FIRST_ENTRY(addr);\ 72 for (entry = XATTR_FIRST_ENTRY(addr);\
73 !IS_XATTR_LAST_ENTRY(entry);\ 73 !IS_XATTR_LAST_ENTRY(entry);\
74 entry = XATTR_NEXT_ENTRY(entry)) 74 entry = XATTR_NEXT_ENTRY(entry))
75 75#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
76#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ 76#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
77 sizeof(struct node_footer) - sizeof(__u32)) 77#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
78 VALID_XATTR_BLOCK_SIZE)
78 79
79#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ 80#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
80 sizeof(struct f2fs_xattr_header) - \ 81 sizeof(struct f2fs_xattr_header) - \
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index cea41a124a80..e2d239ed4c60 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,12 @@
36#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) 36#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
37#define F2FS_META_INO(sbi) (sbi->meta_ino_num) 37#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
38 38
/*
 * Write IO unit helpers, all derived from sbi->write_io_size_bits, the
 * log2 of the IO unit counted in blocks.  The +2 / +12 shifts convert
 * blocks to KB / bytes, i.e. they assume 4KB blocks.
 */
#define F2FS_IO_SIZE_BITS(sbi)	((sbi)->write_io_size_bits)		/* power of 2 */
#define F2FS_IO_SIZE(sbi)	(1 << F2FS_IO_SIZE_BITS(sbi))		/* Blocks */
#define F2FS_IO_SIZE_KB(sbi)	(1 << (F2FS_IO_SIZE_BITS(sbi) + 2))	/* KB */
#define F2FS_IO_SIZE_BYTES(sbi)	(1 << (F2FS_IO_SIZE_BITS(sbi) + 12))	/* B */
#define F2FS_IO_SIZE_MASK(sbi)	(F2FS_IO_SIZE(sbi) - 1)
44
39/* This flag is used by node and meta inodes, and by recovery */ 45/* This flag is used by node and meta inodes, and by recovery */
40#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) 46#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
41#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) 47#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
@@ -108,6 +114,7 @@ struct f2fs_super_block {
108/* 114/*
109 * For checkpoint 115 * For checkpoint
110 */ 116 */
117#define CP_NAT_BITS_FLAG 0x00000080
111#define CP_CRC_RECOVERY_FLAG 0x00000040 118#define CP_CRC_RECOVERY_FLAG 0x00000040
112#define CP_FASTBOOT_FLAG 0x00000020 119#define CP_FASTBOOT_FLAG 0x00000020
113#define CP_FSCK_FLAG 0x00000010 120#define CP_FSCK_FLAG 0x00000010
@@ -272,6 +279,7 @@ struct f2fs_node {
272 * For NAT entries 279 * For NAT entries
273 */ 280 */
274#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) 281#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
282#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
275 283
276struct f2fs_nat_entry { 284struct f2fs_nat_entry {
277 __u8 version; /* latest version of cached nat entry */ 285 __u8 version; /* latest version of cached nat entry */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 01b3c9869a0d..c80fcad0a6c9 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -6,8 +6,8 @@
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8 8
9#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) 9#define show_dev(dev) MAJOR(dev), MINOR(dev)
10#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino 10#define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino
11 11
12TRACE_DEFINE_ENUM(NODE); 12TRACE_DEFINE_ENUM(NODE);
13TRACE_DEFINE_ENUM(DATA); 13TRACE_DEFINE_ENUM(DATA);
@@ -55,25 +55,35 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
55 { IPU, "IN-PLACE" }, \ 55 { IPU, "IN-PLACE" }, \
56 { OPU, "OUT-OF-PLACE" }) 56 { OPU, "OUT-OF-PLACE" })
57 57
58#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA)) 58#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\
59#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) 59 REQ_PRIO)
60 60#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS)
61#define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ 61
62 show_bio_extra(op_flags) 62#define show_bio_type(op,op_flags) show_bio_op(op), \
63 show_bio_op_flags(op_flags)
64
/*
 * Print a bio operation code (REQ_OP_*) as a short name in trace output.
 * Used by show_bio_type() together with show_bio_op_flags().
 */
65#define show_bio_op(op) \
66 __print_symbolic(op, \
67 { REQ_OP_READ, "READ" }, \
68 { REQ_OP_WRITE, "WRITE" }, \
69 { REQ_OP_FLUSH, "FLUSH" }, \
70 { REQ_OP_DISCARD, "DISCARD" }, \
71 { REQ_OP_ZONE_REPORT, "ZONE_REPORT" }, \
72 { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \
73 { REQ_OP_ZONE_RESET, "ZONE_RESET" }, \
74 { REQ_OP_WRITE_SAME, "WRITE_SAME" }, \
75 { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" })
63 76
64#define show_bio_op_flags(flags) \ 77#define show_bio_op_flags(flags) \
65 __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ 78 __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \
66 { 0, "WRITE" }, \ 79 { REQ_RAHEAD, "(RA)" }, \
67 { REQ_RAHEAD, "READAHEAD" }, \ 80 { REQ_SYNC, "(S)" }, \
68 { REQ_SYNC, "REQ_SYNC" }, \ 81 { REQ_SYNC | REQ_PRIO, "(SP)" }, \
69 { REQ_PREFLUSH, "REQ_PREFLUSH" }, \
70 { REQ_FUA, "REQ_FUA" })
71
72#define show_bio_extra(type) \
73 __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \
74 { REQ_META, "(M)" }, \ 82 { REQ_META, "(M)" }, \
75 { REQ_PRIO, "(P)" }, \
76 { REQ_META | REQ_PRIO, "(MP)" }, \ 83 { REQ_META | REQ_PRIO, "(MP)" }, \
84 { REQ_SYNC | REQ_PREFLUSH , "(SF)" }, \
85 { REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \
86 { REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \
77 { 0, " \b" }) 87 { 0, " \b" })
78 88
79#define show_data_type(type) \ 89#define show_data_type(type) \
@@ -235,7 +245,7 @@ TRACE_EVENT(f2fs_sync_fs,
235 ), 245 ),
236 246
237 TP_printk("dev = (%d,%d), superblock is %s, wait = %d", 247 TP_printk("dev = (%d,%d), superblock is %s, wait = %d",
238 show_dev(__entry), 248 show_dev(__entry->dev),
239 __entry->dirty ? "dirty" : "not dirty", 249 __entry->dirty ? "dirty" : "not dirty",
240 __entry->wait) 250 __entry->wait)
241); 251);
@@ -305,6 +315,13 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit,
305 TP_ARGS(inode, ret) 315 TP_ARGS(inode, ret)
306); 316);
307 317
/*
 * Tracepoint for f2fs_drop_inode(), built on the f2fs__inode_exit event
 * class.  @ret is the value f2fs_drop_inode() is about to return: 0 on its
 * early-return path, otherwise the result of generic_drop_inode().
 */
318DEFINE_EVENT(f2fs__inode_exit, f2fs_drop_inode,
319
320 TP_PROTO(struct inode *inode, int ret),
321
322 TP_ARGS(inode, ret)
323);
324
308DEFINE_EVENT(f2fs__inode, f2fs_truncate, 325DEFINE_EVENT(f2fs__inode, f2fs_truncate,
309 326
310 TP_PROTO(struct inode *inode), 327 TP_PROTO(struct inode *inode),
@@ -534,7 +551,7 @@ TRACE_EVENT(f2fs_background_gc,
534 ), 551 ),
535 552
536 TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u", 553 TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
537 show_dev(__entry), 554 show_dev(__entry->dev),
538 __entry->wait_ms, 555 __entry->wait_ms,
539 __entry->prefree, 556 __entry->prefree,
540 __entry->free) 557 __entry->free)
@@ -555,6 +572,7 @@ TRACE_EVENT(f2fs_get_victim,
555 __field(int, alloc_mode) 572 __field(int, alloc_mode)
556 __field(int, gc_mode) 573 __field(int, gc_mode)
557 __field(unsigned int, victim) 574 __field(unsigned int, victim)
575 __field(unsigned int, cost)
558 __field(unsigned int, ofs_unit) 576 __field(unsigned int, ofs_unit)
559 __field(unsigned int, pre_victim) 577 __field(unsigned int, pre_victim)
560 __field(unsigned int, prefree) 578 __field(unsigned int, prefree)
@@ -568,20 +586,23 @@ TRACE_EVENT(f2fs_get_victim,
568 __entry->alloc_mode = p->alloc_mode; 586 __entry->alloc_mode = p->alloc_mode;
569 __entry->gc_mode = p->gc_mode; 587 __entry->gc_mode = p->gc_mode;
570 __entry->victim = p->min_segno; 588 __entry->victim = p->min_segno;
589 __entry->cost = p->min_cost;
571 __entry->ofs_unit = p->ofs_unit; 590 __entry->ofs_unit = p->ofs_unit;
572 __entry->pre_victim = pre_victim; 591 __entry->pre_victim = pre_victim;
573 __entry->prefree = prefree; 592 __entry->prefree = prefree;
574 __entry->free = free; 593 __entry->free = free;
575 ), 594 ),
576 595
577 TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " 596 TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), "
578 "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", 597 "victim = %u, cost = %u, ofs_unit = %u, "
579 show_dev(__entry), 598 "pre_victim_secno = %d, prefree = %u, free = %u",
599 show_dev(__entry->dev),
580 show_data_type(__entry->type), 600 show_data_type(__entry->type),
581 show_gc_type(__entry->gc_type), 601 show_gc_type(__entry->gc_type),
582 show_alloc_mode(__entry->alloc_mode), 602 show_alloc_mode(__entry->alloc_mode),
583 show_victim_policy(__entry->gc_mode), 603 show_victim_policy(__entry->gc_mode),
584 __entry->victim, 604 __entry->victim,
605 __entry->cost,
585 __entry->ofs_unit, 606 __entry->ofs_unit,
586 (int)__entry->pre_victim, 607 (int)__entry->pre_victim,
587 __entry->prefree, 608 __entry->prefree,
@@ -713,7 +734,7 @@ TRACE_EVENT(f2fs_reserve_new_blocks,
713 ), 734 ),
714 735
715 TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", 736 TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
716 show_dev(__entry), 737 show_dev(__entry->dev),
717 (unsigned int)__entry->nid, 738 (unsigned int)__entry->nid,
718 __entry->ofs_in_node, 739 __entry->ofs_in_node,
719 (unsigned long long)__entry->count) 740 (unsigned long long)__entry->count)
@@ -753,7 +774,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
753 (unsigned long)__entry->index, 774 (unsigned long)__entry->index,
754 (unsigned long long)__entry->old_blkaddr, 775 (unsigned long long)__entry->old_blkaddr,
755 (unsigned long long)__entry->new_blkaddr, 776 (unsigned long long)__entry->new_blkaddr,
756 show_bio_type(__entry->op_flags), 777 show_bio_type(__entry->op, __entry->op_flags),
757 show_block_type(__entry->type)) 778 show_block_type(__entry->type))
758); 779);
759 780
@@ -775,15 +796,15 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
775 TP_CONDITION(page->mapping) 796 TP_CONDITION(page->mapping)
776); 797);
777 798
778DECLARE_EVENT_CLASS(f2fs__submit_bio, 799DECLARE_EVENT_CLASS(f2fs__bio,
779 800
780 TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, 801 TP_PROTO(struct super_block *sb, int type, struct bio *bio),
781 struct bio *bio),
782 802
783 TP_ARGS(sb, fio, bio), 803 TP_ARGS(sb, type, bio),
784 804
785 TP_STRUCT__entry( 805 TP_STRUCT__entry(
786 __field(dev_t, dev) 806 __field(dev_t, dev)
807 __field(dev_t, target)
787 __field(int, op) 808 __field(int, op)
788 __field(int, op_flags) 809 __field(int, op_flags)
789 __field(int, type) 810 __field(int, type)
@@ -793,37 +814,55 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
793 814
794 TP_fast_assign( 815 TP_fast_assign(
795 __entry->dev = sb->s_dev; 816 __entry->dev = sb->s_dev;
796 __entry->op = fio->op; 817 __entry->target = bio->bi_bdev->bd_dev;
797 __entry->op_flags = fio->op_flags; 818 __entry->op = bio_op(bio);
798 __entry->type = fio->type; 819 __entry->op_flags = bio->bi_opf;
820 __entry->type = type;
799 __entry->sector = bio->bi_iter.bi_sector; 821 __entry->sector = bio->bi_iter.bi_sector;
800 __entry->size = bio->bi_iter.bi_size; 822 __entry->size = bio->bi_iter.bi_size;
801 ), 823 ),
802 824
803 TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u", 825 TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u",
804 show_dev(__entry), 826 show_dev(__entry->target),
805 show_bio_type(__entry->op_flags), 827 show_dev(__entry->dev),
828 show_bio_type(__entry->op, __entry->op_flags),
806 show_block_type(__entry->type), 829 show_block_type(__entry->type),
807 (unsigned long long)__entry->sector, 830 (unsigned long long)__entry->sector,
808 __entry->size) 831 __entry->size)
809); 832);
810 833
811DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, 834DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio,
835
836 TP_PROTO(struct super_block *sb, int type, struct bio *bio),
837
838 TP_ARGS(sb, type, bio),
839
840 TP_CONDITION(bio)
841);
842
843DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio,
812 844
813 TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, 845 TP_PROTO(struct super_block *sb, int type, struct bio *bio),
814 struct bio *bio),
815 846
816 TP_ARGS(sb, fio, bio), 847 TP_ARGS(sb, type, bio),
817 848
818 TP_CONDITION(bio) 849 TP_CONDITION(bio)
819); 850);
820 851
821DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, 852DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio,
822 853
823 TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, 854 TP_PROTO(struct super_block *sb, int type, struct bio *bio),
824 struct bio *bio),
825 855
826 TP_ARGS(sb, fio, bio), 856 TP_ARGS(sb, type, bio),
857
858 TP_CONDITION(bio)
859);
860
861DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio,
862
863 TP_PROTO(struct super_block *sb, int type, struct bio *bio),
864
865 TP_ARGS(sb, type, bio),
827 866
828 TP_CONDITION(bio) 867 TP_CONDITION(bio)
829); 868);
@@ -1082,16 +1121,16 @@ TRACE_EVENT(f2fs_write_checkpoint,
1082 ), 1121 ),
1083 1122
1084 TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", 1123 TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
1085 show_dev(__entry), 1124 show_dev(__entry->dev),
1086 show_cpreason(__entry->reason), 1125 show_cpreason(__entry->reason),
1087 __entry->msg) 1126 __entry->msg)
1088); 1127);
1089 1128
1090TRACE_EVENT(f2fs_issue_discard, 1129TRACE_EVENT(f2fs_issue_discard,
1091 1130
1092 TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen), 1131 TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
1093 1132
1094 TP_ARGS(sb, blkstart, blklen), 1133 TP_ARGS(dev, blkstart, blklen),
1095 1134
1096 TP_STRUCT__entry( 1135 TP_STRUCT__entry(
1097 __field(dev_t, dev) 1136 __field(dev_t, dev)
@@ -1100,22 +1139,22 @@ TRACE_EVENT(f2fs_issue_discard,
1100 ), 1139 ),
1101 1140
1102 TP_fast_assign( 1141 TP_fast_assign(
1103 __entry->dev = sb->s_dev; 1142 __entry->dev = dev->bd_dev;
1104 __entry->blkstart = blkstart; 1143 __entry->blkstart = blkstart;
1105 __entry->blklen = blklen; 1144 __entry->blklen = blklen;
1106 ), 1145 ),
1107 1146
1108 TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx", 1147 TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
1109 show_dev(__entry), 1148 show_dev(__entry->dev),
1110 (unsigned long long)__entry->blkstart, 1149 (unsigned long long)__entry->blkstart,
1111 (unsigned long long)__entry->blklen) 1150 (unsigned long long)__entry->blklen)
1112); 1151);
1113 1152
1114TRACE_EVENT(f2fs_issue_reset_zone, 1153TRACE_EVENT(f2fs_issue_reset_zone,
1115 1154
1116 TP_PROTO(struct super_block *sb, block_t blkstart), 1155 TP_PROTO(struct block_device *dev, block_t blkstart),
1117 1156
1118 TP_ARGS(sb, blkstart), 1157 TP_ARGS(dev, blkstart),
1119 1158
1120 TP_STRUCT__entry( 1159 TP_STRUCT__entry(
1121 __field(dev_t, dev) 1160 __field(dev_t, dev)
@@ -1123,21 +1162,21 @@ TRACE_EVENT(f2fs_issue_reset_zone,
1123 ), 1162 ),
1124 1163
1125 TP_fast_assign( 1164 TP_fast_assign(
1126 __entry->dev = sb->s_dev; 1165 __entry->dev = dev->bd_dev;
1127 __entry->blkstart = blkstart; 1166 __entry->blkstart = blkstart;
1128 ), 1167 ),
1129 1168
1130 TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", 1169 TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
1131 show_dev(__entry), 1170 show_dev(__entry->dev),
1132 (unsigned long long)__entry->blkstart) 1171 (unsigned long long)__entry->blkstart)
1133); 1172);
1134 1173
1135TRACE_EVENT(f2fs_issue_flush, 1174TRACE_EVENT(f2fs_issue_flush,
1136 1175
1137 TP_PROTO(struct super_block *sb, unsigned int nobarrier, 1176 TP_PROTO(struct block_device *dev, unsigned int nobarrier,
1138 unsigned int flush_merge), 1177 unsigned int flush_merge),
1139 1178
1140 TP_ARGS(sb, nobarrier, flush_merge), 1179 TP_ARGS(dev, nobarrier, flush_merge),
1141 1180
1142 TP_STRUCT__entry( 1181 TP_STRUCT__entry(
1143 __field(dev_t, dev) 1182 __field(dev_t, dev)
@@ -1146,13 +1185,13 @@ TRACE_EVENT(f2fs_issue_flush,
1146 ), 1185 ),
1147 1186
1148 TP_fast_assign( 1187 TP_fast_assign(
1149 __entry->dev = sb->s_dev; 1188 __entry->dev = dev->bd_dev;
1150 __entry->nobarrier = nobarrier; 1189 __entry->nobarrier = nobarrier;
1151 __entry->flush_merge = flush_merge; 1190 __entry->flush_merge = flush_merge;
1152 ), 1191 ),
1153 1192
1154 TP_printk("dev = (%d,%d), %s %s", 1193 TP_printk("dev = (%d,%d), %s %s",
1155 show_dev(__entry), 1194 show_dev(__entry->dev),
1156 __entry->nobarrier ? "skip (nobarrier)" : "issue", 1195 __entry->nobarrier ? "skip (nobarrier)" : "issue",
1157 __entry->flush_merge ? " with flush_merge" : "") 1196 __entry->flush_merge ? " with flush_merge" : "")
1158); 1197);
@@ -1267,7 +1306,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
1267 ), 1306 ),
1268 1307
1269 TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", 1308 TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u",
1270 show_dev(__entry), 1309 show_dev(__entry->dev),
1271 __entry->node_cnt, 1310 __entry->node_cnt,
1272 __entry->tree_cnt) 1311 __entry->tree_cnt)
1273); 1312);
@@ -1314,7 +1353,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
1314 ), 1353 ),
1315 1354
1316 TP_printk("dev = (%d,%d), %s, dirty count = %lld", 1355 TP_printk("dev = (%d,%d), %s, dirty count = %lld",
1317 show_dev(__entry), 1356 show_dev(__entry->dev),
1318 show_file_type(__entry->type), 1357 show_file_type(__entry->type),
1319 __entry->count) 1358 __entry->count)
1320); 1359);