aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-01 18:55:04 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-01 18:55:04 -0500
commit25c4e6c3f0c14d1575aa488ff4ca47e045ae51a0 (patch)
tree4ecf60124fd87fbd655393a081beecaf88746eea /fs
parent6053dc981449718d90a429933e99b441e1adaea6 (diff)
parent900f736251c81886f3064c9d489c85eddee921b7 (diff)
Merge tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This round introduces several interesting features such as on-disk NAT bitmaps, IO alignment, and a discard thread. And it includes a couple of major bug fixes as below. Enhancements: - introduce on-disk bitmaps to avoid scanning NAT blocks when getting free nids - support IO alignment to prepare open-channel SSD integration in future - introduce a discard thread to avoid long latency during checkpoint and fstrim - use SSR for warm node and enable inline_xattr by default - introduce in-memory bitmaps to check FS consistency for debugging - improve write_begin by avoiding needless read IO Bug fixes: - fix broken zone_reset behavior for SMR drive - fix wrong victim selection policy during GC - fix missing behavior when preparing discard commands - fix bugs in atomic write support and fiemap - workaround to handle multiple f2fs_add_link calls having same name ... and it includes a bunch of clean-up patches as well" * tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (97 commits) f2fs: avoid to flush nat journal entries f2fs: avoid to issue redundant discard commands f2fs: fix a plint compile warning f2fs: add f2fs_drop_inode tracepoint f2fs: Fix zoned block device support f2fs: remove redundant set_page_dirty() f2fs: fix to enlarge size of write_io_dummy mempool f2fs: fix memory leak of write_io_dummy mempool during umount f2fs: fix to update F2FS_{CP_}WB_DATA count correctly f2fs: use MAX_FREE_NIDS for the free nids target f2fs: introduce free nid bitmap f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint f2fs: update the comment of default nr_pages to skipping f2fs: drop the duplicate pval in f2fs_getxattr f2fs: Don't update the xattr data that same as the exist f2fs: kill __is_extent_same f2fs: avoid bggc->fggc when enough free segments are avaliable after cp f2fs: select target segment with closer temperature in SSR mode f2fs: show simple call stack in fault injection message f2fs: no 
need lock_op in f2fs_write_inline_data ...
Diffstat (limited to 'fs')
-rw-r--r--fs/f2fs/checkpoint.c70
-rw-r--r--fs/f2fs/data.c191
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c38
-rw-r--r--fs/f2fs/extent_cache.c52
-rw-r--r--fs/f2fs/f2fs.h644
-rw-r--r--fs/f2fs/file.c36
-rw-r--r--fs/f2fs/gc.c79
-rw-r--r--fs/f2fs/inode.c4
-rw-r--r--fs/f2fs/namei.c18
-rw-r--r--fs/f2fs/node.c560
-rw-r--r--fs/f2fs/node.h33
-rw-r--r--fs/f2fs/recovery.c17
-rw-r--r--fs/f2fs/segment.c501
-rw-r--r--fs/f2fs/segment.h40
-rw-r--r--fs/f2fs/super.c138
-rw-r--r--fs/f2fs/xattr.c151
-rw-r--r--fs/f2fs/xattr.h7
18 files changed, 1858 insertions, 752 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f73ee9534d83..0339daf4ca02 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
249 dec_page_count(sbi, F2FS_DIRTY_META); 249 dec_page_count(sbi, F2FS_DIRTY_META);
250 250
251 if (wbc->for_reclaim) 251 if (wbc->for_reclaim)
252 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE); 252 f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
253 0, page->index, META, WRITE);
253 254
254 unlock_page(page); 255 unlock_page(page);
255 256
@@ -493,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
493#ifdef CONFIG_F2FS_FAULT_INJECTION 494#ifdef CONFIG_F2FS_FAULT_INJECTION
494 if (time_to_inject(sbi, FAULT_ORPHAN)) { 495 if (time_to_inject(sbi, FAULT_ORPHAN)) {
495 spin_unlock(&im->ino_lock); 496 spin_unlock(&im->ino_lock);
497 f2fs_show_injection_info(FAULT_ORPHAN);
496 return -ENOSPC; 498 return -ENOSPC;
497 } 499 }
498#endif 500#endif
@@ -681,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
681 return -EINVAL; 683 return -EINVAL;
682 } 684 }
683 685
684 crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block 686 crc = cur_cp_crc(*cp_block);
685 + crc_offset)));
686 if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { 687 if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
687 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); 688 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
688 return -EINVAL; 689 return -EINVAL;
@@ -891,7 +892,7 @@ retry:
891 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); 892 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
892 return 0; 893 return 0;
893 } 894 }
894 fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); 895 fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
895 inode = igrab(&fi->vfs_inode); 896 inode = igrab(&fi->vfs_inode);
896 spin_unlock(&sbi->inode_lock[type]); 897 spin_unlock(&sbi->inode_lock[type]);
897 if (inode) { 898 if (inode) {
@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
924 spin_unlock(&sbi->inode_lock[DIRTY_META]); 925 spin_unlock(&sbi->inode_lock[DIRTY_META]);
925 return 0; 926 return 0;
926 } 927 }
927 fi = list_entry(head->next, struct f2fs_inode_info, 928 fi = list_first_entry(head, struct f2fs_inode_info,
928 gdirty_list); 929 gdirty_list);
929 inode = igrab(&fi->vfs_inode); 930 inode = igrab(&fi->vfs_inode);
930 spin_unlock(&sbi->inode_lock[DIRTY_META]); 931 spin_unlock(&sbi->inode_lock[DIRTY_META]);
@@ -998,8 +999,6 @@ out:
998static void unblock_operations(struct f2fs_sb_info *sbi) 999static void unblock_operations(struct f2fs_sb_info *sbi)
999{ 1000{
1000 up_write(&sbi->node_write); 1001 up_write(&sbi->node_write);
1001
1002 build_free_nids(sbi, false);
1003 f2fs_unlock_all(sbi); 1002 f2fs_unlock_all(sbi);
1004} 1003}
1005 1004
@@ -1025,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1025 1024
1026 spin_lock(&sbi->cp_lock); 1025 spin_lock(&sbi->cp_lock);
1027 1026
1027 if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
1028 sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
1029 disable_nat_bits(sbi, false);
1030
1028 if (cpc->reason == CP_UMOUNT) 1031 if (cpc->reason == CP_UMOUNT)
1029 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 1032 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1030 else 1033 else
@@ -1137,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1137 1140
1138 start_blk = __start_cp_next_addr(sbi); 1141 start_blk = __start_cp_next_addr(sbi);
1139 1142
1143 /* write nat bits */
1144 if (enabled_nat_bits(sbi, cpc)) {
1145 __u64 cp_ver = cur_cp_version(ckpt);
1146 unsigned int i;
1147 block_t blk;
1148
1149 cp_ver |= ((__u64)crc32 << 32);
1150 *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
1151
1152 blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
1153 for (i = 0; i < nm_i->nat_bits_blocks; i++)
1154 update_meta_page(sbi, nm_i->nat_bits +
1155 (i << F2FS_BLKSIZE_BITS), blk + i);
1156
1157 /* Flush all the NAT BITS pages */
1158 while (get_pages(sbi, F2FS_DIRTY_META)) {
1159 sync_meta_pages(sbi, META, LONG_MAX);
1160 if (unlikely(f2fs_cp_error(sbi)))
1161 return -EIO;
1162 }
1163 }
1164
1140 /* need to wait for end_io results */ 1165 /* need to wait for end_io results */
1141 wait_on_all_pages_writeback(sbi); 1166 wait_on_all_pages_writeback(sbi);
1142 if (unlikely(f2fs_cp_error(sbi))) 1167 if (unlikely(f2fs_cp_error(sbi)))
@@ -1248,15 +1273,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1248 f2fs_flush_merged_bios(sbi); 1273 f2fs_flush_merged_bios(sbi);
1249 1274
1250 /* this is the case of multiple fstrims without any changes */ 1275 /* this is the case of multiple fstrims without any changes */
1251 if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) { 1276 if (cpc->reason == CP_DISCARD) {
1252 f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt); 1277 if (!exist_trim_candidates(sbi, cpc)) {
1253 f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries); 1278 unblock_operations(sbi);
1254 f2fs_bug_on(sbi, prefree_segments(sbi)); 1279 goto out;
1255 flush_sit_entries(sbi, cpc); 1280 }
1256 clear_prefree_segments(sbi, cpc); 1281
1257 f2fs_wait_all_discard_bio(sbi); 1282 if (NM_I(sbi)->dirty_nat_cnt == 0 &&
1258 unblock_operations(sbi); 1283 SIT_I(sbi)->dirty_sentries == 0 &&
1259 goto out; 1284 prefree_segments(sbi) == 0) {
1285 flush_sit_entries(sbi, cpc);
1286 clear_prefree_segments(sbi, cpc);
1287 unblock_operations(sbi);
1288 goto out;
1289 }
1260 } 1290 }
1261 1291
1262 /* 1292 /*
@@ -1268,17 +1298,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1268 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); 1298 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1269 1299
1270 /* write cached NAT/SIT entries to NAT/SIT area */ 1300 /* write cached NAT/SIT entries to NAT/SIT area */
1271 flush_nat_entries(sbi); 1301 flush_nat_entries(sbi, cpc);
1272 flush_sit_entries(sbi, cpc); 1302 flush_sit_entries(sbi, cpc);
1273 1303
1274 /* unlock all the fs_lock[] in do_checkpoint() */ 1304 /* unlock all the fs_lock[] in do_checkpoint() */
1275 err = do_checkpoint(sbi, cpc); 1305 err = do_checkpoint(sbi, cpc);
1276 if (err) { 1306 if (err)
1277 release_discard_addrs(sbi); 1307 release_discard_addrs(sbi);
1278 } else { 1308 else
1279 clear_prefree_segments(sbi, cpc); 1309 clear_prefree_segments(sbi, cpc);
1280 f2fs_wait_all_discard_bio(sbi);
1281 }
1282 1310
1283 unblock_operations(sbi); 1311 unblock_operations(sbi);
1284 stat_inc_cp_count(sbi->stat_info); 1312 stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ac262564fa6..1375fef11146 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -55,8 +55,10 @@ static void f2fs_read_end_io(struct bio *bio)
55 int i; 55 int i;
56 56
57#ifdef CONFIG_F2FS_FAULT_INJECTION 57#ifdef CONFIG_F2FS_FAULT_INJECTION
58 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) 58 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
59 f2fs_show_injection_info(FAULT_IO);
59 bio->bi_error = -EIO; 60 bio->bi_error = -EIO;
61 }
60#endif 62#endif
61 63
62 if (f2fs_bio_encrypted(bio)) { 64 if (f2fs_bio_encrypted(bio)) {
@@ -93,6 +95,17 @@ static void f2fs_write_end_io(struct bio *bio)
93 struct page *page = bvec->bv_page; 95 struct page *page = bvec->bv_page;
94 enum count_type type = WB_DATA_TYPE(page); 96 enum count_type type = WB_DATA_TYPE(page);
95 97
98 if (IS_DUMMY_WRITTEN_PAGE(page)) {
99 set_page_private(page, (unsigned long)NULL);
100 ClearPagePrivate(page);
101 unlock_page(page);
102 mempool_free(page, sbi->write_io_dummy);
103
104 if (unlikely(bio->bi_error))
105 f2fs_stop_checkpoint(sbi, true);
106 continue;
107 }
108
96 fscrypt_pullback_bio_page(&page, true); 109 fscrypt_pullback_bio_page(&page, true);
97 110
98 if (unlikely(bio->bi_error)) { 111 if (unlikely(bio->bi_error)) {
@@ -171,10 +184,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
171 struct bio *bio, enum page_type type) 184 struct bio *bio, enum page_type type)
172{ 185{
173 if (!is_read_io(bio_op(bio))) { 186 if (!is_read_io(bio_op(bio))) {
187 unsigned int start;
188
174 if (f2fs_sb_mounted_blkzoned(sbi->sb) && 189 if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
175 current->plug && (type == DATA || type == NODE)) 190 current->plug && (type == DATA || type == NODE))
176 blk_finish_plug(current->plug); 191 blk_finish_plug(current->plug);
192
193 if (type != DATA && type != NODE)
194 goto submit_io;
195
196 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
197 start %= F2FS_IO_SIZE(sbi);
198
199 if (start == 0)
200 goto submit_io;
201
202 /* fill dummy pages */
203 for (; start < F2FS_IO_SIZE(sbi); start++) {
204 struct page *page =
205 mempool_alloc(sbi->write_io_dummy,
206 GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
207 f2fs_bug_on(sbi, !page);
208
209 SetPagePrivate(page);
210 set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
211 lock_page(page);
212 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
213 f2fs_bug_on(sbi, 1);
214 }
215 /*
216 * In the NODE case, we lose next block address chain. So, we
217 * need to do checkpoint in f2fs_sync_file.
218 */
219 if (type == NODE)
220 set_sbi_flag(sbi, SBI_NEED_CP);
177 } 221 }
222submit_io:
223 if (is_read_io(bio_op(bio)))
224 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
225 else
226 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
178 submit_bio(bio); 227 submit_bio(bio);
179} 228}
180 229
@@ -185,19 +234,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
185 if (!io->bio) 234 if (!io->bio)
186 return; 235 return;
187 236
237 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
238
188 if (is_read_io(fio->op)) 239 if (is_read_io(fio->op))
189 trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); 240 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
190 else 241 else
191 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 242 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
192
193 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
194 243
195 __submit_bio(io->sbi, io->bio, fio->type); 244 __submit_bio(io->sbi, io->bio, fio->type);
196 io->bio = NULL; 245 io->bio = NULL;
197} 246}
198 247
199static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, 248static bool __has_merged_page(struct f2fs_bio_info *io,
200 struct page *page, nid_t ino) 249 struct inode *inode, nid_t ino, pgoff_t idx)
201{ 250{
202 struct bio_vec *bvec; 251 struct bio_vec *bvec;
203 struct page *target; 252 struct page *target;
@@ -206,7 +255,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
206 if (!io->bio) 255 if (!io->bio)
207 return false; 256 return false;
208 257
209 if (!inode && !page && !ino) 258 if (!inode && !ino)
210 return true; 259 return true;
211 260
212 bio_for_each_segment_all(bvec, io->bio, i) { 261 bio_for_each_segment_all(bvec, io->bio, i) {
@@ -216,10 +265,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
216 else 265 else
217 target = fscrypt_control_page(bvec->bv_page); 266 target = fscrypt_control_page(bvec->bv_page);
218 267
268 if (idx != target->index)
269 continue;
270
219 if (inode && inode == target->mapping->host) 271 if (inode && inode == target->mapping->host)
220 return true; 272 return true;
221 if (page && page == target)
222 return true;
223 if (ino && ino == ino_of_node(target)) 273 if (ino && ino == ino_of_node(target))
224 return true; 274 return true;
225 } 275 }
@@ -228,22 +278,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
228} 278}
229 279
230static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, 280static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
231 struct page *page, nid_t ino, 281 nid_t ino, pgoff_t idx, enum page_type type)
232 enum page_type type)
233{ 282{
234 enum page_type btype = PAGE_TYPE_OF_BIO(type); 283 enum page_type btype = PAGE_TYPE_OF_BIO(type);
235 struct f2fs_bio_info *io = &sbi->write_io[btype]; 284 struct f2fs_bio_info *io = &sbi->write_io[btype];
236 bool ret; 285 bool ret;
237 286
238 down_read(&io->io_rwsem); 287 down_read(&io->io_rwsem);
239 ret = __has_merged_page(io, inode, page, ino); 288 ret = __has_merged_page(io, inode, ino, idx);
240 up_read(&io->io_rwsem); 289 up_read(&io->io_rwsem);
241 return ret; 290 return ret;
242} 291}
243 292
244static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, 293static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
245 struct inode *inode, struct page *page, 294 struct inode *inode, nid_t ino, pgoff_t idx,
246 nid_t ino, enum page_type type, int rw) 295 enum page_type type, int rw)
247{ 296{
248 enum page_type btype = PAGE_TYPE_OF_BIO(type); 297 enum page_type btype = PAGE_TYPE_OF_BIO(type);
249 struct f2fs_bio_info *io; 298 struct f2fs_bio_info *io;
@@ -252,16 +301,16 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
252 301
253 down_write(&io->io_rwsem); 302 down_write(&io->io_rwsem);
254 303
255 if (!__has_merged_page(io, inode, page, ino)) 304 if (!__has_merged_page(io, inode, ino, idx))
256 goto out; 305 goto out;
257 306
258 /* change META to META_FLUSH in the checkpoint procedure */ 307 /* change META to META_FLUSH in the checkpoint procedure */
259 if (type >= META_FLUSH) { 308 if (type >= META_FLUSH) {
260 io->fio.type = META_FLUSH; 309 io->fio.type = META_FLUSH;
261 io->fio.op = REQ_OP_WRITE; 310 io->fio.op = REQ_OP_WRITE;
262 io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; 311 io->fio.op_flags = REQ_META | REQ_PRIO;
263 if (!test_opt(sbi, NOBARRIER)) 312 if (!test_opt(sbi, NOBARRIER))
264 io->fio.op_flags |= REQ_FUA; 313 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
265 } 314 }
266 __submit_merged_bio(io); 315 __submit_merged_bio(io);
267out: 316out:
@@ -271,15 +320,15 @@ out:
271void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, 320void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
272 int rw) 321 int rw)
273{ 322{
274 __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw); 323 __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
275} 324}
276 325
277void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, 326void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
278 struct inode *inode, struct page *page, 327 struct inode *inode, nid_t ino, pgoff_t idx,
279 nid_t ino, enum page_type type, int rw) 328 enum page_type type, int rw)
280{ 329{
281 if (has_merged_page(sbi, inode, page, ino, type)) 330 if (has_merged_page(sbi, inode, ino, idx, type))
282 __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw); 331 __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
283} 332}
284 333
285void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) 334void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
@@ -315,13 +364,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
315 return 0; 364 return 0;
316} 365}
317 366
318void f2fs_submit_page_mbio(struct f2fs_io_info *fio) 367int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
319{ 368{
320 struct f2fs_sb_info *sbi = fio->sbi; 369 struct f2fs_sb_info *sbi = fio->sbi;
321 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 370 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
322 struct f2fs_bio_info *io; 371 struct f2fs_bio_info *io;
323 bool is_read = is_read_io(fio->op); 372 bool is_read = is_read_io(fio->op);
324 struct page *bio_page; 373 struct page *bio_page;
374 int err = 0;
325 375
326 io = is_read ? &sbi->read_io : &sbi->write_io[btype]; 376 io = is_read ? &sbi->read_io : &sbi->write_io[btype];
327 377
@@ -331,6 +381,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
331 381
332 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; 382 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
333 383
384 /* set submitted = 1 as a return value */
385 fio->submitted = 1;
386
334 if (!is_read) 387 if (!is_read)
335 inc_page_count(sbi, WB_DATA_TYPE(bio_page)); 388 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
336 389
@@ -342,6 +395,13 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
342 __submit_merged_bio(io); 395 __submit_merged_bio(io);
343alloc_new: 396alloc_new:
344 if (io->bio == NULL) { 397 if (io->bio == NULL) {
398 if ((fio->type == DATA || fio->type == NODE) &&
399 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
400 err = -EAGAIN;
401 if (!is_read)
402 dec_page_count(sbi, WB_DATA_TYPE(bio_page));
403 goto out_fail;
404 }
345 io->bio = __bio_alloc(sbi, fio->new_blkaddr, 405 io->bio = __bio_alloc(sbi, fio->new_blkaddr,
346 BIO_MAX_PAGES, is_read); 406 BIO_MAX_PAGES, is_read);
347 io->fio = *fio; 407 io->fio = *fio;
@@ -355,9 +415,10 @@ alloc_new:
355 415
356 io->last_block_in_bio = fio->new_blkaddr; 416 io->last_block_in_bio = fio->new_blkaddr;
357 f2fs_trace_ios(fio, 0); 417 f2fs_trace_ios(fio, 0);
358 418out_fail:
359 up_write(&io->io_rwsem); 419 up_write(&io->io_rwsem);
360 trace_f2fs_submit_page_mbio(fio->page, fio); 420 trace_f2fs_submit_page_mbio(fio->page, fio);
421 return err;
361} 422}
362 423
363static void __set_data_blkaddr(struct dnode_of_data *dn) 424static void __set_data_blkaddr(struct dnode_of_data *dn)
@@ -453,7 +514,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
453 514
454int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) 515int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
455{ 516{
456 struct extent_info ei; 517 struct extent_info ei = {0,0,0};
457 struct inode *inode = dn->inode; 518 struct inode *inode = dn->inode;
458 519
459 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 520 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -470,7 +531,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
470 struct address_space *mapping = inode->i_mapping; 531 struct address_space *mapping = inode->i_mapping;
471 struct dnode_of_data dn; 532 struct dnode_of_data dn;
472 struct page *page; 533 struct page *page;
473 struct extent_info ei; 534 struct extent_info ei = {0,0,0};
474 int err; 535 int err;
475 struct f2fs_io_info fio = { 536 struct f2fs_io_info fio = {
476 .sbi = F2FS_I_SB(inode), 537 .sbi = F2FS_I_SB(inode),
@@ -694,6 +755,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
694 struct f2fs_map_blocks map; 755 struct f2fs_map_blocks map;
695 int err = 0; 756 int err = 0;
696 757
758 if (is_inode_flag_set(inode, FI_NO_PREALLOC))
759 return 0;
760
697 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); 761 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
698 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); 762 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
699 if (map.m_len > map.m_lblk) 763 if (map.m_len > map.m_lblk)
@@ -742,7 +806,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
742 int err = 0, ofs = 1; 806 int err = 0, ofs = 1;
743 unsigned int ofs_in_node, last_ofs_in_node; 807 unsigned int ofs_in_node, last_ofs_in_node;
744 blkcnt_t prealloc; 808 blkcnt_t prealloc;
745 struct extent_info ei; 809 struct extent_info ei = {0,0,0};
746 block_t blkaddr; 810 block_t blkaddr;
747 811
748 if (!maxblocks) 812 if (!maxblocks)
@@ -806,7 +870,7 @@ next_block:
806 } 870 }
807 if (err) 871 if (err)
808 goto sync_out; 872 goto sync_out;
809 map->m_flags = F2FS_MAP_NEW; 873 map->m_flags |= F2FS_MAP_NEW;
810 blkaddr = dn.data_blkaddr; 874 blkaddr = dn.data_blkaddr;
811 } else { 875 } else {
812 if (flag == F2FS_GET_BLOCK_BMAP) { 876 if (flag == F2FS_GET_BLOCK_BMAP) {
@@ -906,7 +970,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
906 if (!err) { 970 if (!err) {
907 map_bh(bh, inode->i_sb, map.m_pblk); 971 map_bh(bh, inode->i_sb, map.m_pblk);
908 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; 972 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
909 bh->b_size = map.m_len << inode->i_blkbits; 973 bh->b_size = (u64)map.m_len << inode->i_blkbits;
910 } 974 }
911 return err; 975 return err;
912} 976}
@@ -1088,7 +1152,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
1088 1152
1089 prefetchw(&page->flags); 1153 prefetchw(&page->flags);
1090 if (pages) { 1154 if (pages) {
1091 page = list_entry(pages->prev, struct page, lru); 1155 page = list_last_entry(pages, struct page, lru);
1092 list_del(&page->lru); 1156 list_del(&page->lru);
1093 if (add_to_page_cache_lru(page, mapping, 1157 if (add_to_page_cache_lru(page, mapping,
1094 page->index, 1158 page->index,
@@ -1207,7 +1271,7 @@ static int f2fs_read_data_pages(struct file *file,
1207 struct list_head *pages, unsigned nr_pages) 1271 struct list_head *pages, unsigned nr_pages)
1208{ 1272{
1209 struct inode *inode = file->f_mapping->host; 1273 struct inode *inode = file->f_mapping->host;
1210 struct page *page = list_entry(pages->prev, struct page, lru); 1274 struct page *page = list_last_entry(pages, struct page, lru);
1211 1275
1212 trace_f2fs_readpages(inode, page, nr_pages); 1276 trace_f2fs_readpages(inode, page, nr_pages);
1213 1277
@@ -1288,8 +1352,8 @@ out_writepage:
1288 return err; 1352 return err;
1289} 1353}
1290 1354
1291static int f2fs_write_data_page(struct page *page, 1355static int __write_data_page(struct page *page, bool *submitted,
1292 struct writeback_control *wbc) 1356 struct writeback_control *wbc)
1293{ 1357{
1294 struct inode *inode = page->mapping->host; 1358 struct inode *inode = page->mapping->host;
1295 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1359 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1307,6 +1371,7 @@ static int f2fs_write_data_page(struct page *page,
1307 .op_flags = wbc_to_write_flags(wbc), 1371 .op_flags = wbc_to_write_flags(wbc),
1308 .page = page, 1372 .page = page,
1309 .encrypted_page = NULL, 1373 .encrypted_page = NULL,
1374 .submitted = false,
1310 }; 1375 };
1311 1376
1312 trace_f2fs_writepage(page, DATA); 1377 trace_f2fs_writepage(page, DATA);
@@ -1352,9 +1417,12 @@ write:
1352 goto redirty_out; 1417 goto redirty_out;
1353 1418
1354 err = -EAGAIN; 1419 err = -EAGAIN;
1355 f2fs_lock_op(sbi); 1420 if (f2fs_has_inline_data(inode)) {
1356 if (f2fs_has_inline_data(inode))
1357 err = f2fs_write_inline_data(inode, page); 1421 err = f2fs_write_inline_data(inode, page);
1422 if (!err)
1423 goto out;
1424 }
1425 f2fs_lock_op(sbi);
1358 if (err == -EAGAIN) 1426 if (err == -EAGAIN)
1359 err = do_write_data_page(&fio); 1427 err = do_write_data_page(&fio);
1360 if (F2FS_I(inode)->last_disk_size < psize) 1428 if (F2FS_I(inode)->last_disk_size < psize)
@@ -1370,15 +1438,22 @@ out:
1370 ClearPageUptodate(page); 1438 ClearPageUptodate(page);
1371 1439
1372 if (wbc->for_reclaim) { 1440 if (wbc->for_reclaim) {
1373 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); 1441 f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
1442 DATA, WRITE);
1374 remove_dirty_inode(inode); 1443 remove_dirty_inode(inode);
1444 submitted = NULL;
1375 } 1445 }
1376 1446
1377 unlock_page(page); 1447 unlock_page(page);
1378 f2fs_balance_fs(sbi, need_balance_fs); 1448 f2fs_balance_fs(sbi, need_balance_fs);
1379 1449
1380 if (unlikely(f2fs_cp_error(sbi))) 1450 if (unlikely(f2fs_cp_error(sbi))) {
1381 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1451 f2fs_submit_merged_bio(sbi, DATA, WRITE);
1452 submitted = NULL;
1453 }
1454
1455 if (submitted)
1456 *submitted = fio.submitted;
1382 1457
1383 return 0; 1458 return 0;
1384 1459
@@ -1390,6 +1465,12 @@ redirty_out:
1390 return err; 1465 return err;
1391} 1466}
1392 1467
1468static int f2fs_write_data_page(struct page *page,
1469 struct writeback_control *wbc)
1470{
1471 return __write_data_page(page, NULL, wbc);
1472}
1473
1393/* 1474/*
1394 * This function was copied from write_cche_pages from mm/page-writeback.c. 1475 * This function was copied from write_cche_pages from mm/page-writeback.c.
1395 * The major change is making write step of cold data page separately from 1476 * The major change is making write step of cold data page separately from
@@ -1406,10 +1487,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
1406 pgoff_t index; 1487 pgoff_t index;
1407 pgoff_t end; /* Inclusive */ 1488 pgoff_t end; /* Inclusive */
1408 pgoff_t done_index; 1489 pgoff_t done_index;
1490 pgoff_t last_idx = ULONG_MAX;
1409 int cycled; 1491 int cycled;
1410 int range_whole = 0; 1492 int range_whole = 0;
1411 int tag; 1493 int tag;
1412 int nwritten = 0;
1413 1494
1414 pagevec_init(&pvec, 0); 1495 pagevec_init(&pvec, 0);
1415 1496
@@ -1446,6 +1527,7 @@ retry:
1446 1527
1447 for (i = 0; i < nr_pages; i++) { 1528 for (i = 0; i < nr_pages; i++) {
1448 struct page *page = pvec.pages[i]; 1529 struct page *page = pvec.pages[i];
1530 bool submitted = false;
1449 1531
1450 if (page->index > end) { 1532 if (page->index > end) {
1451 done = 1; 1533 done = 1;
@@ -1479,7 +1561,7 @@ continue_unlock:
1479 if (!clear_page_dirty_for_io(page)) 1561 if (!clear_page_dirty_for_io(page))
1480 goto continue_unlock; 1562 goto continue_unlock;
1481 1563
1482 ret = mapping->a_ops->writepage(page, wbc); 1564 ret = __write_data_page(page, &submitted, wbc);
1483 if (unlikely(ret)) { 1565 if (unlikely(ret)) {
1484 /* 1566 /*
1485 * keep nr_to_write, since vfs uses this to 1567 * keep nr_to_write, since vfs uses this to
@@ -1493,8 +1575,8 @@ continue_unlock:
1493 done_index = page->index + 1; 1575 done_index = page->index + 1;
1494 done = 1; 1576 done = 1;
1495 break; 1577 break;
1496 } else { 1578 } else if (submitted) {
1497 nwritten++; 1579 last_idx = page->index;
1498 } 1580 }
1499 1581
1500 if (--wbc->nr_to_write <= 0 && 1582 if (--wbc->nr_to_write <= 0 &&
@@ -1516,9 +1598,9 @@ continue_unlock:
1516 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 1598 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1517 mapping->writeback_index = done_index; 1599 mapping->writeback_index = done_index;
1518 1600
1519 if (nwritten) 1601 if (last_idx != ULONG_MAX)
1520 f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, 1602 f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
1521 NULL, 0, DATA, WRITE); 1603 0, last_idx, DATA, WRITE);
1522 1604
1523 return ret; 1605 return ret;
1524} 1606}
@@ -1591,14 +1673,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
1591 struct dnode_of_data dn; 1673 struct dnode_of_data dn;
1592 struct page *ipage; 1674 struct page *ipage;
1593 bool locked = false; 1675 bool locked = false;
1594 struct extent_info ei; 1676 struct extent_info ei = {0,0,0};
1595 int err = 0; 1677 int err = 0;
1596 1678
1597 /* 1679 /*
1598 * we already allocated all the blocks, so we don't need to get 1680 * we already allocated all the blocks, so we don't need to get
1599 * the block addresses when there is no need to fill the page. 1681 * the block addresses when there is no need to fill the page.
1600 */ 1682 */
1601 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) 1683 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
1684 !is_inode_flag_set(inode, FI_NO_PREALLOC))
1602 return 0; 1685 return 0;
1603 1686
1604 if (f2fs_has_inline_data(inode) || 1687 if (f2fs_has_inline_data(inode) ||
@@ -1682,7 +1765,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1682 goto fail; 1765 goto fail;
1683 } 1766 }
1684repeat: 1767repeat:
1685 page = grab_cache_page_write_begin(mapping, index, flags); 1768 /*
1769 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
1770 * wait_for_stable_page. Will wait that below with our IO control.
1771 */
1772 page = pagecache_get_page(mapping, index,
1773 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
1686 if (!page) { 1774 if (!page) {
1687 err = -ENOMEM; 1775 err = -ENOMEM;
1688 goto fail; 1776 goto fail;
@@ -1715,6 +1803,11 @@ repeat:
1715 if (len == PAGE_SIZE || PageUptodate(page)) 1803 if (len == PAGE_SIZE || PageUptodate(page))
1716 return 0; 1804 return 0;
1717 1805
1806 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
1807 zero_user_segment(page, len, PAGE_SIZE);
1808 return 0;
1809 }
1810
1718 if (blkaddr == NEW_ADDR) { 1811 if (blkaddr == NEW_ADDR) {
1719 zero_user_segment(page, 0, PAGE_SIZE); 1812 zero_user_segment(page, 0, PAGE_SIZE);
1720 SetPageUptodate(page); 1813 SetPageUptodate(page);
@@ -1768,7 +1861,7 @@ static int f2fs_write_end(struct file *file,
1768 * let generic_perform_write() try to copy data again through copied=0. 1861 * let generic_perform_write() try to copy data again through copied=0.
1769 */ 1862 */
1770 if (!PageUptodate(page)) { 1863 if (!PageUptodate(page)) {
1771 if (unlikely(copied != PAGE_SIZE)) 1864 if (unlikely(copied != len))
1772 copied = 0; 1865 copied = 0;
1773 else 1866 else
1774 SetPageUptodate(page); 1867 SetPageUptodate(page);
@@ -1917,7 +2010,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
1917 if (!PageUptodate(page)) 2010 if (!PageUptodate(page))
1918 SetPageUptodate(page); 2011 SetPageUptodate(page);
1919 2012
1920 if (f2fs_is_atomic_file(inode)) { 2013 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
1921 if (!IS_ATOMIC_WRITTEN_PAGE(page)) { 2014 if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
1922 register_inmem_page(inode, page); 2015 register_inmem_page(inode, page);
1923 return 1; 2016 return 1;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fbd5184140d0..a77df377e2e8 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
50 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 50 si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
51 si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; 51 si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
52 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 52 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
53 si->aw_cnt = atomic_read(&sbi->aw_cnt);
54 si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
53 si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); 55 si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
54 si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); 56 si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
57 if (SM_I(sbi) && SM_I(sbi)->fcc_info)
58 si->nr_flush =
59 atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
60 if (SM_I(sbi) && SM_I(sbi)->dcc_info)
61 si->nr_discard =
62 atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
55 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 63 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
56 si->rsvd_segs = reserved_segments(sbi); 64 si->rsvd_segs = reserved_segments(sbi);
57 si->overp_segs = overprovision_segments(sbi); 65 si->overp_segs = overprovision_segments(sbi);
@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
62 si->inline_xattr = atomic_read(&sbi->inline_xattr); 70 si->inline_xattr = atomic_read(&sbi->inline_xattr);
63 si->inline_inode = atomic_read(&sbi->inline_inode); 71 si->inline_inode = atomic_read(&sbi->inline_inode);
64 si->inline_dir = atomic_read(&sbi->inline_dir); 72 si->inline_dir = atomic_read(&sbi->inline_dir);
73 si->append = sbi->im[APPEND_INO].ino_num;
74 si->update = sbi->im[UPDATE_INO].ino_num;
65 si->orphans = sbi->im[ORPHAN_INO].ino_num; 75 si->orphans = sbi->im[ORPHAN_INO].ino_num;
66 si->utilization = utilization(sbi); 76 si->utilization = utilization(sbi);
67 77
@@ -183,6 +193,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
183 /* build nm */ 193 /* build nm */
184 si->base_mem += sizeof(struct f2fs_nm_info); 194 si->base_mem += sizeof(struct f2fs_nm_info);
185 si->base_mem += __bitmap_size(sbi, NAT_BITMAP); 195 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
196 si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
197 si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
198 si->base_mem += NM_I(sbi)->nat_blocks / 8;
186 199
187get_cache: 200get_cache:
188 si->cache_mem = 0; 201 si->cache_mem = 0;
@@ -192,8 +205,10 @@ get_cache:
192 si->cache_mem += sizeof(struct f2fs_gc_kthread); 205 si->cache_mem += sizeof(struct f2fs_gc_kthread);
193 206
194 /* build merge flush thread */ 207 /* build merge flush thread */
195 if (SM_I(sbi)->cmd_control_info) 208 if (SM_I(sbi)->fcc_info)
196 si->cache_mem += sizeof(struct flush_cmd_control); 209 si->cache_mem += sizeof(struct flush_cmd_control);
210 if (SM_I(sbi)->dcc_info)
211 si->cache_mem += sizeof(struct discard_cmd_control);
197 212
198 /* free nids */ 213 /* free nids */
199 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + 214 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -254,8 +269,8 @@ static int stat_show(struct seq_file *s, void *v)
254 si->inline_inode); 269 si->inline_inode);
255 seq_printf(s, " - Inline_dentry Inode: %u\n", 270 seq_printf(s, " - Inline_dentry Inode: %u\n",
256 si->inline_dir); 271 si->inline_dir);
257 seq_printf(s, " - Orphan Inode: %u\n", 272 seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
258 si->orphans); 273 si->orphans, si->append, si->update);
259 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 274 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
260 si->main_area_segs, si->main_area_sections, 275 si->main_area_segs, si->main_area_sections,
261 si->main_area_zones); 276 si->main_area_zones);
@@ -314,8 +329,11 @@ static int stat_show(struct seq_file *s, void *v)
314 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 329 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
315 si->ext_tree, si->zombie_tree, si->ext_node); 330 si->ext_tree, si->zombie_tree, si->ext_node);
316 seq_puts(s, "\nBalancing F2FS Async:\n"); 331 seq_puts(s, "\nBalancing F2FS Async:\n");
317 seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n", 332 seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
318 si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data); 333 si->nr_wb_cp_data, si->nr_wb_data,
334 si->nr_flush, si->nr_discard);
335 seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
336 si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
319 seq_printf(s, " - nodes: %4d in %4d\n", 337 seq_printf(s, " - nodes: %4d in %4d\n",
320 si->ndirty_node, si->node_pages); 338 si->ndirty_node, si->node_pages);
321 seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", 339 seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -414,6 +432,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
414 atomic_set(&sbi->inline_dir, 0); 432 atomic_set(&sbi->inline_dir, 0);
415 atomic_set(&sbi->inplace_count, 0); 433 atomic_set(&sbi->inplace_count, 0);
416 434
435 atomic_set(&sbi->aw_cnt, 0);
436 atomic_set(&sbi->max_aw_cnt, 0);
437
417 mutex_lock(&f2fs_stat_mutex); 438 mutex_lock(&f2fs_stat_mutex);
418 list_add_tail(&si->stat_list, &f2fs_stat_list); 439 list_add_tail(&si->stat_list, &f2fs_stat_list);
419 mutex_unlock(&f2fs_stat_mutex); 440 mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 18607fc5240d..4650c9b85de7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
207 f2fs_put_page(dentry_page, 0); 207 f2fs_put_page(dentry_page, 0);
208 } 208 }
209 209
210 if (!de && room && F2FS_I(dir)->chash != namehash) { 210 /* This is to increase the speed of f2fs_create */
211 F2FS_I(dir)->chash = namehash; 211 if (!de && room) {
212 F2FS_I(dir)->clevel = level; 212 F2FS_I(dir)->task = current;
213 if (F2FS_I(dir)->chash != namehash) {
214 F2FS_I(dir)->chash = namehash;
215 F2FS_I(dir)->clevel = level;
216 }
213 } 217 }
214 218
215 return de; 219 return de;
@@ -548,8 +552,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
548 552
549start: 553start:
550#ifdef CONFIG_F2FS_FAULT_INJECTION 554#ifdef CONFIG_F2FS_FAULT_INJECTION
551 if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) 555 if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
556 f2fs_show_injection_info(FAULT_DIR_DEPTH);
552 return -ENOSPC; 557 return -ENOSPC;
558 }
553#endif 559#endif
554 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) 560 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
555 return -ENOSPC; 561 return -ENOSPC;
@@ -646,14 +652,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
646 struct inode *inode, nid_t ino, umode_t mode) 652 struct inode *inode, nid_t ino, umode_t mode)
647{ 653{
648 struct fscrypt_name fname; 654 struct fscrypt_name fname;
655 struct page *page = NULL;
656 struct f2fs_dir_entry *de = NULL;
649 int err; 657 int err;
650 658
651 err = fscrypt_setup_filename(dir, name, 0, &fname); 659 err = fscrypt_setup_filename(dir, name, 0, &fname);
652 if (err) 660 if (err)
653 return err; 661 return err;
654 662
655 err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); 663 /*
656 664 * An immature stackable filesystem shows a race condition between lookup
665 * and create. If we have same task when doing lookup and create, it's
666 * definitely fine as expected by VFS normally. Otherwise, let's just
667 * verify on-disk dentry one more time, which guarantees filesystem
668 * consistency more.
669 */
670 if (current != F2FS_I(dir)->task) {
671 de = __f2fs_find_entry(dir, &fname, &page);
672 F2FS_I(dir)->task = NULL;
673 }
674 if (de) {
675 f2fs_dentry_kunmap(dir, page);
676 f2fs_put_page(page, 0);
677 err = -EEXIST;
678 } else if (IS_ERR(page)) {
679 err = PTR_ERR(page);
680 } else {
681 err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
682 }
657 fscrypt_free_filename(&fname); 683 fscrypt_free_filename(&fname);
658 return err; 684 return err;
659} 685}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 4db44da7ef69..c6934f014e0f 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
77 struct extent_tree *et; 77 struct extent_tree *et;
78 nid_t ino = inode->i_ino; 78 nid_t ino = inode->i_ino;
79 79
80 down_write(&sbi->extent_tree_lock); 80 mutex_lock(&sbi->extent_tree_lock);
81 et = radix_tree_lookup(&sbi->extent_tree_root, ino); 81 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
82 if (!et) { 82 if (!et) {
83 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); 83 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
94 atomic_dec(&sbi->total_zombie_tree); 94 atomic_dec(&sbi->total_zombie_tree);
95 list_del_init(&et->list); 95 list_del_init(&et->list);
96 } 96 }
97 up_write(&sbi->extent_tree_lock); 97 mutex_unlock(&sbi->extent_tree_lock);
98 98
99 /* never died until evict_inode */ 99 /* never died until evict_inode */
100 F2FS_I(inode)->extent_tree = et; 100 F2FS_I(inode)->extent_tree = et;
@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
311 tmp_node = parent; 311 tmp_node = parent;
312 if (parent && fofs > en->ei.fofs) 312 if (parent && fofs > en->ei.fofs)
313 tmp_node = rb_next(parent); 313 tmp_node = rb_next(parent);
314 *next_ex = tmp_node ? 314 *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
315 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
316 315
317 tmp_node = parent; 316 tmp_node = parent;
318 if (parent && fofs < en->ei.fofs) 317 if (parent && fofs < en->ei.fofs)
319 tmp_node = rb_prev(parent); 318 tmp_node = rb_prev(parent);
320 *prev_ex = tmp_node ? 319 *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
321 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
322 return NULL; 320 return NULL;
323 321
324lookup_neighbors: 322lookup_neighbors:
325 if (fofs == en->ei.fofs) { 323 if (fofs == en->ei.fofs) {
326 /* lookup prev node for merging backward later */ 324 /* lookup prev node for merging backward later */
327 tmp_node = rb_prev(&en->rb_node); 325 tmp_node = rb_prev(&en->rb_node);
328 *prev_ex = tmp_node ? 326 *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
329 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
330 } 327 }
331 if (fofs == en->ei.fofs + en->ei.len - 1) { 328 if (fofs == en->ei.fofs + en->ei.len - 1) {
332 /* lookup next node for merging frontward later */ 329 /* lookup next node for merging frontward later */
333 tmp_node = rb_next(&en->rb_node); 330 tmp_node = rb_next(&en->rb_node);
334 *next_ex = tmp_node ? 331 *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
335 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
336 } 332 }
337 return en; 333 return en;
338} 334}
@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
352 } 348 }
353 349
354 if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { 350 if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
355 if (en)
356 __release_extent_node(sbi, et, prev_ex);
357 next_ex->ei.fofs = ei->fofs; 351 next_ex->ei.fofs = ei->fofs;
358 next_ex->ei.blk = ei->blk; 352 next_ex->ei.blk = ei->blk;
359 next_ex->ei.len += ei->len; 353 next_ex->ei.len += ei->len;
354 if (en)
355 __release_extent_node(sbi, et, prev_ex);
356
360 en = next_ex; 357 en = next_ex;
361 } 358 }
362 359
@@ -416,7 +413,7 @@ do_insert:
416 return en; 413 return en;
417} 414}
418 415
419static unsigned int f2fs_update_extent_tree_range(struct inode *inode, 416static void f2fs_update_extent_tree_range(struct inode *inode,
420 pgoff_t fofs, block_t blkaddr, unsigned int len) 417 pgoff_t fofs, block_t blkaddr, unsigned int len)
421{ 418{
422 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 419 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -429,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
429 unsigned int pos = (unsigned int)fofs; 426 unsigned int pos = (unsigned int)fofs;
430 427
431 if (!et) 428 if (!et)
432 return false; 429 return;
433 430
434 trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); 431 trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
435 432
@@ -437,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
437 434
438 if (is_inode_flag_set(inode, FI_NO_EXTENT)) { 435 if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
439 write_unlock(&et->lock); 436 write_unlock(&et->lock);
440 return false; 437 return;
441 } 438 }
442 439
443 prev = et->largest; 440 prev = et->largest;
@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
492 if (!next_en) { 489 if (!next_en) {
493 struct rb_node *node = rb_next(&en->rb_node); 490 struct rb_node *node = rb_next(&en->rb_node);
494 491
495 next_en = node ? 492 next_en = rb_entry_safe(node, struct extent_node,
496 rb_entry(node, struct extent_node, rb_node) 493 rb_node);
497 : NULL;
498 } 494 }
499 495
500 if (parts) 496 if (parts)
@@ -535,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
535 __free_extent_tree(sbi, et); 531 __free_extent_tree(sbi, et);
536 532
537 write_unlock(&et->lock); 533 write_unlock(&et->lock);
538
539 return !__is_extent_same(&prev, &et->largest);
540} 534}
541 535
542unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 536unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -552,7 +546,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
552 if (!atomic_read(&sbi->total_zombie_tree)) 546 if (!atomic_read(&sbi->total_zombie_tree))
553 goto free_node; 547 goto free_node;
554 548
555 if (!down_write_trylock(&sbi->extent_tree_lock)) 549 if (!mutex_trylock(&sbi->extent_tree_lock))
556 goto out; 550 goto out;
557 551
558 /* 1. remove unreferenced extent tree */ 552 /* 1. remove unreferenced extent tree */
@@ -574,11 +568,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
574 goto unlock_out; 568 goto unlock_out;
575 cond_resched(); 569 cond_resched();
576 } 570 }
577 up_write(&sbi->extent_tree_lock); 571 mutex_unlock(&sbi->extent_tree_lock);
578 572
579free_node: 573free_node:
580 /* 2. remove LRU extent entries */ 574 /* 2. remove LRU extent entries */
581 if (!down_write_trylock(&sbi->extent_tree_lock)) 575 if (!mutex_trylock(&sbi->extent_tree_lock))
582 goto out; 576 goto out;
583 577
584 remained = nr_shrink - (node_cnt + tree_cnt); 578 remained = nr_shrink - (node_cnt + tree_cnt);
@@ -608,7 +602,7 @@ free_node:
608 spin_unlock(&sbi->extent_lock); 602 spin_unlock(&sbi->extent_lock);
609 603
610unlock_out: 604unlock_out:
611 up_write(&sbi->extent_tree_lock); 605 mutex_unlock(&sbi->extent_tree_lock);
612out: 606out:
613 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); 607 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
614 608
@@ -655,10 +649,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
655 649
656 if (inode->i_nlink && !is_bad_inode(inode) && 650 if (inode->i_nlink && !is_bad_inode(inode) &&
657 atomic_read(&et->node_cnt)) { 651 atomic_read(&et->node_cnt)) {
658 down_write(&sbi->extent_tree_lock); 652 mutex_lock(&sbi->extent_tree_lock);
659 list_add_tail(&et->list, &sbi->zombie_list); 653 list_add_tail(&et->list, &sbi->zombie_list);
660 atomic_inc(&sbi->total_zombie_tree); 654 atomic_inc(&sbi->total_zombie_tree);
661 up_write(&sbi->extent_tree_lock); 655 mutex_unlock(&sbi->extent_tree_lock);
662 return; 656 return;
663 } 657 }
664 658
@@ -666,12 +660,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
666 node_cnt = f2fs_destroy_extent_node(inode); 660 node_cnt = f2fs_destroy_extent_node(inode);
667 661
668 /* delete extent tree entry in radix tree */ 662 /* delete extent tree entry in radix tree */
669 down_write(&sbi->extent_tree_lock); 663 mutex_lock(&sbi->extent_tree_lock);
670 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 664 f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
671 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 665 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
672 kmem_cache_free(extent_tree_slab, et); 666 kmem_cache_free(extent_tree_slab, et);
673 atomic_dec(&sbi->total_ext_tree); 667 atomic_dec(&sbi->total_ext_tree);
674 up_write(&sbi->extent_tree_lock); 668 mutex_unlock(&sbi->extent_tree_lock);
675 669
676 F2FS_I(inode)->extent_tree = NULL; 670 F2FS_I(inode)->extent_tree = NULL;
677 671
@@ -718,7 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
718void init_extent_cache_info(struct f2fs_sb_info *sbi) 712void init_extent_cache_info(struct f2fs_sb_info *sbi)
719{ 713{
720 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); 714 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
721 init_rwsem(&sbi->extent_tree_lock); 715 mutex_init(&sbi->extent_tree_lock);
722 INIT_LIST_HEAD(&sbi->extent_list); 716 INIT_LIST_HEAD(&sbi->extent_list);
723 spin_lock_init(&sbi->extent_lock); 717 spin_lock_init(&sbi->extent_lock);
724 atomic_set(&sbi->total_ext_tree, 0); 718 atomic_set(&sbi->total_ext_tree, 0);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 069fc7277d8d..d1483136fed6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -112,9 +112,9 @@ struct f2fs_mount_info {
112#define F2FS_HAS_FEATURE(sb, mask) \ 112#define F2FS_HAS_FEATURE(sb, mask) \
113 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) 113 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
114#define F2FS_SET_FEATURE(sb, mask) \ 114#define F2FS_SET_FEATURE(sb, mask) \
115 F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) 115 (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask))
116#define F2FS_CLEAR_FEATURE(sb, mask) \ 116#define F2FS_CLEAR_FEATURE(sb, mask) \
117 F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) 117 (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
118 118
119/* 119/*
120 * For checkpoint manager 120 * For checkpoint manager
@@ -132,11 +132,14 @@ enum {
132 CP_DISCARD, 132 CP_DISCARD,
133}; 133};
134 134
135#define DEF_BATCHED_TRIM_SECTIONS 2 135#define DEF_BATCHED_TRIM_SECTIONS 2048
136#define BATCHED_TRIM_SEGMENTS(sbi) \ 136#define BATCHED_TRIM_SEGMENTS(sbi) \
137 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) 137 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
138#define BATCHED_TRIM_BLOCKS(sbi) \ 138#define BATCHED_TRIM_BLOCKS(sbi) \
139 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) 139 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
140#define MAX_DISCARD_BLOCKS(sbi) \
141 ((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec)
142#define DISCARD_ISSUE_RATE 8
140#define DEF_CP_INTERVAL 60 /* 60 secs */ 143#define DEF_CP_INTERVAL 60 /* 60 secs */
141#define DEF_IDLE_INTERVAL 5 /* 5 secs */ 144#define DEF_IDLE_INTERVAL 5 /* 5 secs */
142 145
@@ -185,11 +188,30 @@ struct discard_entry {
185 int len; /* # of consecutive blocks of the discard */ 188 int len; /* # of consecutive blocks of the discard */
186}; 189};
187 190
188struct bio_entry { 191enum {
189 struct list_head list; 192 D_PREP,
190 struct bio *bio; 193 D_SUBMIT,
191 struct completion event; 194 D_DONE,
192 int error; 195};
196
197struct discard_cmd {
198 struct list_head list; /* command list */
199 struct completion wait; /* completion */
200 block_t lstart; /* logical start address */
201 block_t len; /* length */
202 struct bio *bio; /* bio */
203 int state; /* state */
204};
205
206struct discard_cmd_control {
207 struct task_struct *f2fs_issue_discard; /* discard thread */
208 struct list_head discard_entry_list; /* 4KB discard entry list */
209 int nr_discards; /* # of discards in the list */
210 struct list_head discard_cmd_list; /* discard cmd list */
211 wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
212 struct mutex cmd_lock;
213 int max_discards; /* max. discards to be issued */
214 atomic_t submit_discard; /* # of issued discard */
193}; 215};
194 216
195/* for the list of fsync inodes, used only during recovery */ 217/* for the list of fsync inodes, used only during recovery */
@@ -214,6 +236,7 @@ struct fsync_inode_entry {
214static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) 236static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
215{ 237{
216 int before = nats_in_cursum(journal); 238 int before = nats_in_cursum(journal);
239
217 journal->n_nats = cpu_to_le16(before + i); 240 journal->n_nats = cpu_to_le16(before + i);
218 return before; 241 return before;
219} 242}
@@ -221,6 +244,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
221static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i) 244static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
222{ 245{
223 int before = sits_in_cursum(journal); 246 int before = sits_in_cursum(journal);
247
224 journal->n_sits = cpu_to_le16(before + i); 248 journal->n_sits = cpu_to_le16(before + i);
225 return before; 249 return before;
226} 250}
@@ -306,12 +330,14 @@ static inline void make_dentry_ptr(struct inode *inode,
306 330
307 if (type == 1) { 331 if (type == 1) {
308 struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; 332 struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
333
309 d->max = NR_DENTRY_IN_BLOCK; 334 d->max = NR_DENTRY_IN_BLOCK;
310 d->bitmap = &t->dentry_bitmap; 335 d->bitmap = &t->dentry_bitmap;
311 d->dentry = t->dentry; 336 d->dentry = t->dentry;
312 d->filename = t->filename; 337 d->filename = t->filename;
313 } else { 338 } else {
314 struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; 339 struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
340
315 d->max = NR_INLINE_DENTRY; 341 d->max = NR_INLINE_DENTRY;
316 d->bitmap = &t->dentry_bitmap; 342 d->bitmap = &t->dentry_bitmap;
317 d->dentry = t->dentry; 343 d->dentry = t->dentry;
@@ -438,8 +464,8 @@ struct f2fs_inode_info {
438 atomic_t dirty_pages; /* # of dirty pages */ 464 atomic_t dirty_pages; /* # of dirty pages */
439 f2fs_hash_t chash; /* hash value of given file name */ 465 f2fs_hash_t chash; /* hash value of given file name */
440 unsigned int clevel; /* maximum level of given file name */ 466 unsigned int clevel; /* maximum level of given file name */
467 struct task_struct *task; /* lookup and create consistency */
441 nid_t i_xattr_nid; /* node id that contains xattrs */ 468 nid_t i_xattr_nid; /* node id that contains xattrs */
442 unsigned long long xattr_ver; /* cp version of xattr modification */
443 loff_t last_disk_size; /* lastly written file size */ 469 loff_t last_disk_size; /* lastly written file size */
444 470
445 struct list_head dirty_list; /* dirty list for dirs and files */ 471 struct list_head dirty_list; /* dirty list for dirs and files */
@@ -474,13 +500,6 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
474 ei->len = len; 500 ei->len = len;
475} 501}
476 502
477static inline bool __is_extent_same(struct extent_info *ei1,
478 struct extent_info *ei2)
479{
480 return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
481 ei1->len == ei2->len);
482}
483
484static inline bool __is_extent_mergeable(struct extent_info *back, 503static inline bool __is_extent_mergeable(struct extent_info *back,
485 struct extent_info *front) 504 struct extent_info *front)
486{ 505{
@@ -500,7 +519,7 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
500 return __is_extent_mergeable(cur, front); 519 return __is_extent_mergeable(cur, front);
501} 520}
502 521
503extern void f2fs_mark_inode_dirty_sync(struct inode *, bool); 522extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
504static inline void __try_update_largest_extent(struct inode *inode, 523static inline void __try_update_largest_extent(struct inode *inode,
505 struct extent_tree *et, struct extent_node *en) 524 struct extent_tree *et, struct extent_node *en)
506{ 525{
@@ -532,6 +551,7 @@ struct f2fs_nm_info {
532 struct list_head nat_entries; /* cached nat entry list (clean) */ 551 struct list_head nat_entries; /* cached nat entry list (clean) */
533 unsigned int nat_cnt; /* the # of cached nat entries */ 552 unsigned int nat_cnt; /* the # of cached nat entries */
534 unsigned int dirty_nat_cnt; /* total num of nat entries in set */ 553 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
554 unsigned int nat_blocks; /* # of nat blocks */
535 555
536 /* free node ids management */ 556 /* free node ids management */
537 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 557 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -539,9 +559,19 @@ struct f2fs_nm_info {
539 unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ 559 unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
540 spinlock_t nid_list_lock; /* protect nid lists ops */ 560 spinlock_t nid_list_lock; /* protect nid lists ops */
541 struct mutex build_lock; /* lock for build free nids */ 561 struct mutex build_lock; /* lock for build free nids */
562 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
563 unsigned char *nat_block_bitmap;
542 564
543 /* for checkpoint */ 565 /* for checkpoint */
544 char *nat_bitmap; /* NAT bitmap pointer */ 566 char *nat_bitmap; /* NAT bitmap pointer */
567
568 unsigned int nat_bits_blocks; /* # of nat bits blocks */
569 unsigned char *nat_bits; /* NAT bits blocks */
570 unsigned char *full_nat_bits; /* full NAT pages */
571 unsigned char *empty_nat_bits; /* empty NAT pages */
572#ifdef CONFIG_F2FS_CHECK_FS
573 char *nat_bitmap_mir; /* NAT bitmap mirror */
574#endif
545 int bitmap_size; /* bitmap size */ 575 int bitmap_size; /* bitmap size */
546}; 576};
547 577
@@ -632,12 +662,6 @@ struct f2fs_sm_info {
632 /* a threshold to reclaim prefree segments */ 662 /* a threshold to reclaim prefree segments */
633 unsigned int rec_prefree_segments; 663 unsigned int rec_prefree_segments;
634 664
635 /* for small discard management */
636 struct list_head discard_list; /* 4KB discard list */
637 struct list_head wait_list; /* linked with issued discard bio */
638 int nr_discards; /* # of discards in the list */
639 int max_discards; /* max. discards to be issued */
640
641 /* for batched trimming */ 665 /* for batched trimming */
642 unsigned int trim_sections; /* # of sections to trim */ 666 unsigned int trim_sections; /* # of sections to trim */
643 667
@@ -648,8 +672,10 @@ struct f2fs_sm_info {
648 unsigned int min_fsync_blocks; /* threshold for fsync */ 672 unsigned int min_fsync_blocks; /* threshold for fsync */
649 673
650 /* for flush command control */ 674 /* for flush command control */
651 struct flush_cmd_control *cmd_control_info; 675 struct flush_cmd_control *fcc_info;
652 676
677 /* for discard command control */
678 struct discard_cmd_control *dcc_info;
653}; 679};
654 680
655/* 681/*
@@ -708,6 +734,7 @@ struct f2fs_io_info {
708 block_t old_blkaddr; /* old block address before Cow */ 734 block_t old_blkaddr; /* old block address before Cow */
709 struct page *page; /* page to be written */ 735 struct page *page; /* page to be written */
710 struct page *encrypted_page; /* encrypted page */ 736 struct page *encrypted_page; /* encrypted page */
737 bool submitted; /* indicate IO submission */
711}; 738};
712 739
713#define is_read_io(rw) (rw == READ) 740#define is_read_io(rw) (rw == READ)
@@ -787,6 +814,8 @@ struct f2fs_sb_info {
787 struct f2fs_bio_info read_io; /* for read bios */ 814 struct f2fs_bio_info read_io; /* for read bios */
788 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 815 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
789 struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ 816 struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */
817 int write_io_size_bits; /* Write IO size bits */
818 mempool_t *write_io_dummy; /* Dummy pages */
790 819
791 /* for checkpoint */ 820 /* for checkpoint */
792 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 821 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -811,7 +840,7 @@ struct f2fs_sb_info {
811 840
812 /* for extent tree cache */ 841 /* for extent tree cache */
813 struct radix_tree_root extent_tree_root;/* cache extent cache entries */ 842 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
814 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ 843 struct mutex extent_tree_lock; /* locking extent radix tree */
815 struct list_head extent_list; /* lru list for shrinker */ 844 struct list_head extent_list; /* lru list for shrinker */
816 spinlock_t extent_lock; /* locking extent lru list */ 845 spinlock_t extent_lock; /* locking extent lru list */
817 atomic_t total_ext_tree; /* extent tree count */ 846 atomic_t total_ext_tree; /* extent tree count */
@@ -858,6 +887,9 @@ struct f2fs_sb_info {
858 struct f2fs_gc_kthread *gc_thread; /* GC thread */ 887 struct f2fs_gc_kthread *gc_thread; /* GC thread */
859 unsigned int cur_victim_sec; /* current victim section num */ 888 unsigned int cur_victim_sec; /* current victim section num */
860 889
890 /* threshold for converting bg victims for fg */
891 u64 fggc_threshold;
892
861 /* maximum # of trials to find a victim segment for SSR and GC */ 893 /* maximum # of trials to find a victim segment for SSR and GC */
862 unsigned int max_victim_search; 894 unsigned int max_victim_search;
863 895
@@ -877,6 +909,8 @@ struct f2fs_sb_info {
877 atomic_t inline_xattr; /* # of inline_xattr inodes */ 909 atomic_t inline_xattr; /* # of inline_xattr inodes */
878 atomic_t inline_inode; /* # of inline_data inodes */ 910 atomic_t inline_inode; /* # of inline_data inodes */
879 atomic_t inline_dir; /* # of inline_dentry inodes */ 911 atomic_t inline_dir; /* # of inline_dentry inodes */
912 atomic_t aw_cnt; /* # of atomic writes */
913 atomic_t max_aw_cnt; /* max # of atomic writes */
880 int bg_gc; /* background gc calls */ 914 int bg_gc; /* background gc calls */
881 unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ 915 unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
882#endif 916#endif
@@ -908,6 +942,10 @@ struct f2fs_sb_info {
908}; 942};
909 943
910#ifdef CONFIG_F2FS_FAULT_INJECTION 944#ifdef CONFIG_F2FS_FAULT_INJECTION
945#define f2fs_show_injection_info(type) \
946 printk("%sF2FS-fs : inject %s in %s of %pF\n", \
947 KERN_INFO, fault_name[type], \
948 __func__, __builtin_return_address(0))
911static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) 949static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
912{ 950{
913 struct f2fs_fault_info *ffi = &sbi->fault_info; 951 struct f2fs_fault_info *ffi = &sbi->fault_info;
@@ -921,10 +959,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
921 atomic_inc(&ffi->inject_ops); 959 atomic_inc(&ffi->inject_ops);
922 if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { 960 if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
923 atomic_set(&ffi->inject_ops, 0); 961 atomic_set(&ffi->inject_ops, 0);
924 printk("%sF2FS-fs : inject %s in %pF\n",
925 KERN_INFO,
926 fault_name[type],
927 __builtin_return_address(0));
928 return true; 962 return true;
929 } 963 }
930 return false; 964 return false;
@@ -1089,6 +1123,12 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
1089 return le64_to_cpu(cp->checkpoint_ver); 1123 return le64_to_cpu(cp->checkpoint_ver);
1090} 1124}
1091 1125
1126static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
1127{
1128 size_t crc_offset = le32_to_cpu(cp->checksum_offset);
1129 return le32_to_cpu(*((__le32 *)((unsigned char *)cp + crc_offset)));
1130}
1131
1092static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) 1132static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
1093{ 1133{
1094 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); 1134 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
@@ -1133,6 +1173,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
1133 spin_unlock(&sbi->cp_lock); 1173 spin_unlock(&sbi->cp_lock);
1134} 1174}
1135 1175
1176static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
1177{
1178 set_sbi_flag(sbi, SBI_NEED_FSCK);
1179
1180 if (lock)
1181 spin_lock(&sbi->cp_lock);
1182 __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
1183 kfree(NM_I(sbi)->nat_bits);
1184 NM_I(sbi)->nat_bits = NULL;
1185 if (lock)
1186 spin_unlock(&sbi->cp_lock);
1187}
1188
1189static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
1190 struct cp_control *cpc)
1191{
1192 bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
1193
1194 return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set;
1195}
1196
1136static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) 1197static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
1137{ 1198{
1138 down_read(&sbi->cp_rwsem); 1199 down_read(&sbi->cp_rwsem);
@@ -1212,8 +1273,10 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
1212 blkcnt_t diff; 1273 blkcnt_t diff;
1213 1274
1214#ifdef CONFIG_F2FS_FAULT_INJECTION 1275#ifdef CONFIG_F2FS_FAULT_INJECTION
1215 if (time_to_inject(sbi, FAULT_BLOCK)) 1276 if (time_to_inject(sbi, FAULT_BLOCK)) {
1277 f2fs_show_injection_info(FAULT_BLOCK);
1216 return false; 1278 return false;
1279 }
1217#endif 1280#endif
1218 /* 1281 /*
1219 * let's increase this in prior to actual block count change in order 1282 * let's increase this in prior to actual block count change in order
@@ -1449,11 +1512,14 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
1449{ 1512{
1450#ifdef CONFIG_F2FS_FAULT_INJECTION 1513#ifdef CONFIG_F2FS_FAULT_INJECTION
1451 struct page *page = find_lock_page(mapping, index); 1514 struct page *page = find_lock_page(mapping, index);
1515
1452 if (page) 1516 if (page)
1453 return page; 1517 return page;
1454 1518
1455 if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) 1519 if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
1520 f2fs_show_injection_info(FAULT_PAGE_ALLOC);
1456 return NULL; 1521 return NULL;
1522 }
1457#endif 1523#endif
1458 if (!for_write) 1524 if (!for_write)
1459 return grab_cache_page(mapping, index); 1525 return grab_cache_page(mapping, index);
@@ -1532,6 +1598,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
1532static inline bool IS_INODE(struct page *page) 1598static inline bool IS_INODE(struct page *page)
1533{ 1599{
1534 struct f2fs_node *p = F2FS_NODE(page); 1600 struct f2fs_node *p = F2FS_NODE(page);
1601
1535 return RAW_IS_INODE(p); 1602 return RAW_IS_INODE(p);
1536} 1603}
1537 1604
@@ -1545,6 +1612,7 @@ static inline block_t datablock_addr(struct page *node_page,
1545{ 1612{
1546 struct f2fs_node *raw_node; 1613 struct f2fs_node *raw_node;
1547 __le32 *addr_array; 1614 __le32 *addr_array;
1615
1548 raw_node = F2FS_NODE(node_page); 1616 raw_node = F2FS_NODE(node_page);
1549 addr_array = blkaddr_in_node(raw_node); 1617 addr_array = blkaddr_in_node(raw_node);
1550 return le32_to_cpu(addr_array[offset]); 1618 return le32_to_cpu(addr_array[offset]);
@@ -1628,6 +1696,7 @@ enum {
1628 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1696 FI_UPDATE_WRITE, /* inode has in-place-update data */
1629 FI_NEED_IPU, /* used for ipu per file */ 1697 FI_NEED_IPU, /* used for ipu per file */
1630 FI_ATOMIC_FILE, /* indicate atomic file */ 1698 FI_ATOMIC_FILE, /* indicate atomic file */
1699 FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
1631 FI_VOLATILE_FILE, /* indicate volatile file */ 1700 FI_VOLATILE_FILE, /* indicate volatile file */
1632 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ 1701 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
1633 FI_DROP_CACHE, /* drop dirty page cache */ 1702 FI_DROP_CACHE, /* drop dirty page cache */
@@ -1635,6 +1704,7 @@ enum {
1635 FI_INLINE_DOTS, /* indicate inline dot dentries */ 1704 FI_INLINE_DOTS, /* indicate inline dot dentries */
1636 FI_DO_DEFRAG, /* indicate defragment is running */ 1705 FI_DO_DEFRAG, /* indicate defragment is running */
1637 FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ 1706 FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
1707 FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
1638}; 1708};
1639 1709
1640static inline void __mark_inode_dirty_flag(struct inode *inode, 1710static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -1779,6 +1849,7 @@ static inline unsigned int addrs_per_inode(struct inode *inode)
1779static inline void *inline_xattr_addr(struct page *page) 1849static inline void *inline_xattr_addr(struct page *page)
1780{ 1850{
1781 struct f2fs_inode *ri = F2FS_INODE(page); 1851 struct f2fs_inode *ri = F2FS_INODE(page);
1852
1782 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1853 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
1783 F2FS_INLINE_XATTR_ADDRS]); 1854 F2FS_INLINE_XATTR_ADDRS]);
1784} 1855}
@@ -1817,6 +1888,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
1817 return is_inode_flag_set(inode, FI_ATOMIC_FILE); 1888 return is_inode_flag_set(inode, FI_ATOMIC_FILE);
1818} 1889}
1819 1890
1891static inline bool f2fs_is_commit_atomic_write(struct inode *inode)
1892{
1893 return is_inode_flag_set(inode, FI_ATOMIC_COMMIT);
1894}
1895
1820static inline bool f2fs_is_volatile_file(struct inode *inode) 1896static inline bool f2fs_is_volatile_file(struct inode *inode)
1821{ 1897{
1822 return is_inode_flag_set(inode, FI_VOLATILE_FILE); 1898 return is_inode_flag_set(inode, FI_VOLATILE_FILE);
@@ -1835,6 +1911,7 @@ static inline bool f2fs_is_drop_cache(struct inode *inode)
1835static inline void *inline_data_addr(struct page *page) 1911static inline void *inline_data_addr(struct page *page)
1836{ 1912{
1837 struct f2fs_inode *ri = F2FS_INODE(page); 1913 struct f2fs_inode *ri = F2FS_INODE(page);
1914
1838 return (void *)&(ri->i_addr[1]); 1915 return (void *)&(ri->i_addr[1]);
1839} 1916}
1840 1917
@@ -1918,8 +1995,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
1918 size_t size, gfp_t flags) 1995 size_t size, gfp_t flags)
1919{ 1996{
1920#ifdef CONFIG_F2FS_FAULT_INJECTION 1997#ifdef CONFIG_F2FS_FAULT_INJECTION
1921 if (time_to_inject(sbi, FAULT_KMALLOC)) 1998 if (time_to_inject(sbi, FAULT_KMALLOC)) {
1999 f2fs_show_injection_info(FAULT_KMALLOC);
1922 return NULL; 2000 return NULL;
2001 }
1923#endif 2002#endif
1924 return kmalloc(size, flags); 2003 return kmalloc(size, flags);
1925} 2004}
@@ -1957,29 +2036,30 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
1957/* 2036/*
1958 * file.c 2037 * file.c
1959 */ 2038 */
1960int f2fs_sync_file(struct file *, loff_t, loff_t, int); 2039int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
1961void truncate_data_blocks(struct dnode_of_data *); 2040void truncate_data_blocks(struct dnode_of_data *dn);
1962int truncate_blocks(struct inode *, u64, bool); 2041int truncate_blocks(struct inode *inode, u64 from, bool lock);
1963int f2fs_truncate(struct inode *); 2042int f2fs_truncate(struct inode *inode);
1964int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2043int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1965int f2fs_setattr(struct dentry *, struct iattr *); 2044 struct kstat *stat);
1966int truncate_hole(struct inode *, pgoff_t, pgoff_t); 2045int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
1967int truncate_data_blocks_range(struct dnode_of_data *, int); 2046int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
1968long f2fs_ioctl(struct file *, unsigned int, unsigned long); 2047int truncate_data_blocks_range(struct dnode_of_data *dn, int count);
1969long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); 2048long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
2049long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1970 2050
1971/* 2051/*
1972 * inode.c 2052 * inode.c
1973 */ 2053 */
1974void f2fs_set_inode_flags(struct inode *); 2054void f2fs_set_inode_flags(struct inode *inode);
1975struct inode *f2fs_iget(struct super_block *, unsigned long); 2055struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
1976struct inode *f2fs_iget_retry(struct super_block *, unsigned long); 2056struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
1977int try_to_free_nats(struct f2fs_sb_info *, int); 2057int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
1978int update_inode(struct inode *, struct page *); 2058int update_inode(struct inode *inode, struct page *node_page);
1979int update_inode_page(struct inode *); 2059int update_inode_page(struct inode *inode);
1980int f2fs_write_inode(struct inode *, struct writeback_control *); 2060int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
1981void f2fs_evict_inode(struct inode *); 2061void f2fs_evict_inode(struct inode *inode);
1982void handle_failed_inode(struct inode *); 2062void handle_failed_inode(struct inode *inode);
1983 2063
1984/* 2064/*
1985 * namei.c 2065 * namei.c
@@ -1989,40 +2069,47 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1989/* 2069/*
1990 * dir.c 2070 * dir.c
1991 */ 2071 */
1992void set_de_type(struct f2fs_dir_entry *, umode_t); 2072void set_de_type(struct f2fs_dir_entry *de, umode_t mode);
1993unsigned char get_de_type(struct f2fs_dir_entry *); 2073unsigned char get_de_type(struct f2fs_dir_entry *de);
1994struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, 2074struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
1995 f2fs_hash_t, int *, struct f2fs_dentry_ptr *); 2075 f2fs_hash_t namehash, int *max_slots,
1996int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 2076 struct f2fs_dentry_ptr *d);
1997 unsigned int, struct fscrypt_str *); 2077int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
1998void do_make_empty_dir(struct inode *, struct inode *, 2078 unsigned int start_pos, struct fscrypt_str *fstr);
1999 struct f2fs_dentry_ptr *); 2079void do_make_empty_dir(struct inode *inode, struct inode *parent,
2000struct page *init_inode_metadata(struct inode *, struct inode *, 2080 struct f2fs_dentry_ptr *d);
2001 const struct qstr *, const struct qstr *, struct page *); 2081struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
2002void update_parent_metadata(struct inode *, struct inode *, unsigned int); 2082 const struct qstr *new_name,
2003int room_for_filename(const void *, int, int); 2083 const struct qstr *orig_name, struct page *dpage);
2004void f2fs_drop_nlink(struct inode *, struct inode *); 2084void update_parent_metadata(struct inode *dir, struct inode *inode,
2005struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *, 2085 unsigned int current_depth);
2006 struct page **); 2086int room_for_filename(const void *bitmap, int slots, int max_slots);
2007struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *, 2087void f2fs_drop_nlink(struct inode *dir, struct inode *inode);
2008 struct page **); 2088struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
2009struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); 2089 struct fscrypt_name *fname, struct page **res_page);
2010ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **); 2090struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
2011void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 2091 const struct qstr *child, struct page **res_page);
2012 struct page *, struct inode *); 2092struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p);
2013int update_dent_inode(struct inode *, struct inode *, const struct qstr *); 2093ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr,
2014void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, 2094 struct page **page);
2015 const struct qstr *, f2fs_hash_t , unsigned int); 2095void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
2016int f2fs_add_regular_entry(struct inode *, const struct qstr *, 2096 struct page *page, struct inode *inode);
2017 const struct qstr *, struct inode *, nid_t, umode_t); 2097int update_dent_inode(struct inode *inode, struct inode *to,
2018int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, 2098 const struct qstr *name);
2019 nid_t, umode_t); 2099void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
2020int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, 2100 const struct qstr *name, f2fs_hash_t name_hash,
2021 umode_t); 2101 unsigned int bit_pos);
2022void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 2102int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
2023 struct inode *); 2103 const struct qstr *orig_name,
2024int f2fs_do_tmpfile(struct inode *, struct inode *); 2104 struct inode *inode, nid_t ino, umode_t mode);
2025bool f2fs_empty_dir(struct inode *); 2105int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname,
2106 struct inode *inode, nid_t ino, umode_t mode);
2107int __f2fs_add_link(struct inode *dir, const struct qstr *name,
2108 struct inode *inode, nid_t ino, umode_t mode);
2109void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
2110 struct inode *dir, struct inode *inode);
2111int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
2112bool f2fs_empty_dir(struct inode *dir);
2026 2113
2027static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) 2114static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
2028{ 2115{
@@ -2033,18 +2120,18 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
2033/* 2120/*
2034 * super.c 2121 * super.c
2035 */ 2122 */
2036int f2fs_inode_dirtied(struct inode *, bool); 2123int f2fs_inode_dirtied(struct inode *inode, bool sync);
2037void f2fs_inode_synced(struct inode *); 2124void f2fs_inode_synced(struct inode *inode);
2038int f2fs_commit_super(struct f2fs_sb_info *, bool); 2125int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
2039int f2fs_sync_fs(struct super_block *, int); 2126int f2fs_sync_fs(struct super_block *sb, int sync);
2040extern __printf(3, 4) 2127extern __printf(3, 4)
2041void f2fs_msg(struct super_block *, const char *, const char *, ...); 2128void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
2042int sanity_check_ckpt(struct f2fs_sb_info *sbi); 2129int sanity_check_ckpt(struct f2fs_sb_info *sbi);
2043 2130
2044/* 2131/*
2045 * hash.c 2132 * hash.c
2046 */ 2133 */
2047f2fs_hash_t f2fs_dentry_hash(const struct qstr *); 2134f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info);
2048 2135
2049/* 2136/*
2050 * node.c 2137 * node.c
@@ -2052,163 +2139,183 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
2052struct dnode_of_data; 2139struct dnode_of_data;
2053struct node_info; 2140struct node_info;
2054 2141
2055bool available_free_memory(struct f2fs_sb_info *, int); 2142bool available_free_memory(struct f2fs_sb_info *sbi, int type);
2056int need_dentry_mark(struct f2fs_sb_info *, nid_t); 2143int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
2057bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); 2144bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
2058bool need_inode_block_update(struct f2fs_sb_info *, nid_t); 2145bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
2059void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 2146void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
2060pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t); 2147pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
2061int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 2148int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
2062int truncate_inode_blocks(struct inode *, pgoff_t); 2149int truncate_inode_blocks(struct inode *inode, pgoff_t from);
2063int truncate_xattr_node(struct inode *, struct page *); 2150int truncate_xattr_node(struct inode *inode, struct page *page);
2064int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 2151int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
2065int remove_inode_page(struct inode *); 2152int remove_inode_page(struct inode *inode);
2066struct page *new_inode_page(struct inode *); 2153struct page *new_inode_page(struct inode *inode);
2067struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 2154struct page *new_node_page(struct dnode_of_data *dn,
2068void ra_node_page(struct f2fs_sb_info *, nid_t); 2155 unsigned int ofs, struct page *ipage);
2069struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 2156void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
2070struct page *get_node_page_ra(struct page *, int); 2157struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
2071void move_node_page(struct page *, int); 2158struct page *get_node_page_ra(struct page *parent, int start);
2072int fsync_node_pages(struct f2fs_sb_info *, struct inode *, 2159void move_node_page(struct page *node_page, int gc_type);
2073 struct writeback_control *, bool); 2160int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
2074int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); 2161 struct writeback_control *wbc, bool atomic);
2075void build_free_nids(struct f2fs_sb_info *, bool); 2162int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc);
2076bool alloc_nid(struct f2fs_sb_info *, nid_t *); 2163void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
2077void alloc_nid_done(struct f2fs_sb_info *, nid_t); 2164bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
2078void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 2165void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
2079int try_to_free_nids(struct f2fs_sb_info *, int); 2166void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
2080void recover_inline_xattr(struct inode *, struct page *); 2167int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
2081void recover_xattr_data(struct inode *, struct page *, block_t); 2168void recover_inline_xattr(struct inode *inode, struct page *page);
2082int recover_inode_page(struct f2fs_sb_info *, struct page *); 2169int recover_xattr_data(struct inode *inode, struct page *page,
2083int restore_node_summary(struct f2fs_sb_info *, unsigned int, 2170 block_t blkaddr);
2084 struct f2fs_summary_block *); 2171int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
2085void flush_nat_entries(struct f2fs_sb_info *); 2172int restore_node_summary(struct f2fs_sb_info *sbi,
2086int build_node_manager(struct f2fs_sb_info *); 2173 unsigned int segno, struct f2fs_summary_block *sum);
2087void destroy_node_manager(struct f2fs_sb_info *); 2174void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2175int build_node_manager(struct f2fs_sb_info *sbi);
2176void destroy_node_manager(struct f2fs_sb_info *sbi);
2088int __init create_node_manager_caches(void); 2177int __init create_node_manager_caches(void);
2089void destroy_node_manager_caches(void); 2178void destroy_node_manager_caches(void);
2090 2179
2091/* 2180/*
2092 * segment.c 2181 * segment.c
2093 */ 2182 */
2094void register_inmem_page(struct inode *, struct page *); 2183void register_inmem_page(struct inode *inode, struct page *page);
2095void drop_inmem_pages(struct inode *); 2184void drop_inmem_pages(struct inode *inode);
2096int commit_inmem_pages(struct inode *); 2185int commit_inmem_pages(struct inode *inode);
2097void f2fs_balance_fs(struct f2fs_sb_info *, bool); 2186void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
2098void f2fs_balance_fs_bg(struct f2fs_sb_info *); 2187void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
2099int f2fs_issue_flush(struct f2fs_sb_info *); 2188int f2fs_issue_flush(struct f2fs_sb_info *sbi);
2100int create_flush_cmd_control(struct f2fs_sb_info *); 2189int create_flush_cmd_control(struct f2fs_sb_info *sbi);
2101void destroy_flush_cmd_control(struct f2fs_sb_info *, bool); 2190void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
2102void invalidate_blocks(struct f2fs_sb_info *, block_t); 2191void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
2103bool is_checkpointed_data(struct f2fs_sb_info *, block_t); 2192bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
2104void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 2193void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
2105void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); 2194void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr);
2106void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 2195void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2107void release_discard_addrs(struct f2fs_sb_info *); 2196void release_discard_addrs(struct f2fs_sb_info *sbi);
2108int npages_for_summary_flush(struct f2fs_sb_info *, bool); 2197int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
2109void allocate_new_segments(struct f2fs_sb_info *); 2198void allocate_new_segments(struct f2fs_sb_info *sbi);
2110int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 2199int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
2111struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 2200bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2112void update_meta_page(struct f2fs_sb_info *, void *, block_t); 2201struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
2113void write_meta_page(struct f2fs_sb_info *, struct page *); 2202void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr);
2114void write_node_page(unsigned int, struct f2fs_io_info *); 2203void write_meta_page(struct f2fs_sb_info *sbi, struct page *page);
2115void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); 2204void write_node_page(unsigned int nid, struct f2fs_io_info *fio);
2116void rewrite_data_page(struct f2fs_io_info *); 2205void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio);
2117void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, 2206void rewrite_data_page(struct f2fs_io_info *fio);
2118 block_t, block_t, bool, bool); 2207void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2119void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, 2208 block_t old_blkaddr, block_t new_blkaddr,
2120 block_t, block_t, unsigned char, bool, bool); 2209 bool recover_curseg, bool recover_newaddr);
2121void allocate_data_block(struct f2fs_sb_info *, struct page *, 2210void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
2122 block_t, block_t *, struct f2fs_summary *, int); 2211 block_t old_addr, block_t new_addr,
2123void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); 2212 unsigned char version, bool recover_curseg,
2124void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); 2213 bool recover_newaddr);
2125void write_data_summaries(struct f2fs_sb_info *, block_t); 2214void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2126void write_node_summaries(struct f2fs_sb_info *, block_t); 2215 block_t old_blkaddr, block_t *new_blkaddr,
2127int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int); 2216 struct f2fs_summary *sum, int type);
2128void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); 2217void f2fs_wait_on_page_writeback(struct page *page,
2129int build_segment_manager(struct f2fs_sb_info *); 2218 enum page_type type, bool ordered);
2130void destroy_segment_manager(struct f2fs_sb_info *); 2219void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
2220 block_t blkaddr);
2221void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
2222void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
2223int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
2224 unsigned int val, int alloc);
2225void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2226int build_segment_manager(struct f2fs_sb_info *sbi);
2227void destroy_segment_manager(struct f2fs_sb_info *sbi);
2131int __init create_segment_manager_caches(void); 2228int __init create_segment_manager_caches(void);
2132void destroy_segment_manager_caches(void); 2229void destroy_segment_manager_caches(void);
2133 2230
2134/* 2231/*
2135 * checkpoint.c 2232 * checkpoint.c
2136 */ 2233 */
2137void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); 2234void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
2138struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 2235struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
2139struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 2236struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
2140struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); 2237struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
2141bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); 2238bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type);
2142int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); 2239int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
2143void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); 2240 int type, bool sync);
2144long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 2241void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
2145void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); 2242long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
2146void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); 2243 long nr_to_write);
2147void release_ino_entry(struct f2fs_sb_info *, bool); 2244void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2148bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 2245void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2149int f2fs_sync_inode_meta(struct f2fs_sb_info *); 2246void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
2150int acquire_orphan_inode(struct f2fs_sb_info *); 2247bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
2151void release_orphan_inode(struct f2fs_sb_info *); 2248int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
2152void add_orphan_inode(struct inode *); 2249int acquire_orphan_inode(struct f2fs_sb_info *sbi);
2153void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 2250void release_orphan_inode(struct f2fs_sb_info *sbi);
2154int recover_orphan_inodes(struct f2fs_sb_info *); 2251void add_orphan_inode(struct inode *inode);
2155int get_valid_checkpoint(struct f2fs_sb_info *); 2252void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino);
2156void update_dirty_page(struct inode *, struct page *); 2253int recover_orphan_inodes(struct f2fs_sb_info *sbi);
2157void remove_dirty_inode(struct inode *); 2254int get_valid_checkpoint(struct f2fs_sb_info *sbi);
2158int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); 2255void update_dirty_page(struct inode *inode, struct page *page);
2159int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); 2256void remove_dirty_inode(struct inode *inode);
2160void init_ino_entry_info(struct f2fs_sb_info *); 2257int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
2258int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2259void init_ino_entry_info(struct f2fs_sb_info *sbi);
2161int __init create_checkpoint_caches(void); 2260int __init create_checkpoint_caches(void);
2162void destroy_checkpoint_caches(void); 2261void destroy_checkpoint_caches(void);
2163 2262
2164/* 2263/*
2165 * data.c 2264 * data.c
2166 */ 2265 */
2167void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); 2266void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
2168void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, 2267 int rw);
2169 struct page *, nid_t, enum page_type, int); 2268void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
2170void f2fs_flush_merged_bios(struct f2fs_sb_info *); 2269 struct inode *inode, nid_t ino, pgoff_t idx,
2171int f2fs_submit_page_bio(struct f2fs_io_info *); 2270 enum page_type type, int rw);
2172void f2fs_submit_page_mbio(struct f2fs_io_info *); 2271void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
2173struct block_device *f2fs_target_device(struct f2fs_sb_info *, 2272int f2fs_submit_page_bio(struct f2fs_io_info *fio);
2174 block_t, struct bio *); 2273int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
2175int f2fs_target_device_index(struct f2fs_sb_info *, block_t); 2274struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
2176void set_data_blkaddr(struct dnode_of_data *); 2275 block_t blk_addr, struct bio *bio);
2177void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 2276int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
2178int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); 2277void set_data_blkaddr(struct dnode_of_data *dn);
2179int reserve_new_block(struct dnode_of_data *); 2278void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
2180int f2fs_get_block(struct dnode_of_data *, pgoff_t); 2279int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
2181int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); 2280int reserve_new_block(struct dnode_of_data *dn);
2182int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 2281int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
2183struct page *get_read_data_page(struct inode *, pgoff_t, int, bool); 2282int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
2184struct page *find_data_page(struct inode *, pgoff_t); 2283int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
2185struct page *get_lock_data_page(struct inode *, pgoff_t, bool); 2284struct page *get_read_data_page(struct inode *inode, pgoff_t index,
2186struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 2285 int op_flags, bool for_write);
2187int do_write_data_page(struct f2fs_io_info *); 2286struct page *find_data_page(struct inode *inode, pgoff_t index);
2188int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); 2287struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
2189int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 2288 bool for_write);
2190void f2fs_set_page_dirty_nobuffers(struct page *); 2289struct page *get_new_data_page(struct inode *inode,
2191void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 2290 struct page *ipage, pgoff_t index, bool new_i_size);
2192int f2fs_release_page(struct page *, gfp_t); 2291int do_write_data_page(struct f2fs_io_info *fio);
2292int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
2293 int create, int flag);
2294int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2295 u64 start, u64 len);
2296void f2fs_set_page_dirty_nobuffers(struct page *page);
2297void f2fs_invalidate_page(struct page *page, unsigned int offset,
2298 unsigned int length);
2299int f2fs_release_page(struct page *page, gfp_t wait);
2193#ifdef CONFIG_MIGRATION 2300#ifdef CONFIG_MIGRATION
2194int f2fs_migrate_page(struct address_space *, struct page *, struct page *, 2301int f2fs_migrate_page(struct address_space *mapping, struct page *newpage,
2195 enum migrate_mode); 2302 struct page *page, enum migrate_mode mode);
2196#endif 2303#endif
2197 2304
2198/* 2305/*
2199 * gc.c 2306 * gc.c
2200 */ 2307 */
2201int start_gc_thread(struct f2fs_sb_info *); 2308int start_gc_thread(struct f2fs_sb_info *sbi);
2202void stop_gc_thread(struct f2fs_sb_info *); 2309void stop_gc_thread(struct f2fs_sb_info *sbi);
2203block_t start_bidx_of_node(unsigned int, struct inode *); 2310block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
2204int f2fs_gc(struct f2fs_sb_info *, bool, bool); 2311int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background);
2205void build_gc_manager(struct f2fs_sb_info *); 2312void build_gc_manager(struct f2fs_sb_info *sbi);
2206 2313
2207/* 2314/*
2208 * recovery.c 2315 * recovery.c
2209 */ 2316 */
2210int recover_fsync_data(struct f2fs_sb_info *, bool); 2317int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only);
2211bool space_for_roll_forward(struct f2fs_sb_info *); 2318bool space_for_roll_forward(struct f2fs_sb_info *sbi);
2212 2319
2213/* 2320/*
2214 * debug.c 2321 * debug.c
@@ -2227,8 +2334,9 @@ struct f2fs_stat_info {
2227 unsigned int ndirty_dirs, ndirty_files, ndirty_all; 2334 unsigned int ndirty_dirs, ndirty_files, ndirty_all;
2228 int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; 2335 int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
2229 int total_count, utilization; 2336 int total_count, utilization;
2230 int bg_gc, nr_wb_cp_data, nr_wb_data; 2337 int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard;
2231 int inline_xattr, inline_inode, inline_dir, orphans; 2338 int inline_xattr, inline_inode, inline_dir, append, update, orphans;
2339 int aw_cnt, max_aw_cnt;
2232 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; 2340 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
2233 unsigned int bimodal, avg_vblocks; 2341 unsigned int bimodal, avg_vblocks;
2234 int util_free, util_valid, util_invalid; 2342 int util_free, util_valid, util_invalid;
@@ -2300,6 +2408,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
2300 ((sbi)->block_count[(curseg)->alloc_type]++) 2408 ((sbi)->block_count[(curseg)->alloc_type]++)
2301#define stat_inc_inplace_blocks(sbi) \ 2409#define stat_inc_inplace_blocks(sbi) \
2302 (atomic_inc(&(sbi)->inplace_count)) 2410 (atomic_inc(&(sbi)->inplace_count))
2411#define stat_inc_atomic_write(inode) \
2412 (atomic_inc(&F2FS_I_SB(inode)->aw_cnt))
2413#define stat_dec_atomic_write(inode) \
2414 (atomic_dec(&F2FS_I_SB(inode)->aw_cnt))
2415#define stat_update_max_atomic_write(inode) \
2416 do { \
2417 int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \
2418 int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \
2419 if (cur > max) \
2420 atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
2421 } while (0)
2303#define stat_inc_seg_count(sbi, type, gc_type) \ 2422#define stat_inc_seg_count(sbi, type, gc_type) \
2304 do { \ 2423 do { \
2305 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 2424 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -2332,8 +2451,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
2332 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ 2451 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
2333 } while (0) 2452 } while (0)
2334 2453
2335int f2fs_build_stats(struct f2fs_sb_info *); 2454int f2fs_build_stats(struct f2fs_sb_info *sbi);
2336void f2fs_destroy_stats(struct f2fs_sb_info *); 2455void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
2337int __init f2fs_create_root_stats(void); 2456int __init f2fs_create_root_stats(void);
2338void f2fs_destroy_root_stats(void); 2457void f2fs_destroy_root_stats(void);
2339#else 2458#else
@@ -2353,6 +2472,9 @@ void f2fs_destroy_root_stats(void);
2353#define stat_dec_inline_inode(inode) 2472#define stat_dec_inline_inode(inode)
2354#define stat_inc_inline_dir(inode) 2473#define stat_inc_inline_dir(inode)
2355#define stat_dec_inline_dir(inode) 2474#define stat_dec_inline_dir(inode)
2475#define stat_inc_atomic_write(inode)
2476#define stat_dec_atomic_write(inode)
2477#define stat_update_max_atomic_write(inode)
2356#define stat_inc_seg_type(sbi, curseg) 2478#define stat_inc_seg_type(sbi, curseg)
2357#define stat_inc_block_count(sbi, curseg) 2479#define stat_inc_block_count(sbi, curseg)
2358#define stat_inc_inplace_blocks(sbi) 2480#define stat_inc_inplace_blocks(sbi)
@@ -2382,49 +2504,55 @@ extern struct kmem_cache *inode_entry_slab;
2382/* 2504/*
2383 * inline.c 2505 * inline.c
2384 */ 2506 */
2385bool f2fs_may_inline_data(struct inode *); 2507bool f2fs_may_inline_data(struct inode *inode);
2386bool f2fs_may_inline_dentry(struct inode *); 2508bool f2fs_may_inline_dentry(struct inode *inode);
2387void read_inline_data(struct page *, struct page *); 2509void read_inline_data(struct page *page, struct page *ipage);
2388bool truncate_inline_inode(struct page *, u64); 2510bool truncate_inline_inode(struct page *ipage, u64 from);
2389int f2fs_read_inline_data(struct inode *, struct page *); 2511int f2fs_read_inline_data(struct inode *inode, struct page *page);
2390int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 2512int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
2391int f2fs_convert_inline_inode(struct inode *); 2513int f2fs_convert_inline_inode(struct inode *inode);
2392int f2fs_write_inline_data(struct inode *, struct page *); 2514int f2fs_write_inline_data(struct inode *inode, struct page *page);
2393bool recover_inline_data(struct inode *, struct page *); 2515bool recover_inline_data(struct inode *inode, struct page *npage);
2394struct f2fs_dir_entry *find_in_inline_dir(struct inode *, 2516struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
2395 struct fscrypt_name *, struct page **); 2517 struct fscrypt_name *fname, struct page **res_page);
2396int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); 2518int make_empty_inline_dir(struct inode *inode, struct inode *parent,
2397int f2fs_add_inline_entry(struct inode *, const struct qstr *, 2519 struct page *ipage);
2398 const struct qstr *, struct inode *, nid_t, umode_t); 2520int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
2399void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, 2521 const struct qstr *orig_name,
2400 struct inode *, struct inode *); 2522 struct inode *inode, nid_t ino, umode_t mode);
2401bool f2fs_empty_inline_dir(struct inode *); 2523void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
2402int f2fs_read_inline_dir(struct file *, struct dir_context *, 2524 struct inode *dir, struct inode *inode);
2403 struct fscrypt_str *); 2525bool f2fs_empty_inline_dir(struct inode *dir);
2404int f2fs_inline_data_fiemap(struct inode *, 2526int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
2405 struct fiemap_extent_info *, __u64, __u64); 2527 struct fscrypt_str *fstr);
2528int f2fs_inline_data_fiemap(struct inode *inode,
2529 struct fiemap_extent_info *fieinfo,
2530 __u64 start, __u64 len);
2406 2531
2407/* 2532/*
2408 * shrinker.c 2533 * shrinker.c
2409 */ 2534 */
2410unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); 2535unsigned long f2fs_shrink_count(struct shrinker *shrink,
2411unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); 2536 struct shrink_control *sc);
2412void f2fs_join_shrinker(struct f2fs_sb_info *); 2537unsigned long f2fs_shrink_scan(struct shrinker *shrink,
2413void f2fs_leave_shrinker(struct f2fs_sb_info *); 2538 struct shrink_control *sc);
2539void f2fs_join_shrinker(struct f2fs_sb_info *sbi);
2540void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
2414 2541
2415/* 2542/*
2416 * extent_cache.c 2543 * extent_cache.c
2417 */ 2544 */
2418unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); 2545unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
2419bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); 2546bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
2420void f2fs_drop_extent_tree(struct inode *); 2547void f2fs_drop_extent_tree(struct inode *inode);
2421unsigned int f2fs_destroy_extent_node(struct inode *); 2548unsigned int f2fs_destroy_extent_node(struct inode *inode);
2422void f2fs_destroy_extent_tree(struct inode *); 2549void f2fs_destroy_extent_tree(struct inode *inode);
2423bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); 2550bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
2424void f2fs_update_extent_cache(struct dnode_of_data *); 2551 struct extent_info *ei);
2552void f2fs_update_extent_cache(struct dnode_of_data *dn);
2425void f2fs_update_extent_cache_range(struct dnode_of_data *dn, 2553void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
2426 pgoff_t, block_t, unsigned int); 2554 pgoff_t fofs, block_t blkaddr, unsigned int len);
2427void init_extent_cache_info(struct f2fs_sb_info *); 2555void init_extent_cache_info(struct f2fs_sb_info *sbi);
2428int __init create_extent_cache(void); 2556int __init create_extent_cache(void);
2429void destroy_extent_cache(void); 2557void destroy_extent_cache(void);
2430 2558
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 1edc86e874e3..78e65288f2b2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/pagevec.h> 22#include <linux/pagevec.h>
23#include <linux/uio.h>
23#include <linux/uuid.h> 24#include <linux/uuid.h>
24#include <linux/file.h> 25#include <linux/file.h>
25 26
@@ -140,8 +141,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
140 need_cp = true; 141 need_cp = true;
141 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 142 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
142 need_cp = true; 143 need_cp = true;
143 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
144 need_cp = true;
145 else if (test_opt(sbi, FASTBOOT)) 144 else if (test_opt(sbi, FASTBOOT))
146 need_cp = true; 145 need_cp = true;
147 else if (sbi->active_logs == 2) 146 else if (sbi->active_logs == 2)
@@ -167,7 +166,6 @@ static void try_to_fix_pino(struct inode *inode)
167 nid_t pino; 166 nid_t pino;
168 167
169 down_write(&fi->i_sem); 168 down_write(&fi->i_sem);
170 fi->xattr_ver = 0;
171 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 169 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
172 get_parent_ino(inode, &pino)) { 170 get_parent_ino(inode, &pino)) {
173 f2fs_i_pino_write(inode, pino); 171 f2fs_i_pino_write(inode, pino);
@@ -276,7 +274,8 @@ sync_nodes:
276flush_out: 274flush_out:
277 remove_ino_entry(sbi, ino, UPDATE_INO); 275 remove_ino_entry(sbi, ino, UPDATE_INO);
278 clear_inode_flag(inode, FI_UPDATE_WRITE); 276 clear_inode_flag(inode, FI_UPDATE_WRITE);
279 ret = f2fs_issue_flush(sbi); 277 if (!atomic)
278 ret = f2fs_issue_flush(sbi);
280 f2fs_update_time(sbi, REQ_TIME); 279 f2fs_update_time(sbi, REQ_TIME);
281out: 280out:
282 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 281 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -567,8 +566,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
567 } 566 }
568 567
569 if (f2fs_has_inline_data(inode)) { 568 if (f2fs_has_inline_data(inode)) {
570 if (truncate_inline_inode(ipage, from)) 569 truncate_inline_inode(ipage, from);
571 set_page_dirty(ipage); 570 if (from == 0)
571 clear_inode_flag(inode, FI_DATA_EXIST);
572 f2fs_put_page(ipage, 1); 572 f2fs_put_page(ipage, 1);
573 truncate_page = true; 573 truncate_page = true;
574 goto out; 574 goto out;
@@ -1541,6 +1541,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1541 if (ret) 1541 if (ret)
1542 clear_inode_flag(inode, FI_ATOMIC_FILE); 1542 clear_inode_flag(inode, FI_ATOMIC_FILE);
1543out: 1543out:
1544 stat_inc_atomic_write(inode);
1545 stat_update_max_atomic_write(inode);
1544 inode_unlock(inode); 1546 inode_unlock(inode);
1545 mnt_drop_write_file(filp); 1547 mnt_drop_write_file(filp);
1546 return ret; 1548 return ret;
@@ -1564,15 +1566,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1564 goto err_out; 1566 goto err_out;
1565 1567
1566 if (f2fs_is_atomic_file(inode)) { 1568 if (f2fs_is_atomic_file(inode)) {
1567 clear_inode_flag(inode, FI_ATOMIC_FILE);
1568 ret = commit_inmem_pages(inode); 1569 ret = commit_inmem_pages(inode);
1569 if (ret) { 1570 if (ret)
1570 set_inode_flag(inode, FI_ATOMIC_FILE);
1571 goto err_out; 1571 goto err_out;
1572
1573 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1574 if (!ret) {
1575 clear_inode_flag(inode, FI_ATOMIC_FILE);
1576 stat_dec_atomic_write(inode);
1572 } 1577 }
1578 } else {
1579 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1573 } 1580 }
1574
1575 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1576err_out: 1581err_out:
1577 inode_unlock(inode); 1582 inode_unlock(inode);
1578 mnt_drop_write_file(filp); 1583 mnt_drop_write_file(filp);
@@ -1870,7 +1875,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
1870{ 1875{
1871 struct inode *inode = file_inode(filp); 1876 struct inode *inode = file_inode(filp);
1872 struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; 1877 struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
1873 struct extent_info ei; 1878 struct extent_info ei = {0,0,0};
1874 pgoff_t pg_start, pg_end; 1879 pgoff_t pg_start, pg_end;
1875 unsigned int blk_per_seg = sbi->blocks_per_seg; 1880 unsigned int blk_per_seg = sbi->blocks_per_seg;
1876 unsigned int total = 0, sec_num; 1881 unsigned int total = 0, sec_num;
@@ -2250,8 +2255,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2250 inode_lock(inode); 2255 inode_lock(inode);
2251 ret = generic_write_checks(iocb, from); 2256 ret = generic_write_checks(iocb, from);
2252 if (ret > 0) { 2257 if (ret > 0) {
2253 int err = f2fs_preallocate_blocks(iocb, from); 2258 int err;
2259
2260 if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
2261 set_inode_flag(inode, FI_NO_PREALLOC);
2254 2262
2263 err = f2fs_preallocate_blocks(iocb, from);
2255 if (err) { 2264 if (err) {
2256 inode_unlock(inode); 2265 inode_unlock(inode);
2257 return err; 2266 return err;
@@ -2259,6 +2268,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2259 blk_start_plug(&plug); 2268 blk_start_plug(&plug);
2260 ret = __generic_file_write_iter(iocb, from); 2269 ret = __generic_file_write_iter(iocb, from);
2261 blk_finish_plug(&plug); 2270 blk_finish_plug(&plug);
2271 clear_inode_flag(inode, FI_NO_PREALLOC);
2262 } 2272 }
2263 inode_unlock(inode); 2273 inode_unlock(inode);
2264 2274
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 88bfc3dff496..418fd9881646 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
48 } 48 }
49 49
50#ifdef CONFIG_F2FS_FAULT_INJECTION 50#ifdef CONFIG_F2FS_FAULT_INJECTION
51 if (time_to_inject(sbi, FAULT_CHECKPOINT)) 51 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
52 f2fs_show_injection_info(FAULT_CHECKPOINT);
52 f2fs_stop_checkpoint(sbi, false); 53 f2fs_stop_checkpoint(sbi, false);
54 }
53#endif 55#endif
54 56
55 /* 57 /*
@@ -166,7 +168,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
166 p->ofs_unit = sbi->segs_per_sec; 168 p->ofs_unit = sbi->segs_per_sec;
167 } 169 }
168 170
169 if (p->max_search > sbi->max_victim_search) 171 /* we need to check every dirty segments in the FG_GC case */
172 if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
170 p->max_search = sbi->max_victim_search; 173 p->max_search = sbi->max_victim_search;
171 174
172 p->offset = sbi->last_victim[p->gc_mode]; 175 p->offset = sbi->last_victim[p->gc_mode];
@@ -199,6 +202,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
199 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { 202 for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 203 if (sec_usage_check(sbi, secno))
201 continue; 204 continue;
205
206 if (no_fggc_candidate(sbi, secno))
207 continue;
208
202 clear_bit(secno, dirty_i->victim_secmap); 209 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 210 return secno * sbi->segs_per_sec;
204 } 211 }
@@ -237,6 +244,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
237 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); 244 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
238} 245}
239 246
247static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
248 unsigned int segno)
249{
250 unsigned int valid_blocks =
251 get_valid_blocks(sbi, segno, sbi->segs_per_sec);
252
253 return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
254 valid_blocks * 2 : valid_blocks;
255}
256
240static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, 257static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
241 unsigned int segno, struct victim_sel_policy *p) 258 unsigned int segno, struct victim_sel_policy *p)
242{ 259{
@@ -245,7 +262,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
245 262
246 /* alloc_mode == LFS */ 263 /* alloc_mode == LFS */
247 if (p->gc_mode == GC_GREEDY) 264 if (p->gc_mode == GC_GREEDY)
248 return get_valid_blocks(sbi, segno, sbi->segs_per_sec); 265 return get_greedy_cost(sbi, segno);
249 else 266 else
250 return get_cb_cost(sbi, segno); 267 return get_cb_cost(sbi, segno);
251} 268}
@@ -322,13 +339,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
322 nsearched++; 339 nsearched++;
323 } 340 }
324 341
325
326 secno = GET_SECNO(sbi, segno); 342 secno = GET_SECNO(sbi, segno);
327 343
328 if (sec_usage_check(sbi, secno)) 344 if (sec_usage_check(sbi, secno))
329 goto next; 345 goto next;
330 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) 346 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
331 goto next; 347 goto next;
348 if (gc_type == FG_GC && p.alloc_mode == LFS &&
349 no_fggc_candidate(sbi, secno))
350 goto next;
332 351
333 cost = get_gc_cost(sbi, segno, &p); 352 cost = get_gc_cost(sbi, segno, &p);
334 353
@@ -569,6 +588,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
569 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) 588 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
570 goto out; 589 goto out;
571 590
591 if (f2fs_is_atomic_file(inode))
592 goto out;
593
572 set_new_dnode(&dn, inode, NULL, NULL, 0); 594 set_new_dnode(&dn, inode, NULL, NULL, 0);
573 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); 595 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
574 if (err) 596 if (err)
@@ -661,6 +683,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
661 if (!check_valid_map(F2FS_I_SB(inode), segno, off)) 683 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
662 goto out; 684 goto out;
663 685
686 if (f2fs_is_atomic_file(inode))
687 goto out;
688
664 if (gc_type == BG_GC) { 689 if (gc_type == BG_GC) {
665 if (PageWriteback(page)) 690 if (PageWriteback(page))
666 goto out; 691 goto out;
@@ -921,8 +946,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
921 946
922 cpc.reason = __get_cp_reason(sbi); 947 cpc.reason = __get_cp_reason(sbi);
923gc_more: 948gc_more:
924 segno = NULL_SEGNO;
925
926 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 949 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
927 goto stop; 950 goto stop;
928 if (unlikely(f2fs_cp_error(sbi))) { 951 if (unlikely(f2fs_cp_error(sbi))) {
@@ -930,30 +953,23 @@ gc_more:
930 goto stop; 953 goto stop;
931 } 954 }
932 955
933 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) { 956 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
934 gc_type = FG_GC;
935 /* 957 /*
936 * If there is no victim and no prefree segment but still not 958 * For example, if there are many prefree_segments below given
937 * enough free sections, we should flush dent/node blocks and do 959 * threshold, we can make them free by checkpoint. Then, we
938 * garbage collections. 960 * secure free segments which doesn't need fggc any more.
939 */ 961 */
940 if (__get_victim(sbi, &segno, gc_type) || 962 ret = write_checkpoint(sbi, &cpc);
941 prefree_segments(sbi)) { 963 if (ret)
942 ret = write_checkpoint(sbi, &cpc); 964 goto stop;
943 if (ret) 965 if (has_not_enough_free_secs(sbi, 0, 0))
944 goto stop; 966 gc_type = FG_GC;
945 segno = NULL_SEGNO;
946 } else if (has_not_enough_free_secs(sbi, 0, 0)) {
947 ret = write_checkpoint(sbi, &cpc);
948 if (ret)
949 goto stop;
950 }
951 } else if (gc_type == BG_GC && !background) {
952 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
953 goto stop;
954 } 967 }
955 968
956 if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) 969 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
970 if (gc_type == BG_GC && !background)
971 goto stop;
972 if (!__get_victim(sbi, &segno, gc_type))
957 goto stop; 973 goto stop;
958 ret = 0; 974 ret = 0;
959 975
@@ -983,5 +999,16 @@ stop:
983 999
984void build_gc_manager(struct f2fs_sb_info *sbi) 1000void build_gc_manager(struct f2fs_sb_info *sbi)
985{ 1001{
1002 u64 main_count, resv_count, ovp_count, blocks_per_sec;
1003
986 DIRTY_I(sbi)->v_ops = &default_v_ops; 1004 DIRTY_I(sbi)->v_ops = &default_v_ops;
1005
1006 /* threshold of # of valid blocks in a section for victims of FG_GC */
1007 main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
1008 resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
1009 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
1010 blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
1011
1012 sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec,
1013 (main_count - resv_count));
987} 1014}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index af06bda51a54..24bb8213d974 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode)
373 goto no_delete; 373 goto no_delete;
374 374
375#ifdef CONFIG_F2FS_FAULT_INJECTION 375#ifdef CONFIG_F2FS_FAULT_INJECTION
376 if (time_to_inject(sbi, FAULT_EVICT_INODE)) 376 if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
377 f2fs_show_injection_info(FAULT_EVICT_INODE);
377 goto no_delete; 378 goto no_delete;
379 }
378#endif 380#endif
379 381
380 remove_ino_entry(sbi, inode->i_ino, APPEND_INO); 382 remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 11cabcadb1a3..98f00a3a7f50 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
321 if (err) 321 if (err)
322 goto err_out; 322 goto err_out;
323 } 323 }
324 if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) && 324 if (f2fs_encrypted_inode(dir) &&
325 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && 325 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
326 !fscrypt_has_permitted_context(dir, inode)) { 326 !fscrypt_has_permitted_context(dir, inode)) {
327 bool nokey = f2fs_encrypted_inode(inode) && 327 bool nokey = f2fs_encrypted_inode(inode) &&
328 !fscrypt_has_encryption_key(inode); 328 !fscrypt_has_encryption_key(inode);
329 err = nokey ? -ENOKEY : -EPERM; 329 err = nokey ? -ENOKEY : -EPERM;
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
663 bool is_old_inline = f2fs_has_inline_dentry(old_dir); 663 bool is_old_inline = f2fs_has_inline_dentry(old_dir);
664 int err = -ENOENT; 664 int err = -ENOENT;
665 665
666 if ((f2fs_encrypted_inode(old_dir) &&
667 !fscrypt_has_encryption_key(old_dir)) ||
668 (f2fs_encrypted_inode(new_dir) &&
669 !fscrypt_has_encryption_key(new_dir)))
670 return -ENOKEY;
671
666 if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && 672 if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
667 !fscrypt_has_permitted_context(new_dir, old_inode)) { 673 !fscrypt_has_permitted_context(new_dir, old_inode)) {
668 err = -EPERM; 674 err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
843 int old_nlink = 0, new_nlink = 0; 849 int old_nlink = 0, new_nlink = 0;
844 int err = -ENOENT; 850 int err = -ENOENT;
845 851
852 if ((f2fs_encrypted_inode(old_dir) &&
853 !fscrypt_has_encryption_key(old_dir)) ||
854 (f2fs_encrypted_inode(new_dir) &&
855 !fscrypt_has_encryption_key(new_dir)))
856 return -ENOKEY;
857
846 if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && 858 if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
847 (old_dir != new_dir) && 859 (old_dir != new_dir) &&
848 (!fscrypt_has_permitted_context(new_dir, old_inode) || 860 (!fscrypt_has_permitted_context(new_dir, old_inode) ||
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9078fdb3743..94967171dee8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -245,12 +245,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
245 return need_update; 245 return need_update;
246} 246}
247 247
248static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 248static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
249 bool no_fail)
249{ 250{
250 struct nat_entry *new; 251 struct nat_entry *new;
251 252
252 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); 253 if (no_fail) {
253 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 254 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
255 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
256 } else {
257 new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
258 if (!new)
259 return NULL;
260 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
261 kmem_cache_free(nat_entry_slab, new);
262 return NULL;
263 }
264 }
265
254 memset(new, 0, sizeof(struct nat_entry)); 266 memset(new, 0, sizeof(struct nat_entry));
255 nat_set_nid(new, nid); 267 nat_set_nid(new, nid);
256 nat_reset_flag(new); 268 nat_reset_flag(new);
@@ -267,8 +279,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
267 279
268 e = __lookup_nat_cache(nm_i, nid); 280 e = __lookup_nat_cache(nm_i, nid);
269 if (!e) { 281 if (!e) {
270 e = grab_nat_entry(nm_i, nid); 282 e = grab_nat_entry(nm_i, nid, false);
271 node_info_from_raw_nat(&e->ni, ne); 283 if (e)
284 node_info_from_raw_nat(&e->ni, ne);
272 } else { 285 } else {
273 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || 286 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
274 nat_get_blkaddr(e) != 287 nat_get_blkaddr(e) !=
@@ -286,7 +299,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
286 down_write(&nm_i->nat_tree_lock); 299 down_write(&nm_i->nat_tree_lock);
287 e = __lookup_nat_cache(nm_i, ni->nid); 300 e = __lookup_nat_cache(nm_i, ni->nid);
288 if (!e) { 301 if (!e) {
289 e = grab_nat_entry(nm_i, ni->nid); 302 e = grab_nat_entry(nm_i, ni->nid, true);
290 copy_node_info(&e->ni, ni); 303 copy_node_info(&e->ni, ni);
291 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 304 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
292 } else if (new_blkaddr == NEW_ADDR) { 305 } else if (new_blkaddr == NEW_ADDR) {
@@ -325,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
325 set_nat_flag(e, IS_CHECKPOINTED, false); 338 set_nat_flag(e, IS_CHECKPOINTED, false);
326 __set_nat_cache_dirty(nm_i, e); 339 __set_nat_cache_dirty(nm_i, e);
327 340
341 if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
342 clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);
343
328 /* update fsync_mark if its inode nat entry is still alive */ 344 /* update fsync_mark if its inode nat entry is still alive */
329 if (ni->nid != ni->ino) 345 if (ni->nid != ni->ino)
330 e = __lookup_nat_cache(nm_i, ni->ino); 346 e = __lookup_nat_cache(nm_i, ni->ino);
@@ -958,9 +974,6 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
958 974
959 f2fs_i_xnid_write(inode, 0); 975 f2fs_i_xnid_write(inode, 0);
960 976
961 /* need to do checkpoint during fsync */
962 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
963
964 set_new_dnode(&dn, inode, page, npage, nid); 977 set_new_dnode(&dn, inode, page, npage, nid);
965 978
966 if (page) 979 if (page)
@@ -1018,7 +1031,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
1018 unsigned int ofs, struct page *ipage) 1031 unsigned int ofs, struct page *ipage)
1019{ 1032{
1020 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1033 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1021 struct node_info old_ni, new_ni; 1034 struct node_info new_ni;
1022 struct page *page; 1035 struct page *page;
1023 int err; 1036 int err;
1024 1037
@@ -1033,13 +1046,15 @@ struct page *new_node_page(struct dnode_of_data *dn,
1033 err = -ENOSPC; 1046 err = -ENOSPC;
1034 goto fail; 1047 goto fail;
1035 } 1048 }
1036 1049#ifdef CONFIG_F2FS_CHECK_FS
1037 get_node_info(sbi, dn->nid, &old_ni); 1050 get_node_info(sbi, dn->nid, &new_ni);
1038 1051 f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
1039 /* Reinitialize old_ni with new node page */ 1052#endif
1040 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); 1053 new_ni.nid = dn->nid;
1041 new_ni = old_ni;
1042 new_ni.ino = dn->inode->i_ino; 1054 new_ni.ino = dn->inode->i_ino;
1055 new_ni.blk_addr = NULL_ADDR;
1056 new_ni.flag = 0;
1057 new_ni.version = 0;
1043 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1058 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1044 1059
1045 f2fs_wait_on_page_writeback(page, NODE, true); 1060 f2fs_wait_on_page_writeback(page, NODE, true);
@@ -1305,16 +1320,99 @@ continue_unlock:
1305 return last_page; 1320 return last_page;
1306} 1321}
1307 1322
1323static int __write_node_page(struct page *page, bool atomic, bool *submitted,
1324 struct writeback_control *wbc)
1325{
1326 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1327 nid_t nid;
1328 struct node_info ni;
1329 struct f2fs_io_info fio = {
1330 .sbi = sbi,
1331 .type = NODE,
1332 .op = REQ_OP_WRITE,
1333 .op_flags = wbc_to_write_flags(wbc),
1334 .page = page,
1335 .encrypted_page = NULL,
1336 .submitted = false,
1337 };
1338
1339 trace_f2fs_writepage(page, NODE);
1340
1341 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1342 goto redirty_out;
1343 if (unlikely(f2fs_cp_error(sbi)))
1344 goto redirty_out;
1345
1346 /* get old block addr of this node page */
1347 nid = nid_of_node(page);
1348 f2fs_bug_on(sbi, page->index != nid);
1349
1350 if (wbc->for_reclaim) {
1351 if (!down_read_trylock(&sbi->node_write))
1352 goto redirty_out;
1353 } else {
1354 down_read(&sbi->node_write);
1355 }
1356
1357 get_node_info(sbi, nid, &ni);
1358
1359 /* This page is already truncated */
1360 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1361 ClearPageUptodate(page);
1362 dec_page_count(sbi, F2FS_DIRTY_NODES);
1363 up_read(&sbi->node_write);
1364 unlock_page(page);
1365 return 0;
1366 }
1367
1368 if (atomic && !test_opt(sbi, NOBARRIER))
1369 fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
1370
1371 set_page_writeback(page);
1372 fio.old_blkaddr = ni.blk_addr;
1373 write_node_page(nid, &fio);
1374 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1375 dec_page_count(sbi, F2FS_DIRTY_NODES);
1376 up_read(&sbi->node_write);
1377
1378 if (wbc->for_reclaim) {
1379 f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
1380 page->index, NODE, WRITE);
1381 submitted = NULL;
1382 }
1383
1384 unlock_page(page);
1385
1386 if (unlikely(f2fs_cp_error(sbi))) {
1387 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1388 submitted = NULL;
1389 }
1390 if (submitted)
1391 *submitted = fio.submitted;
1392
1393 return 0;
1394
1395redirty_out:
1396 redirty_page_for_writepage(wbc, page);
1397 return AOP_WRITEPAGE_ACTIVATE;
1398}
1399
1400static int f2fs_write_node_page(struct page *page,
1401 struct writeback_control *wbc)
1402{
1403 return __write_node_page(page, false, NULL, wbc);
1404}
1405
1308int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, 1406int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
1309 struct writeback_control *wbc, bool atomic) 1407 struct writeback_control *wbc, bool atomic)
1310{ 1408{
1311 pgoff_t index, end; 1409 pgoff_t index, end;
1410 pgoff_t last_idx = ULONG_MAX;
1312 struct pagevec pvec; 1411 struct pagevec pvec;
1313 int ret = 0; 1412 int ret = 0;
1314 struct page *last_page = NULL; 1413 struct page *last_page = NULL;
1315 bool marked = false; 1414 bool marked = false;
1316 nid_t ino = inode->i_ino; 1415 nid_t ino = inode->i_ino;
1317 int nwritten = 0;
1318 1416
1319 if (atomic) { 1417 if (atomic) {
1320 last_page = last_fsync_dnode(sbi, ino); 1418 last_page = last_fsync_dnode(sbi, ino);
@@ -1336,6 +1434,7 @@ retry:
1336 1434
1337 for (i = 0; i < nr_pages; i++) { 1435 for (i = 0; i < nr_pages; i++) {
1338 struct page *page = pvec.pages[i]; 1436 struct page *page = pvec.pages[i];
1437 bool submitted = false;
1339 1438
1340 if (unlikely(f2fs_cp_error(sbi))) { 1439 if (unlikely(f2fs_cp_error(sbi))) {
1341 f2fs_put_page(last_page, 0); 1440 f2fs_put_page(last_page, 0);
@@ -1384,13 +1483,15 @@ continue_unlock:
1384 if (!clear_page_dirty_for_io(page)) 1483 if (!clear_page_dirty_for_io(page))
1385 goto continue_unlock; 1484 goto continue_unlock;
1386 1485
1387 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1486 ret = __write_node_page(page, atomic &&
1487 page == last_page,
1488 &submitted, wbc);
1388 if (ret) { 1489 if (ret) {
1389 unlock_page(page); 1490 unlock_page(page);
1390 f2fs_put_page(last_page, 0); 1491 f2fs_put_page(last_page, 0);
1391 break; 1492 break;
1392 } else { 1493 } else if (submitted) {
1393 nwritten++; 1494 last_idx = page->index;
1394 } 1495 }
1395 1496
1396 if (page == last_page) { 1497 if (page == last_page) {
@@ -1416,8 +1517,9 @@ continue_unlock:
1416 goto retry; 1517 goto retry;
1417 } 1518 }
1418out: 1519out:
1419 if (nwritten) 1520 if (last_idx != ULONG_MAX)
1420 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); 1521 f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
1522 NODE, WRITE);
1421 return ret ? -EIO: 0; 1523 return ret ? -EIO: 0;
1422} 1524}
1423 1525
@@ -1445,6 +1547,7 @@ next_step:
1445 1547
1446 for (i = 0; i < nr_pages; i++) { 1548 for (i = 0; i < nr_pages; i++) {
1447 struct page *page = pvec.pages[i]; 1549 struct page *page = pvec.pages[i];
1550 bool submitted = false;
1448 1551
1449 if (unlikely(f2fs_cp_error(sbi))) { 1552 if (unlikely(f2fs_cp_error(sbi))) {
1450 pagevec_release(&pvec); 1553 pagevec_release(&pvec);
@@ -1498,9 +1601,10 @@ continue_unlock:
1498 set_fsync_mark(page, 0); 1601 set_fsync_mark(page, 0);
1499 set_dentry_mark(page, 0); 1602 set_dentry_mark(page, 0);
1500 1603
1501 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1604 ret = __write_node_page(page, false, &submitted, wbc);
1605 if (ret)
1502 unlock_page(page); 1606 unlock_page(page);
1503 else 1607 else if (submitted)
1504 nwritten++; 1608 nwritten++;
1505 1609
1506 if (--wbc->nr_to_write == 0) 1610 if (--wbc->nr_to_write == 0)
@@ -1564,72 +1668,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1564 return ret; 1668 return ret;
1565} 1669}
1566 1670
1567static int f2fs_write_node_page(struct page *page,
1568 struct writeback_control *wbc)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1571 nid_t nid;
1572 struct node_info ni;
1573 struct f2fs_io_info fio = {
1574 .sbi = sbi,
1575 .type = NODE,
1576 .op = REQ_OP_WRITE,
1577 .op_flags = wbc_to_write_flags(wbc),
1578 .page = page,
1579 .encrypted_page = NULL,
1580 };
1581
1582 trace_f2fs_writepage(page, NODE);
1583
1584 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1585 goto redirty_out;
1586 if (unlikely(f2fs_cp_error(sbi)))
1587 goto redirty_out;
1588
1589 /* get old block addr of this node page */
1590 nid = nid_of_node(page);
1591 f2fs_bug_on(sbi, page->index != nid);
1592
1593 if (wbc->for_reclaim) {
1594 if (!down_read_trylock(&sbi->node_write))
1595 goto redirty_out;
1596 } else {
1597 down_read(&sbi->node_write);
1598 }
1599
1600 get_node_info(sbi, nid, &ni);
1601
1602 /* This page is already truncated */
1603 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1604 ClearPageUptodate(page);
1605 dec_page_count(sbi, F2FS_DIRTY_NODES);
1606 up_read(&sbi->node_write);
1607 unlock_page(page);
1608 return 0;
1609 }
1610
1611 set_page_writeback(page);
1612 fio.old_blkaddr = ni.blk_addr;
1613 write_node_page(nid, &fio);
1614 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1615 dec_page_count(sbi, F2FS_DIRTY_NODES);
1616 up_read(&sbi->node_write);
1617
1618 if (wbc->for_reclaim)
1619 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
1620
1621 unlock_page(page);
1622
1623 if (unlikely(f2fs_cp_error(sbi)))
1624 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1625
1626 return 0;
1627
1628redirty_out:
1629 redirty_page_for_writepage(wbc, page);
1630 return AOP_WRITEPAGE_ACTIVATE;
1631}
1632
1633static int f2fs_write_node_pages(struct address_space *mapping, 1671static int f2fs_write_node_pages(struct address_space *mapping,
1634 struct writeback_control *wbc) 1672 struct writeback_control *wbc)
1635{ 1673{
@@ -1727,7 +1765,8 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
1727 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1765 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1728} 1766}
1729 1767
1730static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1768/* return if the nid is recognized as free */
1769static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1731{ 1770{
1732 struct f2fs_nm_info *nm_i = NM_I(sbi); 1771 struct f2fs_nm_info *nm_i = NM_I(sbi);
1733 struct free_nid *i; 1772 struct free_nid *i;
@@ -1736,14 +1775,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1736 1775
1737 /* 0 nid should not be used */ 1776 /* 0 nid should not be used */
1738 if (unlikely(nid == 0)) 1777 if (unlikely(nid == 0))
1739 return 0; 1778 return false;
1740 1779
1741 if (build) { 1780 if (build) {
1742 /* do not add allocated nids */ 1781 /* do not add allocated nids */
1743 ne = __lookup_nat_cache(nm_i, nid); 1782 ne = __lookup_nat_cache(nm_i, nid);
1744 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1783 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1745 nat_get_blkaddr(ne) != NULL_ADDR)) 1784 nat_get_blkaddr(ne) != NULL_ADDR))
1746 return 0; 1785 return false;
1747 } 1786 }
1748 1787
1749 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1788 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1752,7 +1791,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1752 1791
1753 if (radix_tree_preload(GFP_NOFS)) { 1792 if (radix_tree_preload(GFP_NOFS)) {
1754 kmem_cache_free(free_nid_slab, i); 1793 kmem_cache_free(free_nid_slab, i);
1755 return 0; 1794 return true;
1756 } 1795 }
1757 1796
1758 spin_lock(&nm_i->nid_list_lock); 1797 spin_lock(&nm_i->nid_list_lock);
@@ -1761,9 +1800,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1761 radix_tree_preload_end(); 1800 radix_tree_preload_end();
1762 if (err) { 1801 if (err) {
1763 kmem_cache_free(free_nid_slab, i); 1802 kmem_cache_free(free_nid_slab, i);
1764 return 0; 1803 return true;
1765 } 1804 }
1766 return 1; 1805 return true;
1767} 1806}
1768 1807
1769static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) 1808static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
@@ -1784,17 +1823,36 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1784 kmem_cache_free(free_nid_slab, i); 1823 kmem_cache_free(free_nid_slab, i);
1785} 1824}
1786 1825
1826void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set)
1827{
1828 struct f2fs_nm_info *nm_i = NM_I(sbi);
1829 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
1830 unsigned int nid_ofs = nid - START_NID(nid);
1831
1832 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1833 return;
1834
1835 if (set)
1836 set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1837 else
1838 clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1839}
1840
1787static void scan_nat_page(struct f2fs_sb_info *sbi, 1841static void scan_nat_page(struct f2fs_sb_info *sbi,
1788 struct page *nat_page, nid_t start_nid) 1842 struct page *nat_page, nid_t start_nid)
1789{ 1843{
1790 struct f2fs_nm_info *nm_i = NM_I(sbi); 1844 struct f2fs_nm_info *nm_i = NM_I(sbi);
1791 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1845 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1792 block_t blk_addr; 1846 block_t blk_addr;
1847 unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
1793 int i; 1848 int i;
1794 1849
1850 set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
1851
1795 i = start_nid % NAT_ENTRY_PER_BLOCK; 1852 i = start_nid % NAT_ENTRY_PER_BLOCK;
1796 1853
1797 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1854 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1855 bool freed = false;
1798 1856
1799 if (unlikely(start_nid >= nm_i->max_nid)) 1857 if (unlikely(start_nid >= nm_i->max_nid))
1800 break; 1858 break;
@@ -1802,11 +1860,106 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1802 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1860 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1803 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1861 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1804 if (blk_addr == NULL_ADDR) 1862 if (blk_addr == NULL_ADDR)
1805 add_free_nid(sbi, start_nid, true); 1863 freed = add_free_nid(sbi, start_nid, true);
1864 update_free_nid_bitmap(sbi, start_nid, freed);
1865 }
1866}
1867
1868static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1869{
1870 struct f2fs_nm_info *nm_i = NM_I(sbi);
1871 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1872 struct f2fs_journal *journal = curseg->journal;
1873 unsigned int i, idx;
1874
1875 down_read(&nm_i->nat_tree_lock);
1876
1877 for (i = 0; i < nm_i->nat_blocks; i++) {
1878 if (!test_bit_le(i, nm_i->nat_block_bitmap))
1879 continue;
1880 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
1881 nid_t nid;
1882
1883 if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
1884 continue;
1885
1886 nid = i * NAT_ENTRY_PER_BLOCK + idx;
1887 add_free_nid(sbi, nid, true);
1888
1889 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1890 goto out;
1891 }
1892 }
1893out:
1894 down_read(&curseg->journal_rwsem);
1895 for (i = 0; i < nats_in_cursum(journal); i++) {
1896 block_t addr;
1897 nid_t nid;
1898
1899 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1900 nid = le32_to_cpu(nid_in_journal(journal, i));
1901 if (addr == NULL_ADDR)
1902 add_free_nid(sbi, nid, true);
1903 else
1904 remove_free_nid(sbi, nid);
1806 } 1905 }
1906 up_read(&curseg->journal_rwsem);
1907 up_read(&nm_i->nat_tree_lock);
1807} 1908}
1808 1909
1809static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) 1910static int scan_nat_bits(struct f2fs_sb_info *sbi)
1911{
1912 struct f2fs_nm_info *nm_i = NM_I(sbi);
1913 struct page *page;
1914 unsigned int i = 0;
1915 nid_t nid;
1916
1917 if (!enabled_nat_bits(sbi, NULL))
1918 return -EAGAIN;
1919
1920 down_read(&nm_i->nat_tree_lock);
1921check_empty:
1922 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
1923 if (i >= nm_i->nat_blocks) {
1924 i = 0;
1925 goto check_partial;
1926 }
1927
1928 for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
1929 nid++) {
1930 if (unlikely(nid >= nm_i->max_nid))
1931 break;
1932 add_free_nid(sbi, nid, true);
1933 }
1934
1935 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
1936 goto out;
1937 i++;
1938 goto check_empty;
1939
1940check_partial:
1941 i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
1942 if (i >= nm_i->nat_blocks) {
1943 disable_nat_bits(sbi, true);
1944 up_read(&nm_i->nat_tree_lock);
1945 return -EINVAL;
1946 }
1947
1948 nid = i * NAT_ENTRY_PER_BLOCK;
1949 page = get_current_nat_page(sbi, nid);
1950 scan_nat_page(sbi, page, nid);
1951 f2fs_put_page(page, 1);
1952
1953 if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) {
1954 i++;
1955 goto check_partial;
1956 }
1957out:
1958 up_read(&nm_i->nat_tree_lock);
1959 return 0;
1960}
1961
1962static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1810{ 1963{
1811 struct f2fs_nm_info *nm_i = NM_I(sbi); 1964 struct f2fs_nm_info *nm_i = NM_I(sbi);
1812 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1965 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1821,6 +1974,29 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1821 if (!sync && !available_free_memory(sbi, FREE_NIDS)) 1974 if (!sync && !available_free_memory(sbi, FREE_NIDS))
1822 return; 1975 return;
1823 1976
1977 if (!mount) {
1978 /* try to find free nids in free_nid_bitmap */
1979 scan_free_nid_bits(sbi);
1980
1981 if (nm_i->nid_cnt[FREE_NID_LIST])
1982 return;
1983
1984 /* try to find free nids with nat_bits */
1985 if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
1986 return;
1987 }
1988
1989 /* find next valid candidate */
1990 if (enabled_nat_bits(sbi, NULL)) {
1991 int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
1992 nm_i->nat_blocks, 0);
1993
1994 if (idx >= nm_i->nat_blocks)
1995 set_sbi_flag(sbi, SBI_NEED_FSCK);
1996 else
1997 nid = idx * NAT_ENTRY_PER_BLOCK;
1998 }
1999
1824 /* readahead nat pages to be scanned */ 2000 /* readahead nat pages to be scanned */
1825 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 2001 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
1826 META_NAT, true); 2002 META_NAT, true);
@@ -1863,10 +2039,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1863 nm_i->ra_nid_pages, META_NAT, false); 2039 nm_i->ra_nid_pages, META_NAT, false);
1864} 2040}
1865 2041
1866void build_free_nids(struct f2fs_sb_info *sbi, bool sync) 2042void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1867{ 2043{
1868 mutex_lock(&NM_I(sbi)->build_lock); 2044 mutex_lock(&NM_I(sbi)->build_lock);
1869 __build_free_nids(sbi, sync); 2045 __build_free_nids(sbi, sync, mount);
1870 mutex_unlock(&NM_I(sbi)->build_lock); 2046 mutex_unlock(&NM_I(sbi)->build_lock);
1871} 2047}
1872 2048
@@ -1881,8 +2057,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1881 struct free_nid *i = NULL; 2057 struct free_nid *i = NULL;
1882retry: 2058retry:
1883#ifdef CONFIG_F2FS_FAULT_INJECTION 2059#ifdef CONFIG_F2FS_FAULT_INJECTION
1884 if (time_to_inject(sbi, FAULT_ALLOC_NID)) 2060 if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
2061 f2fs_show_injection_info(FAULT_ALLOC_NID);
1885 return false; 2062 return false;
2063 }
1886#endif 2064#endif
1887 spin_lock(&nm_i->nid_list_lock); 2065 spin_lock(&nm_i->nid_list_lock);
1888 2066
@@ -1902,13 +2080,16 @@ retry:
1902 i->state = NID_ALLOC; 2080 i->state = NID_ALLOC;
1903 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); 2081 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
1904 nm_i->available_nids--; 2082 nm_i->available_nids--;
2083
2084 update_free_nid_bitmap(sbi, *nid, false);
2085
1905 spin_unlock(&nm_i->nid_list_lock); 2086 spin_unlock(&nm_i->nid_list_lock);
1906 return true; 2087 return true;
1907 } 2088 }
1908 spin_unlock(&nm_i->nid_list_lock); 2089 spin_unlock(&nm_i->nid_list_lock);
1909 2090
1910 /* Let's scan nat pages and its caches to get free nids */ 2091 /* Let's scan nat pages and its caches to get free nids */
1911 build_free_nids(sbi, true); 2092 build_free_nids(sbi, true, false);
1912 goto retry; 2093 goto retry;
1913} 2094}
1914 2095
@@ -1956,6 +2137,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1956 2137
1957 nm_i->available_nids++; 2138 nm_i->available_nids++;
1958 2139
2140 update_free_nid_bitmap(sbi, nid, true);
2141
1959 spin_unlock(&nm_i->nid_list_lock); 2142 spin_unlock(&nm_i->nid_list_lock);
1960 2143
1961 if (need_free) 2144 if (need_free)
@@ -2018,18 +2201,18 @@ update_inode:
2018 f2fs_put_page(ipage, 1); 2201 f2fs_put_page(ipage, 1);
2019} 2202}
2020 2203
2021void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 2204int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
2022{ 2205{
2023 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2206 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2024 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 2207 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
2025 nid_t new_xnid = nid_of_node(page); 2208 nid_t new_xnid = nid_of_node(page);
2026 struct node_info ni; 2209 struct node_info ni;
2210 struct page *xpage;
2027 2211
2028 /* 1: invalidate the previous xattr nid */
2029 if (!prev_xnid) 2212 if (!prev_xnid)
2030 goto recover_xnid; 2213 goto recover_xnid;
2031 2214
2032 /* Deallocate node address */ 2215 /* 1: invalidate the previous xattr nid */
2033 get_node_info(sbi, prev_xnid, &ni); 2216 get_node_info(sbi, prev_xnid, &ni);
2034 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); 2217 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
2035 invalidate_blocks(sbi, ni.blk_addr); 2218 invalidate_blocks(sbi, ni.blk_addr);
@@ -2037,19 +2220,27 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
2037 set_node_addr(sbi, &ni, NULL_ADDR, false); 2220 set_node_addr(sbi, &ni, NULL_ADDR, false);
2038 2221
2039recover_xnid: 2222recover_xnid:
2040 /* 2: allocate new xattr nid */ 2223 /* 2: update xattr nid in inode */
2224 remove_free_nid(sbi, new_xnid);
2225 f2fs_i_xnid_write(inode, new_xnid);
2041 if (unlikely(!inc_valid_node_count(sbi, inode))) 2226 if (unlikely(!inc_valid_node_count(sbi, inode)))
2042 f2fs_bug_on(sbi, 1); 2227 f2fs_bug_on(sbi, 1);
2228 update_inode_page(inode);
2229
2230 /* 3: update and set xattr node page dirty */
2231 xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid);
2232 if (!xpage)
2233 return -ENOMEM;
2234
2235 memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE);
2043 2236
2044 remove_free_nid(sbi, new_xnid);
2045 get_node_info(sbi, new_xnid, &ni); 2237 get_node_info(sbi, new_xnid, &ni);
2046 ni.ino = inode->i_ino; 2238 ni.ino = inode->i_ino;
2047 set_node_addr(sbi, &ni, NEW_ADDR, false); 2239 set_node_addr(sbi, &ni, NEW_ADDR, false);
2048 f2fs_i_xnid_write(inode, new_xnid); 2240 set_page_dirty(xpage);
2241 f2fs_put_page(xpage, 1);
2049 2242
2050 /* 3: update xattr blkaddr */ 2243 return 0;
2051 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
2052 set_node_addr(sbi, &ni, blkaddr, false);
2053} 2244}
2054 2245
2055int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 2246int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2152,7 +2343,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2152 2343
2153 ne = __lookup_nat_cache(nm_i, nid); 2344 ne = __lookup_nat_cache(nm_i, nid);
2154 if (!ne) { 2345 if (!ne) {
2155 ne = grab_nat_entry(nm_i, nid); 2346 ne = grab_nat_entry(nm_i, nid, true);
2156 node_info_from_raw_nat(&ne->ni, &raw_ne); 2347 node_info_from_raw_nat(&ne->ni, &raw_ne);
2157 } 2348 }
2158 2349
@@ -2192,8 +2383,39 @@ add_out:
2192 list_add_tail(&nes->set_list, head); 2383 list_add_tail(&nes->set_list, head);
2193} 2384}
2194 2385
2386void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2387 struct page *page)
2388{
2389 struct f2fs_nm_info *nm_i = NM_I(sbi);
2390 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2391 struct f2fs_nat_block *nat_blk = page_address(page);
2392 int valid = 0;
2393 int i;
2394
2395 if (!enabled_nat_bits(sbi, NULL))
2396 return;
2397
2398 for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
2399 if (start_nid == 0 && i == 0)
2400 valid++;
2401 if (nat_blk->entries[i].block_addr)
2402 valid++;
2403 }
2404 if (valid == 0) {
2405 set_bit_le(nat_index, nm_i->empty_nat_bits);
2406 clear_bit_le(nat_index, nm_i->full_nat_bits);
2407 return;
2408 }
2409
2410 clear_bit_le(nat_index, nm_i->empty_nat_bits);
2411 if (valid == NAT_ENTRY_PER_BLOCK)
2412 set_bit_le(nat_index, nm_i->full_nat_bits);
2413 else
2414 clear_bit_le(nat_index, nm_i->full_nat_bits);
2415}
2416
2195static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, 2417static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2196 struct nat_entry_set *set) 2418 struct nat_entry_set *set, struct cp_control *cpc)
2197{ 2419{
2198 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2420 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2199 struct f2fs_journal *journal = curseg->journal; 2421 struct f2fs_journal *journal = curseg->journal;
@@ -2208,7 +2430,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2208 * #1, flush nat entries to journal in current hot data summary block. 2430 * #1, flush nat entries to journal in current hot data summary block.
2209 * #2, flush nat entries to nat page. 2431 * #2, flush nat entries to nat page.
2210 */ 2432 */
2211 if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) 2433 if (enabled_nat_bits(sbi, cpc) ||
2434 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
2212 to_journal = false; 2435 to_journal = false;
2213 2436
2214 if (to_journal) { 2437 if (to_journal) {
@@ -2244,14 +2467,21 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2244 add_free_nid(sbi, nid, false); 2467 add_free_nid(sbi, nid, false);
2245 spin_lock(&NM_I(sbi)->nid_list_lock); 2468 spin_lock(&NM_I(sbi)->nid_list_lock);
2246 NM_I(sbi)->available_nids++; 2469 NM_I(sbi)->available_nids++;
2470 update_free_nid_bitmap(sbi, nid, true);
2471 spin_unlock(&NM_I(sbi)->nid_list_lock);
2472 } else {
2473 spin_lock(&NM_I(sbi)->nid_list_lock);
2474 update_free_nid_bitmap(sbi, nid, false);
2247 spin_unlock(&NM_I(sbi)->nid_list_lock); 2475 spin_unlock(&NM_I(sbi)->nid_list_lock);
2248 } 2476 }
2249 } 2477 }
2250 2478
2251 if (to_journal) 2479 if (to_journal) {
2252 up_write(&curseg->journal_rwsem); 2480 up_write(&curseg->journal_rwsem);
2253 else 2481 } else {
2482 __update_nat_bits(sbi, start_nid, page);
2254 f2fs_put_page(page, 1); 2483 f2fs_put_page(page, 1);
2484 }
2255 2485
2256 f2fs_bug_on(sbi, set->entry_cnt); 2486 f2fs_bug_on(sbi, set->entry_cnt);
2257 2487
@@ -2262,7 +2492,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2262/* 2492/*
2263 * This function is called during the checkpointing process. 2493 * This function is called during the checkpointing process.
2264 */ 2494 */
2265void flush_nat_entries(struct f2fs_sb_info *sbi) 2495void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2266{ 2496{
2267 struct f2fs_nm_info *nm_i = NM_I(sbi); 2497 struct f2fs_nm_info *nm_i = NM_I(sbi);
2268 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 2498 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2283,7 +2513,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2283 * entries, remove all entries from journal and merge them 2513 * entries, remove all entries from journal and merge them
2284 * into nat entry set. 2514 * into nat entry set.
2285 */ 2515 */
2286 if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 2516 if (enabled_nat_bits(sbi, cpc) ||
2517 !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
2287 remove_nats_in_journal(sbi); 2518 remove_nats_in_journal(sbi);
2288 2519
2289 while ((found = __gang_lookup_nat_set(nm_i, 2520 while ((found = __gang_lookup_nat_set(nm_i,
@@ -2297,27 +2528,69 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
2297 2528
2298 /* flush dirty nats in nat entry set */ 2529 /* flush dirty nats in nat entry set */
2299 list_for_each_entry_safe(set, tmp, &sets, set_list) 2530 list_for_each_entry_safe(set, tmp, &sets, set_list)
2300 __flush_nat_entry_set(sbi, set); 2531 __flush_nat_entry_set(sbi, set, cpc);
2301 2532
2302 up_write(&nm_i->nat_tree_lock); 2533 up_write(&nm_i->nat_tree_lock);
2303 2534
2304 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 2535 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
2305} 2536}
2306 2537
2538static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
2539{
2540 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2541 struct f2fs_nm_info *nm_i = NM_I(sbi);
2542 unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
2543 unsigned int i;
2544 __u64 cp_ver = cur_cp_version(ckpt);
2545 block_t nat_bits_addr;
2546
2547 if (!enabled_nat_bits(sbi, NULL))
2548 return 0;
2549
2550 nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
2551 F2FS_BLKSIZE - 1);
2552 nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
2553 GFP_KERNEL);
2554 if (!nm_i->nat_bits)
2555 return -ENOMEM;
2556
2557 nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
2558 nm_i->nat_bits_blocks;
2559 for (i = 0; i < nm_i->nat_bits_blocks; i++) {
2560 struct page *page = get_meta_page(sbi, nat_bits_addr++);
2561
2562 memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
2563 page_address(page), F2FS_BLKSIZE);
2564 f2fs_put_page(page, 1);
2565 }
2566
2567 cp_ver |= (cur_cp_crc(ckpt) << 32);
2568 if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
2569 disable_nat_bits(sbi, true);
2570 return 0;
2571 }
2572
2573 nm_i->full_nat_bits = nm_i->nat_bits + 8;
2574 nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
2575
2576 f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
2577 return 0;
2578}
2579
2307static int init_node_manager(struct f2fs_sb_info *sbi) 2580static int init_node_manager(struct f2fs_sb_info *sbi)
2308{ 2581{
2309 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); 2582 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
2310 struct f2fs_nm_info *nm_i = NM_I(sbi); 2583 struct f2fs_nm_info *nm_i = NM_I(sbi);
2311 unsigned char *version_bitmap; 2584 unsigned char *version_bitmap;
2312 unsigned int nat_segs, nat_blocks; 2585 unsigned int nat_segs;
2586 int err;
2313 2587
2314 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr); 2588 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
2315 2589
2316 /* segment_count_nat includes pair segment so divide to 2. */ 2590 /* segment_count_nat includes pair segment so divide to 2. */
2317 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 2591 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
2318 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 2592 nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
2319 2593 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
2320 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
2321 2594
2322 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2595 /* not used nids: 0, node, meta, (and root counted as valid node) */
2323 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - 2596 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
@@ -2350,6 +2623,34 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2350 GFP_KERNEL); 2623 GFP_KERNEL);
2351 if (!nm_i->nat_bitmap) 2624 if (!nm_i->nat_bitmap)
2352 return -ENOMEM; 2625 return -ENOMEM;
2626
2627 err = __get_nat_bitmaps(sbi);
2628 if (err)
2629 return err;
2630
2631#ifdef CONFIG_F2FS_CHECK_FS
2632 nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
2633 GFP_KERNEL);
2634 if (!nm_i->nat_bitmap_mir)
2635 return -ENOMEM;
2636#endif
2637
2638 return 0;
2639}
2640
2641int init_free_nid_cache(struct f2fs_sb_info *sbi)
2642{
2643 struct f2fs_nm_info *nm_i = NM_I(sbi);
2644
2645 nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
2646 NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
2647 if (!nm_i->free_nid_bitmap)
2648 return -ENOMEM;
2649
2650 nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
2651 GFP_KERNEL);
2652 if (!nm_i->nat_block_bitmap)
2653 return -ENOMEM;
2353 return 0; 2654 return 0;
2354} 2655}
2355 2656
@@ -2365,7 +2666,11 @@ int build_node_manager(struct f2fs_sb_info *sbi)
2365 if (err) 2666 if (err)
2366 return err; 2667 return err;
2367 2668
2368 build_free_nids(sbi, true); 2669 err = init_free_nid_cache(sbi);
2670 if (err)
2671 return err;
2672
2673 build_free_nids(sbi, true, true);
2369 return 0; 2674 return 0;
2370} 2675}
2371 2676
@@ -2423,7 +2728,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2423 } 2728 }
2424 up_write(&nm_i->nat_tree_lock); 2729 up_write(&nm_i->nat_tree_lock);
2425 2730
2731 kvfree(nm_i->nat_block_bitmap);
2732 kvfree(nm_i->free_nid_bitmap);
2733
2426 kfree(nm_i->nat_bitmap); 2734 kfree(nm_i->nat_bitmap);
2735 kfree(nm_i->nat_bits);
2736#ifdef CONFIG_F2FS_CHECK_FS
2737 kfree(nm_i->nat_bitmap_mir);
2738#endif
2427 sbi->nm_info = NULL; 2739 sbi->nm_info = NULL;
2428 kfree(nm_i); 2740 kfree(nm_i);
2429} 2741}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e7997e240366..2f9603fa85a5 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
174 spin_unlock(&nm_i->nid_list_lock); 174 spin_unlock(&nm_i->nid_list_lock);
175 return; 175 return;
176 } 176 }
177 fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next, 177 fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
178 struct free_nid, list); 178 struct free_nid, list);
179 *nid = fnid->nid; 179 *nid = fnid->nid;
180 spin_unlock(&nm_i->nid_list_lock); 180 spin_unlock(&nm_i->nid_list_lock);
@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
186static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) 186static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
187{ 187{
188 struct f2fs_nm_info *nm_i = NM_I(sbi); 188 struct f2fs_nm_info *nm_i = NM_I(sbi);
189
190#ifdef CONFIG_F2FS_CHECK_FS
191 if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
192 nm_i->bitmap_size))
193 f2fs_bug_on(sbi, 1);
194#endif
189 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); 195 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
190} 196}
191 197
@@ -228,6 +234,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
228 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); 234 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
229 235
230 f2fs_change_bit(block_off, nm_i->nat_bitmap); 236 f2fs_change_bit(block_off, nm_i->nat_bitmap);
237#ifdef CONFIG_F2FS_CHECK_FS
238 f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
239#endif
231} 240}
232 241
233static inline nid_t ino_of_node(struct page *node_page) 242static inline nid_t ino_of_node(struct page *node_page)
@@ -291,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
291{ 300{
292 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 301 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
293 struct f2fs_node *rn = F2FS_NODE(page); 302 struct f2fs_node *rn = F2FS_NODE(page);
294 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); 303 __u64 cp_ver = cur_cp_version(ckpt);
295 __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); 304
305 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
306 cp_ver |= (cur_cp_crc(ckpt) << 32);
296 307
297 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
298 __u64 crc = le32_to_cpu(*((__le32 *)
299 ((unsigned char *)ckpt + crc_offset)));
300 cp_ver |= (crc << 32);
301 }
302 rn->footer.cp_ver = cpu_to_le64(cp_ver); 308 rn->footer.cp_ver = cpu_to_le64(cp_ver);
303 rn->footer.next_blkaddr = cpu_to_le32(blkaddr); 309 rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
304} 310}
@@ -306,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
306static inline bool is_recoverable_dnode(struct page *page) 312static inline bool is_recoverable_dnode(struct page *page)
307{ 313{
308 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); 314 struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
309 size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
310 __u64 cp_ver = cur_cp_version(ckpt); 315 __u64 cp_ver = cur_cp_version(ckpt);
311 316
312 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { 317 if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
313 __u64 crc = le32_to_cpu(*((__le32 *) 318 cp_ver |= (cur_cp_crc(ckpt) << 32);
314 ((unsigned char *)ckpt + crc_offset))); 319
315 cp_ver |= (crc << 32);
316 }
317 return cp_ver == cpver_of_node(page); 320 return cp_ver == cpver_of_node(page);
318} 321}
319 322
@@ -343,7 +346,7 @@ static inline bool IS_DNODE(struct page *node_page)
343 unsigned int ofs = ofs_of_node(node_page); 346 unsigned int ofs = ofs_of_node(node_page);
344 347
345 if (f2fs_has_xattr_block(ofs)) 348 if (f2fs_has_xattr_block(ofs))
346 return false; 349 return true;
347 350
348 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 351 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
349 ofs == 5 + 2 * NIDS_PER_BLOCK) 352 ofs == 5 + 2 * NIDS_PER_BLOCK)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 981a9584b62f..d025aa83fb5b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
378 if (IS_INODE(page)) { 378 if (IS_INODE(page)) {
379 recover_inline_xattr(inode, page); 379 recover_inline_xattr(inode, page);
380 } else if (f2fs_has_xattr_block(ofs_of_node(page))) { 380 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
381 /* 381 err = recover_xattr_data(inode, page, blkaddr);
382 * Deprecated; xattr blocks should be found from cold log. 382 if (!err)
383 * But, we should remain this for backward compatibility. 383 recovered++;
384 */
385 recover_xattr_data(inode, page, blkaddr);
386 goto out; 384 goto out;
387 } 385 }
388 386
@@ -428,8 +426,9 @@ retry_dn:
428 } 426 }
429 427
430 if (!file_keep_isize(inode) && 428 if (!file_keep_isize(inode) &&
431 (i_size_read(inode) <= (start << PAGE_SHIFT))) 429 (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
432 f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); 430 f2fs_i_size_write(inode,
431 (loff_t)(start + 1) << PAGE_SHIFT);
433 432
434 /* 433 /*
435 * dest is reserved block, invalidate src block 434 * dest is reserved block, invalidate src block
@@ -552,10 +551,8 @@ next:
552 551
553int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) 552int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
554{ 553{
555 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
556 struct list_head inode_list; 554 struct list_head inode_list;
557 struct list_head dir_list; 555 struct list_head dir_list;
558 block_t blkaddr;
559 int err; 556 int err;
560 int ret = 0; 557 int ret = 0;
561 bool need_writecp = false; 558 bool need_writecp = false;
@@ -571,8 +568,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
571 /* prevent checkpoint */ 568 /* prevent checkpoint */
572 mutex_lock(&sbi->cp_mutex); 569 mutex_lock(&sbi->cp_mutex);
573 570
574 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
575
576 /* step #1: find fsynced inode numbers */ 571 /* step #1: find fsynced inode numbers */
577 err = find_fsync_dnodes(sbi, &inode_list); 572 err = find_fsync_dnodes(sbi, &inode_list);
578 if (err || list_empty(&inode_list)) 573 if (err || list_empty(&inode_list))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0d8802453758..4bd7a8b19332 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,7 +26,7 @@
26#define __reverse_ffz(x) __reverse_ffs(~(x)) 26#define __reverse_ffz(x) __reverse_ffs(~(x))
27 27
28static struct kmem_cache *discard_entry_slab; 28static struct kmem_cache *discard_entry_slab;
29static struct kmem_cache *bio_entry_slab; 29static struct kmem_cache *discard_cmd_slab;
30static struct kmem_cache *sit_entry_set_slab; 30static struct kmem_cache *sit_entry_set_slab;
31static struct kmem_cache *inmem_entry_slab; 31static struct kmem_cache *inmem_entry_slab;
32 32
@@ -242,11 +242,12 @@ void drop_inmem_pages(struct inode *inode)
242{ 242{
243 struct f2fs_inode_info *fi = F2FS_I(inode); 243 struct f2fs_inode_info *fi = F2FS_I(inode);
244 244
245 clear_inode_flag(inode, FI_ATOMIC_FILE);
246
247 mutex_lock(&fi->inmem_lock); 245 mutex_lock(&fi->inmem_lock);
248 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); 246 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
249 mutex_unlock(&fi->inmem_lock); 247 mutex_unlock(&fi->inmem_lock);
248
249 clear_inode_flag(inode, FI_ATOMIC_FILE);
250 stat_dec_atomic_write(inode);
250} 251}
251 252
252static int __commit_inmem_pages(struct inode *inode, 253static int __commit_inmem_pages(struct inode *inode,
@@ -262,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode,
262 .op_flags = REQ_SYNC | REQ_PRIO, 263 .op_flags = REQ_SYNC | REQ_PRIO,
263 .encrypted_page = NULL, 264 .encrypted_page = NULL,
264 }; 265 };
265 bool submit_bio = false; 266 pgoff_t last_idx = ULONG_MAX;
266 int err = 0; 267 int err = 0;
267 268
268 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 269 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
@@ -288,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode,
288 289
289 /* record old blkaddr for revoking */ 290 /* record old blkaddr for revoking */
290 cur->old_addr = fio.old_blkaddr; 291 cur->old_addr = fio.old_blkaddr;
291 292 last_idx = page->index;
292 submit_bio = true;
293 } 293 }
294 unlock_page(page); 294 unlock_page(page);
295 list_move_tail(&cur->list, revoke_list); 295 list_move_tail(&cur->list, revoke_list);
296 } 296 }
297 297
298 if (submit_bio) 298 if (last_idx != ULONG_MAX)
299 f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); 299 f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
300 DATA, WRITE);
300 301
301 if (!err) 302 if (!err)
302 __revoke_inmem_pages(inode, revoke_list, false, false); 303 __revoke_inmem_pages(inode, revoke_list, false, false);
@@ -315,6 +316,8 @@ int commit_inmem_pages(struct inode *inode)
315 f2fs_balance_fs(sbi, true); 316 f2fs_balance_fs(sbi, true);
316 f2fs_lock_op(sbi); 317 f2fs_lock_op(sbi);
317 318
319 set_inode_flag(inode, FI_ATOMIC_COMMIT);
320
318 mutex_lock(&fi->inmem_lock); 321 mutex_lock(&fi->inmem_lock);
319 err = __commit_inmem_pages(inode, &revoke_list); 322 err = __commit_inmem_pages(inode, &revoke_list);
320 if (err) { 323 if (err) {
@@ -336,6 +339,8 @@ int commit_inmem_pages(struct inode *inode)
336 } 339 }
337 mutex_unlock(&fi->inmem_lock); 340 mutex_unlock(&fi->inmem_lock);
338 341
342 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
343
339 f2fs_unlock_op(sbi); 344 f2fs_unlock_op(sbi);
340 return err; 345 return err;
341} 346}
@@ -347,8 +352,10 @@ int commit_inmem_pages(struct inode *inode)
347void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) 352void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
348{ 353{
349#ifdef CONFIG_F2FS_FAULT_INJECTION 354#ifdef CONFIG_F2FS_FAULT_INJECTION
350 if (time_to_inject(sbi, FAULT_CHECKPOINT)) 355 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
356 f2fs_show_injection_info(FAULT_CHECKPOINT);
351 f2fs_stop_checkpoint(sbi, false); 357 f2fs_stop_checkpoint(sbi, false);
358 }
352#endif 359#endif
353 360
354 if (!need) 361 if (!need)
@@ -381,7 +388,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
381 if (!available_free_memory(sbi, FREE_NIDS)) 388 if (!available_free_memory(sbi, FREE_NIDS))
382 try_to_free_nids(sbi, MAX_FREE_NIDS); 389 try_to_free_nids(sbi, MAX_FREE_NIDS);
383 else 390 else
384 build_free_nids(sbi, false); 391 build_free_nids(sbi, false, false);
385 392
386 if (!is_idle(sbi)) 393 if (!is_idle(sbi))
387 return; 394 return;
@@ -423,6 +430,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
423 430
424 if (sbi->s_ndevs && !ret) { 431 if (sbi->s_ndevs && !ret) {
425 for (i = 1; i < sbi->s_ndevs; i++) { 432 for (i = 1; i < sbi->s_ndevs; i++) {
433 trace_f2fs_issue_flush(FDEV(i).bdev,
434 test_opt(sbi, NOBARRIER),
435 test_opt(sbi, FLUSH_MERGE));
426 ret = __submit_flush_wait(FDEV(i).bdev); 436 ret = __submit_flush_wait(FDEV(i).bdev);
427 if (ret) 437 if (ret)
428 break; 438 break;
@@ -434,7 +444,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi)
434static int issue_flush_thread(void *data) 444static int issue_flush_thread(void *data)
435{ 445{
436 struct f2fs_sb_info *sbi = data; 446 struct f2fs_sb_info *sbi = data;
437 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 447 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
438 wait_queue_head_t *q = &fcc->flush_wait_queue; 448 wait_queue_head_t *q = &fcc->flush_wait_queue;
439repeat: 449repeat:
440 if (kthread_should_stop()) 450 if (kthread_should_stop())
@@ -463,16 +473,16 @@ repeat:
463 473
464int f2fs_issue_flush(struct f2fs_sb_info *sbi) 474int f2fs_issue_flush(struct f2fs_sb_info *sbi)
465{ 475{
466 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 476 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
467 struct flush_cmd cmd; 477 struct flush_cmd cmd;
468 478
469 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
470 test_opt(sbi, FLUSH_MERGE));
471
472 if (test_opt(sbi, NOBARRIER)) 479 if (test_opt(sbi, NOBARRIER))
473 return 0; 480 return 0;
474 481
475 if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { 482 if (!test_opt(sbi, FLUSH_MERGE))
483 return submit_flush_wait(sbi);
484
485 if (!atomic_read(&fcc->submit_flush)) {
476 int ret; 486 int ret;
477 487
478 atomic_inc(&fcc->submit_flush); 488 atomic_inc(&fcc->submit_flush);
@@ -506,8 +516,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
506 struct flush_cmd_control *fcc; 516 struct flush_cmd_control *fcc;
507 int err = 0; 517 int err = 0;
508 518
509 if (SM_I(sbi)->cmd_control_info) { 519 if (SM_I(sbi)->fcc_info) {
510 fcc = SM_I(sbi)->cmd_control_info; 520 fcc = SM_I(sbi)->fcc_info;
511 goto init_thread; 521 goto init_thread;
512 } 522 }
513 523
@@ -517,14 +527,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
517 atomic_set(&fcc->submit_flush, 0); 527 atomic_set(&fcc->submit_flush, 0);
518 init_waitqueue_head(&fcc->flush_wait_queue); 528 init_waitqueue_head(&fcc->flush_wait_queue);
519 init_llist_head(&fcc->issue_list); 529 init_llist_head(&fcc->issue_list);
520 SM_I(sbi)->cmd_control_info = fcc; 530 SM_I(sbi)->fcc_info = fcc;
521init_thread: 531init_thread:
522 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 532 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
523 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 533 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
524 if (IS_ERR(fcc->f2fs_issue_flush)) { 534 if (IS_ERR(fcc->f2fs_issue_flush)) {
525 err = PTR_ERR(fcc->f2fs_issue_flush); 535 err = PTR_ERR(fcc->f2fs_issue_flush);
526 kfree(fcc); 536 kfree(fcc);
527 SM_I(sbi)->cmd_control_info = NULL; 537 SM_I(sbi)->fcc_info = NULL;
528 return err; 538 return err;
529 } 539 }
530 540
@@ -533,7 +543,7 @@ init_thread:
533 543
534void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) 544void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
535{ 545{
536 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 546 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
537 547
538 if (fcc && fcc->f2fs_issue_flush) { 548 if (fcc && fcc->f2fs_issue_flush) {
539 struct task_struct *flush_thread = fcc->f2fs_issue_flush; 549 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
@@ -543,7 +553,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
543 } 553 }
544 if (free) { 554 if (free) {
545 kfree(fcc); 555 kfree(fcc);
546 SM_I(sbi)->cmd_control_info = NULL; 556 SM_I(sbi)->fcc_info = NULL;
547 } 557 }
548} 558}
549 559
@@ -623,60 +633,144 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
623 mutex_unlock(&dirty_i->seglist_lock); 633 mutex_unlock(&dirty_i->seglist_lock);
624} 634}
625 635
626static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, 636static void __add_discard_cmd(struct f2fs_sb_info *sbi,
627 struct bio *bio) 637 struct bio *bio, block_t lstart, block_t len)
628{ 638{
629 struct list_head *wait_list = &(SM_I(sbi)->wait_list); 639 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
630 struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); 640 struct list_head *cmd_list = &(dcc->discard_cmd_list);
641 struct discard_cmd *dc;
631 642
632 INIT_LIST_HEAD(&be->list); 643 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
633 be->bio = bio; 644 INIT_LIST_HEAD(&dc->list);
634 init_completion(&be->event); 645 dc->bio = bio;
635 list_add_tail(&be->list, wait_list); 646 bio->bi_private = dc;
647 dc->lstart = lstart;
648 dc->len = len;
649 dc->state = D_PREP;
650 init_completion(&dc->wait);
636 651
637 return be; 652 mutex_lock(&dcc->cmd_lock);
653 list_add_tail(&dc->list, cmd_list);
654 mutex_unlock(&dcc->cmd_lock);
638} 655}
639 656
640void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) 657static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
641{ 658{
642 struct list_head *wait_list = &(SM_I(sbi)->wait_list); 659 int err = dc->bio->bi_error;
643 struct bio_entry *be, *tmp;
644 660
645 list_for_each_entry_safe(be, tmp, wait_list, list) { 661 if (dc->state == D_DONE)
646 struct bio *bio = be->bio; 662 atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard));
647 int err;
648 663
649 wait_for_completion_io(&be->event); 664 if (err == -EOPNOTSUPP)
650 err = be->error; 665 err = 0;
651 if (err == -EOPNOTSUPP)
652 err = 0;
653 666
654 if (err) 667 if (err)
655 f2fs_msg(sbi->sb, KERN_INFO, 668 f2fs_msg(sbi->sb, KERN_INFO,
656 "Issue discard failed, ret: %d", err); 669 "Issue discard failed, ret: %d", err);
670 bio_put(dc->bio);
671 list_del(&dc->list);
672 kmem_cache_free(discard_cmd_slab, dc);
673}
674
675/* This should be covered by global mutex, &sit_i->sentry_lock */
676void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
677{
678 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
679 struct list_head *wait_list = &(dcc->discard_cmd_list);
680 struct discard_cmd *dc, *tmp;
681 struct blk_plug plug;
682
683 mutex_lock(&dcc->cmd_lock);
657 684
658 bio_put(bio); 685 blk_start_plug(&plug);
659 list_del(&be->list); 686
660 kmem_cache_free(bio_entry_slab, be); 687 list_for_each_entry_safe(dc, tmp, wait_list, list) {
688
689 if (blkaddr == NULL_ADDR) {
690 if (dc->state == D_PREP) {
691 dc->state = D_SUBMIT;
692 submit_bio(dc->bio);
693 atomic_inc(&dcc->submit_discard);
694 }
695 continue;
696 }
697
698 if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
699 if (dc->state == D_SUBMIT)
700 wait_for_completion_io(&dc->wait);
701 else
702 __remove_discard_cmd(sbi, dc);
703 }
704 }
705 blk_finish_plug(&plug);
706
707 /* this comes from f2fs_put_super */
708 if (blkaddr == NULL_ADDR) {
709 list_for_each_entry_safe(dc, tmp, wait_list, list) {
710 wait_for_completion_io(&dc->wait);
711 __remove_discard_cmd(sbi, dc);
712 }
661 } 713 }
714 mutex_unlock(&dcc->cmd_lock);
715}
716
717static void f2fs_submit_discard_endio(struct bio *bio)
718{
719 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
720
721 complete(&dc->wait);
722 dc->state = D_DONE;
662} 723}
663 724
664static void f2fs_submit_bio_wait_endio(struct bio *bio) 725static int issue_discard_thread(void *data)
665{ 726{
666 struct bio_entry *be = (struct bio_entry *)bio->bi_private; 727 struct f2fs_sb_info *sbi = data;
728 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
729 wait_queue_head_t *q = &dcc->discard_wait_queue;
730 struct list_head *cmd_list = &dcc->discard_cmd_list;
731 struct discard_cmd *dc, *tmp;
732 struct blk_plug plug;
733 int iter = 0;
734repeat:
735 if (kthread_should_stop())
736 return 0;
737
738 blk_start_plug(&plug);
739
740 mutex_lock(&dcc->cmd_lock);
741 list_for_each_entry_safe(dc, tmp, cmd_list, list) {
742 if (dc->state == D_PREP) {
743 dc->state = D_SUBMIT;
744 submit_bio(dc->bio);
745 atomic_inc(&dcc->submit_discard);
746 if (iter++ > DISCARD_ISSUE_RATE)
747 break;
748 } else if (dc->state == D_DONE) {
749 __remove_discard_cmd(sbi, dc);
750 }
751 }
752 mutex_unlock(&dcc->cmd_lock);
753
754 blk_finish_plug(&plug);
755
756 iter = 0;
757 congestion_wait(BLK_RW_SYNC, HZ/50);
667 758
668 be->error = bio->bi_error; 759 wait_event_interruptible(*q,
669 complete(&be->event); 760 kthread_should_stop() || !list_empty(&dcc->discard_cmd_list));
761 goto repeat;
670} 762}
671 763
764
672/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 765/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
673static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, 766static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
674 struct block_device *bdev, block_t blkstart, block_t blklen) 767 struct block_device *bdev, block_t blkstart, block_t blklen)
675{ 768{
676 struct bio *bio = NULL; 769 struct bio *bio = NULL;
770 block_t lblkstart = blkstart;
677 int err; 771 int err;
678 772
679 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 773 trace_f2fs_issue_discard(bdev, blkstart, blklen);
680 774
681 if (sbi->s_ndevs) { 775 if (sbi->s_ndevs) {
682 int devi = f2fs_target_device_index(sbi, blkstart); 776 int devi = f2fs_target_device_index(sbi, blkstart);
@@ -688,14 +782,12 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
688 SECTOR_FROM_BLOCK(blklen), 782 SECTOR_FROM_BLOCK(blklen),
689 GFP_NOFS, 0, &bio); 783 GFP_NOFS, 0, &bio);
690 if (!err && bio) { 784 if (!err && bio) {
691 struct bio_entry *be = __add_bio_entry(sbi, bio); 785 bio->bi_end_io = f2fs_submit_discard_endio;
692
693 bio->bi_private = be;
694 bio->bi_end_io = f2fs_submit_bio_wait_endio;
695 bio->bi_opf |= REQ_SYNC; 786 bio->bi_opf |= REQ_SYNC;
696 submit_bio(bio);
697 }
698 787
788 __add_discard_cmd(sbi, bio, lblkstart, blklen);
789 wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
790 }
699 return err; 791 return err;
700} 792}
701 793
@@ -703,24 +795,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
703static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, 795static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
704 struct block_device *bdev, block_t blkstart, block_t blklen) 796 struct block_device *bdev, block_t blkstart, block_t blklen)
705{ 797{
706 sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); 798 sector_t sector, nr_sects;
707 sector_t sector;
708 int devi = 0; 799 int devi = 0;
709 800
710 if (sbi->s_ndevs) { 801 if (sbi->s_ndevs) {
711 devi = f2fs_target_device_index(sbi, blkstart); 802 devi = f2fs_target_device_index(sbi, blkstart);
712 blkstart -= FDEV(devi).start_blk; 803 blkstart -= FDEV(devi).start_blk;
713 } 804 }
714 sector = SECTOR_FROM_BLOCK(blkstart);
715
716 if (sector & (bdev_zone_sectors(bdev) - 1) ||
717 nr_sects != bdev_zone_sectors(bdev)) {
718 f2fs_msg(sbi->sb, KERN_INFO,
719 "(%d) %s: Unaligned discard attempted (block %x + %x)",
720 devi, sbi->s_ndevs ? FDEV(devi).path: "",
721 blkstart, blklen);
722 return -EIO;
723 }
724 805
725 /* 806 /*
726 * We need to know the type of the zone: for conventional zones, 807 * We need to know the type of the zone: for conventional zones,
@@ -735,7 +816,18 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
735 return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); 816 return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
736 case BLK_ZONE_TYPE_SEQWRITE_REQ: 817 case BLK_ZONE_TYPE_SEQWRITE_REQ:
737 case BLK_ZONE_TYPE_SEQWRITE_PREF: 818 case BLK_ZONE_TYPE_SEQWRITE_PREF:
738 trace_f2fs_issue_reset_zone(sbi->sb, blkstart); 819 sector = SECTOR_FROM_BLOCK(blkstart);
820 nr_sects = SECTOR_FROM_BLOCK(blklen);
821
822 if (sector & (bdev_zone_sectors(bdev) - 1) ||
823 nr_sects != bdev_zone_sectors(bdev)) {
824 f2fs_msg(sbi->sb, KERN_INFO,
825 "(%d) %s: Unaligned discard attempted (block %x + %x)",
826 devi, sbi->s_ndevs ? FDEV(devi).path: "",
827 blkstart, blklen);
828 return -EIO;
829 }
830 trace_f2fs_issue_reset_zone(bdev, blkstart);
739 return blkdev_reset_zones(bdev, sector, 831 return blkdev_reset_zones(bdev, sector,
740 nr_sects, GFP_NOFS); 832 nr_sects, GFP_NOFS);
741 default: 833 default:
@@ -800,13 +892,14 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
800 struct cp_control *cpc, struct seg_entry *se, 892 struct cp_control *cpc, struct seg_entry *se,
801 unsigned int start, unsigned int end) 893 unsigned int start, unsigned int end)
802{ 894{
803 struct list_head *head = &SM_I(sbi)->discard_list; 895 struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list;
804 struct discard_entry *new, *last; 896 struct discard_entry *new, *last;
805 897
806 if (!list_empty(head)) { 898 if (!list_empty(head)) {
807 last = list_last_entry(head, struct discard_entry, list); 899 last = list_last_entry(head, struct discard_entry, list);
808 if (START_BLOCK(sbi, cpc->trim_start) + start == 900 if (START_BLOCK(sbi, cpc->trim_start) + start ==
809 last->blkaddr + last->len) { 901 last->blkaddr + last->len &&
902 last->len < MAX_DISCARD_BLOCKS(sbi)) {
810 last->len += end - start; 903 last->len += end - start;
811 goto done; 904 goto done;
812 } 905 }
@@ -818,10 +911,11 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi,
818 new->len = end - start; 911 new->len = end - start;
819 list_add_tail(&new->list, head); 912 list_add_tail(&new->list, head);
820done: 913done:
821 SM_I(sbi)->nr_discards += end - start; 914 SM_I(sbi)->dcc_info->nr_discards += end - start;
822} 915}
823 916
824static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) 917static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
918 bool check_only)
825{ 919{
826 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 920 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
827 int max_blocks = sbi->blocks_per_seg; 921 int max_blocks = sbi->blocks_per_seg;
@@ -835,12 +929,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
835 int i; 929 int i;
836 930
837 if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) 931 if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
838 return; 932 return false;
839 933
840 if (!force) { 934 if (!force) {
841 if (!test_opt(sbi, DISCARD) || !se->valid_blocks || 935 if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
842 SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) 936 SM_I(sbi)->dcc_info->nr_discards >=
843 return; 937 SM_I(sbi)->dcc_info->max_discards)
938 return false;
844 } 939 }
845 940
846 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ 941 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
@@ -848,7 +943,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
848 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : 943 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
849 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; 944 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
850 945
851 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 946 while (force || SM_I(sbi)->dcc_info->nr_discards <=
947 SM_I(sbi)->dcc_info->max_discards) {
852 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 948 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
853 if (start >= max_blocks) 949 if (start >= max_blocks)
854 break; 950 break;
@@ -858,13 +954,17 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
858 && (end - start) < cpc->trim_minlen) 954 && (end - start) < cpc->trim_minlen)
859 continue; 955 continue;
860 956
957 if (check_only)
958 return true;
959
861 __add_discard_entry(sbi, cpc, se, start, end); 960 __add_discard_entry(sbi, cpc, se, start, end);
862 } 961 }
962 return false;
863} 963}
864 964
865void release_discard_addrs(struct f2fs_sb_info *sbi) 965void release_discard_addrs(struct f2fs_sb_info *sbi)
866{ 966{
867 struct list_head *head = &(SM_I(sbi)->discard_list); 967 struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
868 struct discard_entry *entry, *this; 968 struct discard_entry *entry, *this;
869 969
870 /* drop caches */ 970 /* drop caches */
@@ -890,17 +990,14 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
890 990
891void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) 991void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
892{ 992{
893 struct list_head *head = &(SM_I(sbi)->discard_list); 993 struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list);
894 struct discard_entry *entry, *this; 994 struct discard_entry *entry, *this;
895 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 995 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
896 struct blk_plug plug;
897 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 996 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
898 unsigned int start = 0, end = -1; 997 unsigned int start = 0, end = -1;
899 unsigned int secno, start_segno; 998 unsigned int secno, start_segno;
900 bool force = (cpc->reason == CP_DISCARD); 999 bool force = (cpc->reason == CP_DISCARD);
901 1000
902 blk_start_plug(&plug);
903
904 mutex_lock(&dirty_i->seglist_lock); 1001 mutex_lock(&dirty_i->seglist_lock);
905 1002
906 while (1) { 1003 while (1) {
@@ -916,9 +1013,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
916 1013
917 dirty_i->nr_dirty[PRE] -= end - start; 1014 dirty_i->nr_dirty[PRE] -= end - start;
918 1015
919 if (force || !test_opt(sbi, DISCARD)) 1016 if (!test_opt(sbi, DISCARD))
920 continue; 1017 continue;
921 1018
1019 if (force && start >= cpc->trim_start &&
1020 (end - 1) <= cpc->trim_end)
1021 continue;
1022
922 if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { 1023 if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
923 f2fs_issue_discard(sbi, START_BLOCK(sbi, start), 1024 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
924 (end - start) << sbi->log_blocks_per_seg); 1025 (end - start) << sbi->log_blocks_per_seg);
@@ -935,6 +1036,8 @@ next:
935 start = start_segno + sbi->segs_per_sec; 1036 start = start_segno + sbi->segs_per_sec;
936 if (start < end) 1037 if (start < end)
937 goto next; 1038 goto next;
1039 else
1040 end = start - 1;
938 } 1041 }
939 mutex_unlock(&dirty_i->seglist_lock); 1042 mutex_unlock(&dirty_i->seglist_lock);
940 1043
@@ -946,11 +1049,62 @@ next:
946 cpc->trimmed += entry->len; 1049 cpc->trimmed += entry->len;
947skip: 1050skip:
948 list_del(&entry->list); 1051 list_del(&entry->list);
949 SM_I(sbi)->nr_discards -= entry->len; 1052 SM_I(sbi)->dcc_info->nr_discards -= entry->len;
950 kmem_cache_free(discard_entry_slab, entry); 1053 kmem_cache_free(discard_entry_slab, entry);
951 } 1054 }
1055}
952 1056
953 blk_finish_plug(&plug); 1057static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1058{
1059 dev_t dev = sbi->sb->s_bdev->bd_dev;
1060 struct discard_cmd_control *dcc;
1061 int err = 0;
1062
1063 if (SM_I(sbi)->dcc_info) {
1064 dcc = SM_I(sbi)->dcc_info;
1065 goto init_thread;
1066 }
1067
1068 dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
1069 if (!dcc)
1070 return -ENOMEM;
1071
1072 INIT_LIST_HEAD(&dcc->discard_entry_list);
1073 INIT_LIST_HEAD(&dcc->discard_cmd_list);
1074 mutex_init(&dcc->cmd_lock);
1075 atomic_set(&dcc->submit_discard, 0);
1076 dcc->nr_discards = 0;
1077 dcc->max_discards = 0;
1078
1079 init_waitqueue_head(&dcc->discard_wait_queue);
1080 SM_I(sbi)->dcc_info = dcc;
1081init_thread:
1082 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
1083 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
1084 if (IS_ERR(dcc->f2fs_issue_discard)) {
1085 err = PTR_ERR(dcc->f2fs_issue_discard);
1086 kfree(dcc);
1087 SM_I(sbi)->dcc_info = NULL;
1088 return err;
1089 }
1090
1091 return err;
1092}
1093
1094static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free)
1095{
1096 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1097
1098 if (dcc && dcc->f2fs_issue_discard) {
1099 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1100
1101 dcc->f2fs_issue_discard = NULL;
1102 kthread_stop(discard_thread);
1103 }
1104 if (free) {
1105 kfree(dcc);
1106 SM_I(sbi)->dcc_info = NULL;
1107 }
954} 1108}
955 1109
956static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 1110static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -995,14 +1149,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
995 1149
996 /* Update valid block bitmap */ 1150 /* Update valid block bitmap */
997 if (del > 0) { 1151 if (del > 0) {
998 if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) 1152 if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) {
1153#ifdef CONFIG_F2FS_CHECK_FS
1154 if (f2fs_test_and_set_bit(offset,
1155 se->cur_valid_map_mir))
1156 f2fs_bug_on(sbi, 1);
1157 else
1158 WARN_ON(1);
1159#else
999 f2fs_bug_on(sbi, 1); 1160 f2fs_bug_on(sbi, 1);
1161#endif
1162 }
1000 if (f2fs_discard_en(sbi) && 1163 if (f2fs_discard_en(sbi) &&
1001 !f2fs_test_and_set_bit(offset, se->discard_map)) 1164 !f2fs_test_and_set_bit(offset, se->discard_map))
1002 sbi->discard_blks--; 1165 sbi->discard_blks--;
1003 } else { 1166 } else {
1004 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) 1167 if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) {
1168#ifdef CONFIG_F2FS_CHECK_FS
1169 if (!f2fs_test_and_clear_bit(offset,
1170 se->cur_valid_map_mir))
1171 f2fs_bug_on(sbi, 1);
1172 else
1173 WARN_ON(1);
1174#else
1005 f2fs_bug_on(sbi, 1); 1175 f2fs_bug_on(sbi, 1);
1176#endif
1177 }
1006 if (f2fs_discard_en(sbi) && 1178 if (f2fs_discard_en(sbi) &&
1007 f2fs_test_and_clear_bit(offset, se->discard_map)) 1179 f2fs_test_and_clear_bit(offset, se->discard_map))
1008 sbi->discard_blks++; 1180 sbi->discard_blks++;
@@ -1167,17 +1339,6 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
1167 f2fs_put_page(page, 1); 1339 f2fs_put_page(page, 1);
1168} 1340}
1169 1341
1170static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
1171{
1172 struct curseg_info *curseg = CURSEG_I(sbi, type);
1173 unsigned int segno = curseg->segno + 1;
1174 struct free_segmap_info *free_i = FREE_I(sbi);
1175
1176 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
1177 return !test_bit(segno, free_i->free_segmap);
1178 return 0;
1179}
1180
1181/* 1342/*
1182 * Find a new segment from the free segments bitmap to right order 1343 * Find a new segment from the free segments bitmap to right order
1183 * This function should be returned with success, otherwise BUG 1344 * This function should be returned with success, otherwise BUG
@@ -1382,16 +1543,39 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1382{ 1543{
1383 struct curseg_info *curseg = CURSEG_I(sbi, type); 1544 struct curseg_info *curseg = CURSEG_I(sbi, type);
1384 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; 1545 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
1546 int i, cnt;
1547 bool reversed = false;
1548
1549 /* need_SSR() already forces to do this */
1550 if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR))
1551 return 1;
1385 1552
1386 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) 1553 /* For node segments, let's do SSR more intensively */
1387 return v_ops->get_victim(sbi, 1554 if (IS_NODESEG(type)) {
1388 &(curseg)->next_segno, BG_GC, type, SSR); 1555 if (type >= CURSEG_WARM_NODE) {
1556 reversed = true;
1557 i = CURSEG_COLD_NODE;
1558 } else {
1559 i = CURSEG_HOT_NODE;
1560 }
1561 cnt = NR_CURSEG_NODE_TYPE;
1562 } else {
1563 if (type >= CURSEG_WARM_DATA) {
1564 reversed = true;
1565 i = CURSEG_COLD_DATA;
1566 } else {
1567 i = CURSEG_HOT_DATA;
1568 }
1569 cnt = NR_CURSEG_DATA_TYPE;
1570 }
1389 1571
1390 /* For data segments, let's do SSR more intensively */ 1572 for (; cnt-- > 0; reversed ? i-- : i++) {
1391 for (; type >= CURSEG_HOT_DATA; type--) 1573 if (i == type)
1574 continue;
1392 if (v_ops->get_victim(sbi, &(curseg)->next_segno, 1575 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1393 BG_GC, type, SSR)) 1576 BG_GC, i, SSR))
1394 return 1; 1577 return 1;
1578 }
1395 return 0; 1579 return 0;
1396} 1580}
1397 1581
@@ -1402,20 +1586,17 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
1402static void allocate_segment_by_default(struct f2fs_sb_info *sbi, 1586static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1403 int type, bool force) 1587 int type, bool force)
1404{ 1588{
1405 struct curseg_info *curseg = CURSEG_I(sbi, type);
1406
1407 if (force) 1589 if (force)
1408 new_curseg(sbi, type, true); 1590 new_curseg(sbi, type, true);
1409 else if (type == CURSEG_WARM_NODE) 1591 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
1410 new_curseg(sbi, type, false); 1592 type == CURSEG_WARM_NODE)
1411 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1412 new_curseg(sbi, type, false); 1593 new_curseg(sbi, type, false);
1413 else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) 1594 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1414 change_curseg(sbi, type, true); 1595 change_curseg(sbi, type, true);
1415 else 1596 else
1416 new_curseg(sbi, type, false); 1597 new_curseg(sbi, type, false);
1417 1598
1418 stat_inc_seg_type(sbi, curseg); 1599 stat_inc_seg_type(sbi, CURSEG_I(sbi, type));
1419} 1600}
1420 1601
1421void allocate_new_segments(struct f2fs_sb_info *sbi) 1602void allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -1424,9 +1605,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
1424 unsigned int old_segno; 1605 unsigned int old_segno;
1425 int i; 1606 int i;
1426 1607
1427 if (test_opt(sbi, LFS))
1428 return;
1429
1430 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 1608 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1431 curseg = CURSEG_I(sbi, i); 1609 curseg = CURSEG_I(sbi, i);
1432 old_segno = curseg->segno; 1610 old_segno = curseg->segno;
@@ -1439,6 +1617,24 @@ static const struct segment_allocation default_salloc_ops = {
1439 .allocate_segment = allocate_segment_by_default, 1617 .allocate_segment = allocate_segment_by_default,
1440}; 1618};
1441 1619
1620bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1621{
1622 __u64 trim_start = cpc->trim_start;
1623 bool has_candidate = false;
1624
1625 mutex_lock(&SIT_I(sbi)->sentry_lock);
1626 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
1627 if (add_discard_addrs(sbi, cpc, true)) {
1628 has_candidate = true;
1629 break;
1630 }
1631 }
1632 mutex_unlock(&SIT_I(sbi)->sentry_lock);
1633
1634 cpc->trim_start = trim_start;
1635 return has_candidate;
1636}
1637
1442int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 1638int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1443{ 1639{
1444 __u64 start = F2FS_BYTES_TO_BLK(range->start); 1640 __u64 start = F2FS_BYTES_TO_BLK(range->start);
@@ -1573,6 +1769,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1573 1769
1574 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 1770 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1575 1771
1772 f2fs_wait_discard_bio(sbi, *new_blkaddr);
1773
1576 /* 1774 /*
1577 * __add_sum_entry should be resided under the curseg_mutex 1775 * __add_sum_entry should be resided under the curseg_mutex
1578 * because, this function updates a summary entry in the 1776 * because, this function updates a summary entry in the
@@ -1584,14 +1782,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1584 1782
1585 stat_inc_block_count(sbi, curseg); 1783 stat_inc_block_count(sbi, curseg);
1586 1784
1587 if (!__has_curseg_space(sbi, type))
1588 sit_i->s_ops->allocate_segment(sbi, type, false);
1589 /* 1785 /*
1590 * SIT information should be updated before segment allocation, 1786 * SIT information should be updated before segment allocation,
1591 * since SSR needs latest valid block information. 1787 * since SSR needs latest valid block information.
1592 */ 1788 */
1593 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 1789 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1594 1790
1791 if (!__has_curseg_space(sbi, type))
1792 sit_i->s_ops->allocate_segment(sbi, type, false);
1793
1595 mutex_unlock(&sit_i->sentry_lock); 1794 mutex_unlock(&sit_i->sentry_lock);
1596 1795
1597 if (page && IS_NODESEG(type)) 1796 if (page && IS_NODESEG(type))
@@ -1603,15 +1802,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1603static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 1802static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
1604{ 1803{
1605 int type = __get_segment_type(fio->page, fio->type); 1804 int type = __get_segment_type(fio->page, fio->type);
1805 int err;
1606 1806
1607 if (fio->type == NODE || fio->type == DATA) 1807 if (fio->type == NODE || fio->type == DATA)
1608 mutex_lock(&fio->sbi->wio_mutex[fio->type]); 1808 mutex_lock(&fio->sbi->wio_mutex[fio->type]);
1609 1809reallocate:
1610 allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, 1810 allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
1611 &fio->new_blkaddr, sum, type); 1811 &fio->new_blkaddr, sum, type);
1612 1812
1613 /* writeout dirty page into bdev */ 1813 /* writeout dirty page into bdev */
1614 f2fs_submit_page_mbio(fio); 1814 err = f2fs_submit_page_mbio(fio);
1815 if (err == -EAGAIN) {
1816 fio->old_blkaddr = fio->new_blkaddr;
1817 goto reallocate;
1818 }
1615 1819
1616 if (fio->type == NODE || fio->type == DATA) 1820 if (fio->type == NODE || fio->type == DATA)
1617 mutex_unlock(&fio->sbi->wio_mutex[fio->type]); 1821 mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
@@ -1753,7 +1957,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
1753 if (PageWriteback(page)) { 1957 if (PageWriteback(page)) {
1754 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1958 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1755 1959
1756 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); 1960 f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
1961 0, page->index, type, WRITE);
1757 if (ordered) 1962 if (ordered)
1758 wait_on_page_writeback(page); 1963 wait_on_page_writeback(page);
1759 else 1964 else
@@ -2228,7 +2433,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2228 /* add discard candidates */ 2433 /* add discard candidates */
2229 if (cpc->reason != CP_DISCARD) { 2434 if (cpc->reason != CP_DISCARD) {
2230 cpc->trim_start = segno; 2435 cpc->trim_start = segno;
2231 add_discard_addrs(sbi, cpc); 2436 add_discard_addrs(sbi, cpc, false);
2232 } 2437 }
2233 2438
2234 if (to_journal) { 2439 if (to_journal) {
@@ -2263,8 +2468,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2263 f2fs_bug_on(sbi, sit_i->dirty_sentries); 2468 f2fs_bug_on(sbi, sit_i->dirty_sentries);
2264out: 2469out:
2265 if (cpc->reason == CP_DISCARD) { 2470 if (cpc->reason == CP_DISCARD) {
2471 __u64 trim_start = cpc->trim_start;
2472
2266 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) 2473 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
2267 add_discard_addrs(sbi, cpc); 2474 add_discard_addrs(sbi, cpc, false);
2475
2476 cpc->trim_start = trim_start;
2268 } 2477 }
2269 mutex_unlock(&sit_i->sentry_lock); 2478 mutex_unlock(&sit_i->sentry_lock);
2270 2479
@@ -2276,7 +2485,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2276 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 2485 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2277 struct sit_info *sit_i; 2486 struct sit_info *sit_i;
2278 unsigned int sit_segs, start; 2487 unsigned int sit_segs, start;
2279 char *src_bitmap, *dst_bitmap; 2488 char *src_bitmap;
2280 unsigned int bitmap_size; 2489 unsigned int bitmap_size;
2281 2490
2282 /* allocate memory for SIT information */ 2491 /* allocate memory for SIT information */
@@ -2305,6 +2514,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2305 !sit_i->sentries[start].ckpt_valid_map) 2514 !sit_i->sentries[start].ckpt_valid_map)
2306 return -ENOMEM; 2515 return -ENOMEM;
2307 2516
2517#ifdef CONFIG_F2FS_CHECK_FS
2518 sit_i->sentries[start].cur_valid_map_mir
2519 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
2520 if (!sit_i->sentries[start].cur_valid_map_mir)
2521 return -ENOMEM;
2522#endif
2523
2308 if (f2fs_discard_en(sbi)) { 2524 if (f2fs_discard_en(sbi)) {
2309 sit_i->sentries[start].discard_map 2525 sit_i->sentries[start].discard_map
2310 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); 2526 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -2331,17 +2547,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
2331 bitmap_size = __bitmap_size(sbi, SIT_BITMAP); 2547 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
2332 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); 2548 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
2333 2549
2334 dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); 2550 sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2335 if (!dst_bitmap) 2551 if (!sit_i->sit_bitmap)
2336 return -ENOMEM; 2552 return -ENOMEM;
2337 2553
2554#ifdef CONFIG_F2FS_CHECK_FS
2555 sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
2556 if (!sit_i->sit_bitmap_mir)
2557 return -ENOMEM;
2558#endif
2559
2338 /* init SIT information */ 2560 /* init SIT information */
2339 sit_i->s_ops = &default_salloc_ops; 2561 sit_i->s_ops = &default_salloc_ops;
2340 2562
2341 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); 2563 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
2342 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; 2564 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
2343 sit_i->written_valid_blocks = 0; 2565 sit_i->written_valid_blocks = 0;
2344 sit_i->sit_bitmap = dst_bitmap;
2345 sit_i->bitmap_size = bitmap_size; 2566 sit_i->bitmap_size = bitmap_size;
2346 sit_i->dirty_sentries = 0; 2567 sit_i->dirty_sentries = 0;
2347 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; 2568 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
@@ -2626,11 +2847,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
2626 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 2847 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2627 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 2848 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2628 2849
2629 INIT_LIST_HEAD(&sm_info->discard_list);
2630 INIT_LIST_HEAD(&sm_info->wait_list);
2631 sm_info->nr_discards = 0;
2632 sm_info->max_discards = 0;
2633
2634 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; 2850 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2635 2851
2636 INIT_LIST_HEAD(&sm_info->sit_entry_set); 2852 INIT_LIST_HEAD(&sm_info->sit_entry_set);
@@ -2641,6 +2857,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
2641 return err; 2857 return err;
2642 } 2858 }
2643 2859
2860 err = create_discard_cmd_control(sbi);
2861 if (err)
2862 return err;
2863
2644 err = build_sit_info(sbi); 2864 err = build_sit_info(sbi);
2645 if (err) 2865 if (err)
2646 return err; 2866 return err;
@@ -2734,6 +2954,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
2734 if (sit_i->sentries) { 2954 if (sit_i->sentries) {
2735 for (start = 0; start < MAIN_SEGS(sbi); start++) { 2955 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2736 kfree(sit_i->sentries[start].cur_valid_map); 2956 kfree(sit_i->sentries[start].cur_valid_map);
2957#ifdef CONFIG_F2FS_CHECK_FS
2958 kfree(sit_i->sentries[start].cur_valid_map_mir);
2959#endif
2737 kfree(sit_i->sentries[start].ckpt_valid_map); 2960 kfree(sit_i->sentries[start].ckpt_valid_map);
2738 kfree(sit_i->sentries[start].discard_map); 2961 kfree(sit_i->sentries[start].discard_map);
2739 } 2962 }
@@ -2746,6 +2969,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
2746 2969
2747 SM_I(sbi)->sit_info = NULL; 2970 SM_I(sbi)->sit_info = NULL;
2748 kfree(sit_i->sit_bitmap); 2971 kfree(sit_i->sit_bitmap);
2972#ifdef CONFIG_F2FS_CHECK_FS
2973 kfree(sit_i->sit_bitmap_mir);
2974#endif
2749 kfree(sit_i); 2975 kfree(sit_i);
2750} 2976}
2751 2977
@@ -2756,6 +2982,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
2756 if (!sm_info) 2982 if (!sm_info)
2757 return; 2983 return;
2758 destroy_flush_cmd_control(sbi, true); 2984 destroy_flush_cmd_control(sbi, true);
2985 destroy_discard_cmd_control(sbi, true);
2759 destroy_dirty_segmap(sbi); 2986 destroy_dirty_segmap(sbi);
2760 destroy_curseg(sbi); 2987 destroy_curseg(sbi);
2761 destroy_free_segmap(sbi); 2988 destroy_free_segmap(sbi);
@@ -2771,15 +2998,15 @@ int __init create_segment_manager_caches(void)
2771 if (!discard_entry_slab) 2998 if (!discard_entry_slab)
2772 goto fail; 2999 goto fail;
2773 3000
2774 bio_entry_slab = f2fs_kmem_cache_create("bio_entry", 3001 discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
2775 sizeof(struct bio_entry)); 3002 sizeof(struct discard_cmd));
2776 if (!bio_entry_slab) 3003 if (!discard_cmd_slab)
2777 goto destroy_discard_entry; 3004 goto destroy_discard_entry;
2778 3005
2779 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", 3006 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2780 sizeof(struct sit_entry_set)); 3007 sizeof(struct sit_entry_set));
2781 if (!sit_entry_set_slab) 3008 if (!sit_entry_set_slab)
2782 goto destroy_bio_entry; 3009 goto destroy_discard_cmd;
2783 3010
2784 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", 3011 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2785 sizeof(struct inmem_pages)); 3012 sizeof(struct inmem_pages));
@@ -2789,8 +3016,8 @@ int __init create_segment_manager_caches(void)
2789 3016
2790destroy_sit_entry_set: 3017destroy_sit_entry_set:
2791 kmem_cache_destroy(sit_entry_set_slab); 3018 kmem_cache_destroy(sit_entry_set_slab);
2792destroy_bio_entry: 3019destroy_discard_cmd:
2793 kmem_cache_destroy(bio_entry_slab); 3020 kmem_cache_destroy(discard_cmd_slab);
2794destroy_discard_entry: 3021destroy_discard_entry:
2795 kmem_cache_destroy(discard_entry_slab); 3022 kmem_cache_destroy(discard_entry_slab);
2796fail: 3023fail:
@@ -2800,7 +3027,7 @@ fail:
2800void destroy_segment_manager_caches(void) 3027void destroy_segment_manager_caches(void)
2801{ 3028{
2802 kmem_cache_destroy(sit_entry_set_slab); 3029 kmem_cache_destroy(sit_entry_set_slab);
2803 kmem_cache_destroy(bio_entry_slab); 3030 kmem_cache_destroy(discard_cmd_slab);
2804 kmem_cache_destroy(discard_entry_slab); 3031 kmem_cache_destroy(discard_entry_slab);
2805 kmem_cache_destroy(inmem_entry_slab); 3032 kmem_cache_destroy(inmem_entry_slab);
2806} 3033}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 9d44ce83acb2..5e8ad4280a50 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -164,6 +164,9 @@ struct seg_entry {
164 unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ 164 unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
165 unsigned int padding:6; /* padding */ 165 unsigned int padding:6; /* padding */
166 unsigned char *cur_valid_map; /* validity bitmap of blocks */ 166 unsigned char *cur_valid_map; /* validity bitmap of blocks */
167#ifdef CONFIG_F2FS_CHECK_FS
168 unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
169#endif
167 /* 170 /*
168 * # of valid blocks and the validity bitmap stored in the the last 171 * # of valid blocks and the validity bitmap stored in the the last
169 * checkpoint pack. This information is used by the SSR mode. 172 * checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +189,12 @@ struct segment_allocation {
186 * the page is atomically written, and it is in inmem_pages list. 189 * the page is atomically written, and it is in inmem_pages list.
187 */ 190 */
188#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) 191#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
192#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
189 193
190#define IS_ATOMIC_WRITTEN_PAGE(page) \ 194#define IS_ATOMIC_WRITTEN_PAGE(page) \
191 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) 195 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
196#define IS_DUMMY_WRITTEN_PAGE(page) \
197 (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
192 198
193struct inmem_pages { 199struct inmem_pages {
194 struct list_head list; 200 struct list_head list;
@@ -203,6 +209,9 @@ struct sit_info {
203 block_t sit_blocks; /* # of blocks used by SIT area */ 209 block_t sit_blocks; /* # of blocks used by SIT area */
204 block_t written_valid_blocks; /* # of valid blocks in main area */ 210 block_t written_valid_blocks; /* # of valid blocks in main area */
205 char *sit_bitmap; /* SIT bitmap pointer */ 211 char *sit_bitmap; /* SIT bitmap pointer */
212#ifdef CONFIG_F2FS_CHECK_FS
213 char *sit_bitmap_mir; /* SIT bitmap mirror */
214#endif
206 unsigned int bitmap_size; /* SIT bitmap size */ 215 unsigned int bitmap_size; /* SIT bitmap size */
207 216
208 unsigned long *tmp_map; /* bitmap for temporal use */ 217 unsigned long *tmp_map; /* bitmap for temporal use */
@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
317 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); 326 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
318 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); 327 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
319 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); 328 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
329#ifdef CONFIG_F2FS_CHECK_FS
330 memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
331#endif
320 se->type = GET_SIT_TYPE(rs); 332 se->type = GET_SIT_TYPE(rs);
321 se->mtime = le64_to_cpu(rs->mtime); 333 se->mtime = le64_to_cpu(rs->mtime);
322} 334}
@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
414 void *dst_addr) 426 void *dst_addr)
415{ 427{
416 struct sit_info *sit_i = SIT_I(sbi); 428 struct sit_info *sit_i = SIT_I(sbi);
429
430#ifdef CONFIG_F2FS_CHECK_FS
431 if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
432 sit_i->bitmap_size))
433 f2fs_bug_on(sbi, 1);
434#endif
417 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); 435 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
418} 436}
419 437
@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
634 652
635 check_seg_range(sbi, start); 653 check_seg_range(sbi, start);
636 654
655#ifdef CONFIG_F2FS_CHECK_FS
656 if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
657 f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
658 f2fs_bug_on(sbi, 1);
659#endif
660
637 /* calculate sit block address */ 661 /* calculate sit block address */
638 if (f2fs_test_bit(offset, sit_i->sit_bitmap)) 662 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
639 blk_addr += sit_i->sit_blocks; 663 blk_addr += sit_i->sit_blocks;
@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
659 unsigned int block_off = SIT_BLOCK_OFFSET(start); 683 unsigned int block_off = SIT_BLOCK_OFFSET(start);
660 684
661 f2fs_change_bit(block_off, sit_i->sit_bitmap); 685 f2fs_change_bit(block_off, sit_i->sit_bitmap);
686#ifdef CONFIG_F2FS_CHECK_FS
687 f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
688#endif
662} 689}
663 690
664static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) 691static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -689,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
689 - (base + 1) + type; 716 - (base + 1) + type;
690} 717}
691 718
719static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
720 unsigned int secno)
721{
722 if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
723 sbi->fggc_threshold)
724 return true;
725 return false;
726}
727
692static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) 728static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
693{ 729{
694 if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) 730 if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -700,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
700 * It is very important to gather dirty pages and write at once, so that we can 736 * It is very important to gather dirty pages and write at once, so that we can
701 * submit a big bio without interfering other data writes. 737 * submit a big bio without interfering other data writes.
702 * By default, 512 pages for directory data, 738 * By default, 512 pages for directory data,
703 * 512 pages (2MB) * 3 for three types of nodes, and 739 * 512 pages (2MB) * 8 for nodes, and
704 * max_bio_blocks for meta are set. 740 * 256 pages * 8 for meta are set.
705 */ 741 */
706static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) 742static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
707{ 743{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a831303bb777..96fe8ed73100 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,6 +89,7 @@ enum {
89 Opt_active_logs, 89 Opt_active_logs,
90 Opt_disable_ext_identify, 90 Opt_disable_ext_identify,
91 Opt_inline_xattr, 91 Opt_inline_xattr,
92 Opt_noinline_xattr,
92 Opt_inline_data, 93 Opt_inline_data,
93 Opt_inline_dentry, 94 Opt_inline_dentry,
94 Opt_noinline_dentry, 95 Opt_noinline_dentry,
@@ -101,6 +102,7 @@ enum {
101 Opt_noinline_data, 102 Opt_noinline_data,
102 Opt_data_flush, 103 Opt_data_flush,
103 Opt_mode, 104 Opt_mode,
105 Opt_io_size_bits,
104 Opt_fault_injection, 106 Opt_fault_injection,
105 Opt_lazytime, 107 Opt_lazytime,
106 Opt_nolazytime, 108 Opt_nolazytime,
@@ -121,6 +123,7 @@ static match_table_t f2fs_tokens = {
121 {Opt_active_logs, "active_logs=%u"}, 123 {Opt_active_logs, "active_logs=%u"},
122 {Opt_disable_ext_identify, "disable_ext_identify"}, 124 {Opt_disable_ext_identify, "disable_ext_identify"},
123 {Opt_inline_xattr, "inline_xattr"}, 125 {Opt_inline_xattr, "inline_xattr"},
126 {Opt_noinline_xattr, "noinline_xattr"},
124 {Opt_inline_data, "inline_data"}, 127 {Opt_inline_data, "inline_data"},
125 {Opt_inline_dentry, "inline_dentry"}, 128 {Opt_inline_dentry, "inline_dentry"},
126 {Opt_noinline_dentry, "noinline_dentry"}, 129 {Opt_noinline_dentry, "noinline_dentry"},
@@ -133,6 +136,7 @@ static match_table_t f2fs_tokens = {
133 {Opt_noinline_data, "noinline_data"}, 136 {Opt_noinline_data, "noinline_data"},
134 {Opt_data_flush, "data_flush"}, 137 {Opt_data_flush, "data_flush"},
135 {Opt_mode, "mode=%s"}, 138 {Opt_mode, "mode=%s"},
139 {Opt_io_size_bits, "io_bits=%u"},
136 {Opt_fault_injection, "fault_injection=%u"}, 140 {Opt_fault_injection, "fault_injection=%u"},
137 {Opt_lazytime, "lazytime"}, 141 {Opt_lazytime, "lazytime"},
138 {Opt_nolazytime, "nolazytime"}, 142 {Opt_nolazytime, "nolazytime"},
@@ -143,6 +147,7 @@ static match_table_t f2fs_tokens = {
143enum { 147enum {
144 GC_THREAD, /* struct f2fs_gc_thread */ 148 GC_THREAD, /* struct f2fs_gc_thread */
145 SM_INFO, /* struct f2fs_sm_info */ 149 SM_INFO, /* struct f2fs_sm_info */
150 DCC_INFO, /* struct discard_cmd_control */
146 NM_INFO, /* struct f2fs_nm_info */ 151 NM_INFO, /* struct f2fs_nm_info */
147 F2FS_SBI, /* struct f2fs_sb_info */ 152 F2FS_SBI, /* struct f2fs_sb_info */
148#ifdef CONFIG_F2FS_FAULT_INJECTION 153#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +171,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
166 return (unsigned char *)sbi->gc_thread; 171 return (unsigned char *)sbi->gc_thread;
167 else if (struct_type == SM_INFO) 172 else if (struct_type == SM_INFO)
168 return (unsigned char *)SM_I(sbi); 173 return (unsigned char *)SM_I(sbi);
174 else if (struct_type == DCC_INFO)
175 return (unsigned char *)SM_I(sbi)->dcc_info;
169 else if (struct_type == NM_INFO) 176 else if (struct_type == NM_INFO)
170 return (unsigned char *)NM_I(sbi); 177 return (unsigned char *)NM_I(sbi);
171 else if (struct_type == F2FS_SBI) 178 else if (struct_type == F2FS_SBI)
@@ -281,7 +288,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
281F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); 288F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
282F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); 289F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
283F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 290F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
284F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 291F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
285F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); 292F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
286F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 293F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
287F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 294F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -439,6 +446,9 @@ static int parse_options(struct super_block *sb, char *options)
439 case Opt_inline_xattr: 446 case Opt_inline_xattr:
440 set_opt(sbi, INLINE_XATTR); 447 set_opt(sbi, INLINE_XATTR);
441 break; 448 break;
449 case Opt_noinline_xattr:
450 clear_opt(sbi, INLINE_XATTR);
451 break;
442#else 452#else
443 case Opt_user_xattr: 453 case Opt_user_xattr:
444 f2fs_msg(sb, KERN_INFO, 454 f2fs_msg(sb, KERN_INFO,
@@ -452,6 +462,10 @@ static int parse_options(struct super_block *sb, char *options)
452 f2fs_msg(sb, KERN_INFO, 462 f2fs_msg(sb, KERN_INFO,
453 "inline_xattr options not supported"); 463 "inline_xattr options not supported");
454 break; 464 break;
465 case Opt_noinline_xattr:
466 f2fs_msg(sb, KERN_INFO,
467 "noinline_xattr options not supported");
468 break;
455#endif 469#endif
456#ifdef CONFIG_F2FS_FS_POSIX_ACL 470#ifdef CONFIG_F2FS_FS_POSIX_ACL
457 case Opt_acl: 471 case Opt_acl:
@@ -535,11 +549,23 @@ static int parse_options(struct super_block *sb, char *options)
535 } 549 }
536 kfree(name); 550 kfree(name);
537 break; 551 break;
552 case Opt_io_size_bits:
553 if (args->from && match_int(args, &arg))
554 return -EINVAL;
555 if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
556 f2fs_msg(sb, KERN_WARNING,
557 "Not support %d, larger than %d",
558 1 << arg, BIO_MAX_PAGES);
559 return -EINVAL;
560 }
561 sbi->write_io_size_bits = arg;
562 break;
538 case Opt_fault_injection: 563 case Opt_fault_injection:
539 if (args->from && match_int(args, &arg)) 564 if (args->from && match_int(args, &arg))
540 return -EINVAL; 565 return -EINVAL;
541#ifdef CONFIG_F2FS_FAULT_INJECTION 566#ifdef CONFIG_F2FS_FAULT_INJECTION
542 f2fs_build_fault_attr(sbi, arg); 567 f2fs_build_fault_attr(sbi, arg);
568 set_opt(sbi, FAULT_INJECTION);
543#else 569#else
544 f2fs_msg(sb, KERN_INFO, 570 f2fs_msg(sb, KERN_INFO,
545 "FAULT_INJECTION was not selected"); 571 "FAULT_INJECTION was not selected");
@@ -558,6 +584,13 @@ static int parse_options(struct super_block *sb, char *options)
558 return -EINVAL; 584 return -EINVAL;
559 } 585 }
560 } 586 }
587
588 if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
589 f2fs_msg(sb, KERN_ERR,
590 "Should set mode=lfs with %uKB-sized IO",
591 F2FS_IO_SIZE_KB(sbi));
592 return -EINVAL;
593 }
561 return 0; 594 return 0;
562} 595}
563 596
@@ -591,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
591 624
592static int f2fs_drop_inode(struct inode *inode) 625static int f2fs_drop_inode(struct inode *inode)
593{ 626{
627 int ret;
594 /* 628 /*
595 * This is to avoid a deadlock condition like below. 629 * This is to avoid a deadlock condition like below.
596 * writeback_single_inode(inode) 630 * writeback_single_inode(inode)
@@ -623,10 +657,12 @@ static int f2fs_drop_inode(struct inode *inode)
623 spin_lock(&inode->i_lock); 657 spin_lock(&inode->i_lock);
624 atomic_dec(&inode->i_count); 658 atomic_dec(&inode->i_count);
625 } 659 }
660 trace_f2fs_drop_inode(inode, 0);
626 return 0; 661 return 0;
627 } 662 }
628 663 ret = generic_drop_inode(inode);
629 return generic_drop_inode(inode); 664 trace_f2fs_drop_inode(inode, ret);
665 return ret;
630} 666}
631 667
632int f2fs_inode_dirtied(struct inode *inode, bool sync) 668int f2fs_inode_dirtied(struct inode *inode, bool sync)
@@ -750,6 +786,9 @@ static void f2fs_put_super(struct super_block *sb)
750 write_checkpoint(sbi, &cpc); 786 write_checkpoint(sbi, &cpc);
751 } 787 }
752 788
789 /* be sure to wait for any on-going discard commands */
790 f2fs_wait_discard_bio(sbi, NULL_ADDR);
791
753 /* write_checkpoint can update stat informaion */ 792 /* write_checkpoint can update stat informaion */
754 f2fs_destroy_stats(sbi); 793 f2fs_destroy_stats(sbi);
755 794
@@ -782,7 +821,7 @@ static void f2fs_put_super(struct super_block *sb)
782 kfree(sbi->raw_super); 821 kfree(sbi->raw_super);
783 822
784 destroy_device_list(sbi); 823 destroy_device_list(sbi);
785 824 mempool_destroy(sbi->write_io_dummy);
786 destroy_percpu_info(sbi); 825 destroy_percpu_info(sbi);
787 kfree(sbi); 826 kfree(sbi);
788} 827}
@@ -882,6 +921,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
882 seq_puts(seq, ",nouser_xattr"); 921 seq_puts(seq, ",nouser_xattr");
883 if (test_opt(sbi, INLINE_XATTR)) 922 if (test_opt(sbi, INLINE_XATTR))
884 seq_puts(seq, ",inline_xattr"); 923 seq_puts(seq, ",inline_xattr");
924 else
925 seq_puts(seq, ",noinline_xattr");
885#endif 926#endif
886#ifdef CONFIG_F2FS_FS_POSIX_ACL 927#ifdef CONFIG_F2FS_FS_POSIX_ACL
887 if (test_opt(sbi, POSIX_ACL)) 928 if (test_opt(sbi, POSIX_ACL))
@@ -918,6 +959,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
918 else if (test_opt(sbi, LFS)) 959 else if (test_opt(sbi, LFS))
919 seq_puts(seq, "lfs"); 960 seq_puts(seq, "lfs");
920 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 961 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
962 if (F2FS_IO_SIZE_BITS(sbi))
963 seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
964#ifdef CONFIG_F2FS_FAULT_INJECTION
965 if (test_opt(sbi, FAULT_INJECTION))
966 seq_puts(seq, ",fault_injection");
967#endif
921 968
922 return 0; 969 return 0;
923} 970}
@@ -995,6 +1042,7 @@ static void default_options(struct f2fs_sb_info *sbi)
995 sbi->active_logs = NR_CURSEG_TYPE; 1042 sbi->active_logs = NR_CURSEG_TYPE;
996 1043
997 set_opt(sbi, BG_GC); 1044 set_opt(sbi, BG_GC);
1045 set_opt(sbi, INLINE_XATTR);
998 set_opt(sbi, INLINE_DATA); 1046 set_opt(sbi, INLINE_DATA);
999 set_opt(sbi, INLINE_DENTRY); 1047 set_opt(sbi, INLINE_DENTRY);
1000 set_opt(sbi, EXTENT_CACHE); 1048 set_opt(sbi, EXTENT_CACHE);
@@ -1686,36 +1734,55 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1686static int f2fs_scan_devices(struct f2fs_sb_info *sbi) 1734static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1687{ 1735{
1688 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1736 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1737 unsigned int max_devices = MAX_DEVICES;
1689 int i; 1738 int i;
1690 1739
1691 for (i = 0; i < MAX_DEVICES; i++) { 1740 /* Initialize single device information */
1692 if (!RDEV(i).path[0]) 1741 if (!RDEV(0).path[0]) {
1742 if (!bdev_is_zoned(sbi->sb->s_bdev))
1693 return 0; 1743 return 0;
1744 max_devices = 1;
1745 }
1694 1746
1695 if (i == 0) { 1747 /*
1696 sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) * 1748 * Initialize multiple devices information, or single
1697 MAX_DEVICES, GFP_KERNEL); 1749 * zoned block device information.
1698 if (!sbi->devs) 1750 */
1699 return -ENOMEM; 1751 sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
1700 } 1752 GFP_KERNEL);
1753 if (!sbi->devs)
1754 return -ENOMEM;
1701 1755
1702 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); 1756 for (i = 0; i < max_devices; i++) {
1703 FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
1704 if (i == 0) {
1705 FDEV(i).start_blk = 0;
1706 FDEV(i).end_blk = FDEV(i).start_blk +
1707 (FDEV(i).total_segments <<
1708 sbi->log_blocks_per_seg) - 1 +
1709 le32_to_cpu(raw_super->segment0_blkaddr);
1710 } else {
1711 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1712 FDEV(i).end_blk = FDEV(i).start_blk +
1713 (FDEV(i).total_segments <<
1714 sbi->log_blocks_per_seg) - 1;
1715 }
1716 1757
1717 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, 1758 if (i > 0 && !RDEV(i).path[0])
1759 break;
1760
1761 if (max_devices == 1) {
1762 /* Single zoned block device mount */
1763 FDEV(0).bdev =
1764 blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
1765 sbi->sb->s_mode, sbi->sb->s_type);
1766 } else {
1767 /* Multi-device mount */
1768 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1769 FDEV(i).total_segments =
1770 le32_to_cpu(RDEV(i).total_segments);
1771 if (i == 0) {
1772 FDEV(i).start_blk = 0;
1773 FDEV(i).end_blk = FDEV(i).start_blk +
1774 (FDEV(i).total_segments <<
1775 sbi->log_blocks_per_seg) - 1 +
1776 le32_to_cpu(raw_super->segment0_blkaddr);
1777 } else {
1778 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1779 FDEV(i).end_blk = FDEV(i).start_blk +
1780 (FDEV(i).total_segments <<
1781 sbi->log_blocks_per_seg) - 1;
1782 }
1783 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1718 sbi->sb->s_mode, sbi->sb->s_type); 1784 sbi->sb->s_mode, sbi->sb->s_type);
1785 }
1719 if (IS_ERR(FDEV(i).bdev)) 1786 if (IS_ERR(FDEV(i).bdev))
1720 return PTR_ERR(FDEV(i).bdev); 1787 return PTR_ERR(FDEV(i).bdev);
1721 1788
@@ -1735,6 +1802,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1735 "Failed to initialize F2FS blkzone information"); 1802 "Failed to initialize F2FS blkzone information");
1736 return -EINVAL; 1803 return -EINVAL;
1737 } 1804 }
1805 if (max_devices == 1)
1806 break;
1738 f2fs_msg(sbi->sb, KERN_INFO, 1807 f2fs_msg(sbi->sb, KERN_INFO,
1739 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", 1808 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1740 i, FDEV(i).path, 1809 i, FDEV(i).path,
@@ -1751,6 +1820,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1751 FDEV(i).total_segments, 1820 FDEV(i).total_segments,
1752 FDEV(i).start_blk, FDEV(i).end_blk); 1821 FDEV(i).start_blk, FDEV(i).end_blk);
1753 } 1822 }
1823 f2fs_msg(sbi->sb, KERN_INFO,
1824 "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1754 return 0; 1825 return 0;
1755} 1826}
1756 1827
@@ -1868,12 +1939,19 @@ try_onemore:
1868 if (err) 1939 if (err)
1869 goto free_options; 1940 goto free_options;
1870 1941
1942 if (F2FS_IO_SIZE(sbi) > 1) {
1943 sbi->write_io_dummy =
1944 mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
1945 if (!sbi->write_io_dummy)
1946 goto free_options;
1947 }
1948
1871 /* get an inode for meta space */ 1949 /* get an inode for meta space */
1872 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 1950 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1873 if (IS_ERR(sbi->meta_inode)) { 1951 if (IS_ERR(sbi->meta_inode)) {
1874 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); 1952 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1875 err = PTR_ERR(sbi->meta_inode); 1953 err = PTR_ERR(sbi->meta_inode);
1876 goto free_options; 1954 goto free_io_dummy;
1877 } 1955 }
1878 1956
1879 err = get_valid_checkpoint(sbi); 1957 err = get_valid_checkpoint(sbi);
@@ -2048,6 +2126,8 @@ skip_recovery:
2048 sbi->valid_super_block ? 1 : 2, err); 2126 sbi->valid_super_block ? 1 : 2, err);
2049 } 2127 }
2050 2128
2129 f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
2130 cur_cp_version(F2FS_CKPT(sbi)));
2051 f2fs_update_time(sbi, CP_TIME); 2131 f2fs_update_time(sbi, CP_TIME);
2052 f2fs_update_time(sbi, REQ_TIME); 2132 f2fs_update_time(sbi, REQ_TIME);
2053 return 0; 2133 return 0;
@@ -2091,6 +2171,8 @@ free_devices:
2091free_meta_inode: 2171free_meta_inode:
2092 make_bad_inode(sbi->meta_inode); 2172 make_bad_inode(sbi->meta_inode);
2093 iput(sbi->meta_inode); 2173 iput(sbi->meta_inode);
2174free_io_dummy:
2175 mempool_destroy(sbi->write_io_dummy);
2094free_options: 2176free_options:
2095 destroy_percpu_info(sbi); 2177 destroy_percpu_info(sbi);
2096 kfree(options); 2178 kfree(options);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index c47ce2f330a1..7298a4488f7f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
217 return entry; 217 return entry;
218} 218}
219 219
220static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
221 void **last_addr, int index,
222 size_t len, const char *name)
223{
224 struct f2fs_xattr_entry *entry;
225 unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
226
227 list_for_each_xattr(entry, base_addr) {
228 if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
229 (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
230 base_addr + inline_size) {
231 *last_addr = entry;
232 return NULL;
233 }
234 if (entry->e_name_index != index)
235 continue;
236 if (entry->e_name_len != len)
237 continue;
238 if (!memcmp(entry->e_name, name, len))
239 break;
240 }
241 return entry;
242}
243
244static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
245 unsigned int index, unsigned int len,
246 const char *name, struct f2fs_xattr_entry **xe,
247 void **base_addr)
248{
249 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
250 void *cur_addr, *txattr_addr, *last_addr = NULL;
251 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
252 unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
253 unsigned int inline_size = 0;
254 int err = 0;
255
256 inline_size = inline_xattr_size(inode);
257
258 if (!size && !inline_size)
259 return -ENODATA;
260
261 txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
262 GFP_F2FS_ZERO);
263 if (!txattr_addr)
264 return -ENOMEM;
265
266 /* read from inline xattr */
267 if (inline_size) {
268 struct page *page = NULL;
269 void *inline_addr;
270
271 if (ipage) {
272 inline_addr = inline_xattr_addr(ipage);
273 } else {
274 page = get_node_page(sbi, inode->i_ino);
275 if (IS_ERR(page)) {
276 err = PTR_ERR(page);
277 goto out;
278 }
279 inline_addr = inline_xattr_addr(page);
280 }
281 memcpy(txattr_addr, inline_addr, inline_size);
282 f2fs_put_page(page, 1);
283
284 *xe = __find_inline_xattr(txattr_addr, &last_addr,
285 index, len, name);
286 if (*xe)
287 goto check;
288 }
289
290 /* read from xattr node block */
291 if (xnid) {
292 struct page *xpage;
293 void *xattr_addr;
294
295 /* The inode already has an extended attribute block. */
296 xpage = get_node_page(sbi, xnid);
297 if (IS_ERR(xpage)) {
298 err = PTR_ERR(xpage);
299 goto out;
300 }
301
302 xattr_addr = page_address(xpage);
303 memcpy(txattr_addr + inline_size, xattr_addr, size);
304 f2fs_put_page(xpage, 1);
305 }
306
307 if (last_addr)
308 cur_addr = XATTR_HDR(last_addr) - 1;
309 else
310 cur_addr = txattr_addr;
311
312 *xe = __find_xattr(cur_addr, index, len, name);
313check:
314 if (IS_XATTR_LAST_ENTRY(*xe)) {
315 err = -ENODATA;
316 goto out;
317 }
318
319 *base_addr = txattr_addr;
320 return 0;
321out:
322 kzfree(txattr_addr);
323 return err;
324}
325
220static int read_all_xattrs(struct inode *inode, struct page *ipage, 326static int read_all_xattrs(struct inode *inode, struct page *ipage,
221 void **base_addr) 327 void **base_addr)
222{ 328{
@@ -348,23 +454,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
348 } 454 }
349 455
350 xattr_addr = page_address(xpage); 456 xattr_addr = page_address(xpage);
351 memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - 457 memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
352 sizeof(struct node_footer));
353 set_page_dirty(xpage); 458 set_page_dirty(xpage);
354 f2fs_put_page(xpage, 1); 459 f2fs_put_page(xpage, 1);
355 460
356 /* need to checkpoint during fsync */
357 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
358 return 0; 461 return 0;
359} 462}
360 463
361int f2fs_getxattr(struct inode *inode, int index, const char *name, 464int f2fs_getxattr(struct inode *inode, int index, const char *name,
362 void *buffer, size_t buffer_size, struct page *ipage) 465 void *buffer, size_t buffer_size, struct page *ipage)
363{ 466{
364 struct f2fs_xattr_entry *entry; 467 struct f2fs_xattr_entry *entry = NULL;
365 void *base_addr;
366 int error = 0; 468 int error = 0;
367 size_t size, len; 469 unsigned int size, len;
470 void *base_addr = NULL;
368 471
369 if (name == NULL) 472 if (name == NULL)
370 return -EINVAL; 473 return -EINVAL;
@@ -373,21 +476,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
373 if (len > F2FS_NAME_LEN) 476 if (len > F2FS_NAME_LEN)
374 return -ERANGE; 477 return -ERANGE;
375 478
376 error = read_all_xattrs(inode, ipage, &base_addr); 479 error = lookup_all_xattrs(inode, ipage, index, len, name,
480 &entry, &base_addr);
377 if (error) 481 if (error)
378 return error; 482 return error;
379 483
380 entry = __find_xattr(base_addr, index, len, name);
381 if (IS_XATTR_LAST_ENTRY(entry)) {
382 error = -ENODATA;
383 goto cleanup;
384 }
385
386 size = le16_to_cpu(entry->e_value_size); 484 size = le16_to_cpu(entry->e_value_size);
387 485
388 if (buffer && size > buffer_size) { 486 if (buffer && size > buffer_size) {
389 error = -ERANGE; 487 error = -ERANGE;
390 goto cleanup; 488 goto out;
391 } 489 }
392 490
393 if (buffer) { 491 if (buffer) {
@@ -395,8 +493,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
395 memcpy(buffer, pval, size); 493 memcpy(buffer, pval, size);
396 } 494 }
397 error = size; 495 error = size;
398 496out:
399cleanup:
400 kzfree(base_addr); 497 kzfree(base_addr);
401 return error; 498 return error;
402} 499}
@@ -445,6 +542,13 @@ cleanup:
445 return error; 542 return error;
446} 543}
447 544
545static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
546 const void *value, size_t size)
547{
548 void *pval = entry->e_name + entry->e_name_len;
549 return (entry->e_value_size == size) && !memcmp(pval, value, size);
550}
551
448static int __f2fs_setxattr(struct inode *inode, int index, 552static int __f2fs_setxattr(struct inode *inode, int index,
449 const char *name, const void *value, size_t size, 553 const char *name, const void *value, size_t size,
450 struct page *ipage, int flags) 554 struct page *ipage, int flags)
@@ -479,12 +583,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
479 583
480 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; 584 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
481 585
482 if ((flags & XATTR_REPLACE) && !found) { 586 if (found) {
587 if ((flags & XATTR_CREATE)) {
588 error = -EEXIST;
589 goto exit;
590 }
591
592 if (f2fs_xattr_value_same(here, value, size))
593 goto exit;
594 } else if ((flags & XATTR_REPLACE)) {
483 error = -ENODATA; 595 error = -ENODATA;
484 goto exit; 596 goto exit;
485 } else if ((flags & XATTR_CREATE) && found) {
486 error = -EEXIST;
487 goto exit;
488 } 597 }
489 598
490 last = here; 599 last = here;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index f990de20cdcd..d5a94928c116 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
72 for (entry = XATTR_FIRST_ENTRY(addr);\ 72 for (entry = XATTR_FIRST_ENTRY(addr);\
73 !IS_XATTR_LAST_ENTRY(entry);\ 73 !IS_XATTR_LAST_ENTRY(entry);\
74 entry = XATTR_NEXT_ENTRY(entry)) 74 entry = XATTR_NEXT_ENTRY(entry))
75 75#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
76#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ 76#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
77 sizeof(struct node_footer) - sizeof(__u32)) 77#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
78 VALID_XATTR_BLOCK_SIZE)
78 79
79#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ 80#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
80 sizeof(struct f2fs_xattr_header) - \ 81 sizeof(struct f2fs_xattr_header) - \