aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-08 18:11:48 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-08 18:11:48 -0400
commit942d33da999b86821c9aee9615fcb81207ee04c7 (patch)
treedb14ab92982f936c0a2ea2202f5e301310f33bdd /fs/f2fs
parent246e6a0d781091c4657890ffa497c2576bd99095 (diff)
parent59bbd474abb9dd6a0c1a74df758ec29c7a8b150f (diff)
Merge tag 'f2fs-for-v3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This patch-set includes the following major enhancement patches. - introduce a new global lock scheme - add tracepoints on several major functions - fix the overall cleaning process focused on victim selection - apply the block plugging to merge IOs as much as possible - enhance management of free nids and its list - enhance the readahead mode for node pages - address several critical deadlock conditions - reduce lock_page calls The other minor bug fixes and enhancements are as follows. - calculation mistakes: overflow - bio types: READ, READA, and READ_SYNC - fix the recovery flow, data races, and null pointer errors" * tag 'f2fs-for-v3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (68 commits) f2fs: cover free_nid management with spin_lock f2fs: optimize scan_nat_page() f2fs: code cleanup for scan_nat_page() and build_free_nids() f2fs: bugfix for alloc_nid_failed() f2fs: recover when journal contains deleted files f2fs: continue to mount after failing recovery f2fs: avoid deadlock during evict after f2fs_gc f2fs: modify the number of issued pages to merge IOs f2fs: remove useless #include <linux/proc_fs.h> as we're now using sysfs as debug entry. f2fs: fix inconsistent using of NM_WOUT_THRESHOLD f2fs: check truncation of mapping after lock_page f2fs: enhance alloc_nid and build_free_nids flows f2fs: add a tracepoint on f2fs_new_inode f2fs: check nid == 0 in add_free_nid f2fs: add REQ_META about metadata requests for submit f2fs: give a chance to merge IOs by IO scheduler f2fs: avoid frequent background GC f2fs: add tracepoints to debug checkpoint request f2fs: add tracepoints for write page operations f2fs: add tracepoints to debug the block allocation ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/checkpoint.c63
-rw-r--r--fs/f2fs/data.c202
-rw-r--r--fs/f2fs/debug.c10
-rw-r--r--fs/f2fs/dir.c110
-rw-r--r--fs/f2fs/f2fs.h93
-rw-r--r--fs/f2fs/file.c116
-rw-r--r--fs/f2fs/gc.c123
-rw-r--r--fs/f2fs/gc.h12
-rw-r--r--fs/f2fs/inode.c68
-rw-r--r--fs/f2fs/namei.c80
-rw-r--r--fs/f2fs/node.c411
-rw-r--r--fs/f2fs/node.h20
-rw-r--r--fs/f2fs/recovery.c83
-rw-r--r--fs/f2fs/segment.c137
-rw-r--r--fs/f2fs/segment.h41
-rw-r--r--fs/f2fs/super.c67
-rw-r--r--fs/f2fs/xattr.c28
17 files changed, 982 insertions, 682 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 2b6fc131e2ce..b1de01da1a40 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -20,6 +20,7 @@
20#include "f2fs.h" 20#include "f2fs.h"
21#include "node.h" 21#include "node.h"
22#include "segment.h" 22#include "segment.h"
23#include <trace/events/f2fs.h>
23 24
24static struct kmem_cache *orphan_entry_slab; 25static struct kmem_cache *orphan_entry_slab;
25static struct kmem_cache *inode_entry_slab; 26static struct kmem_cache *inode_entry_slab;
@@ -57,13 +58,19 @@ repeat:
57 cond_resched(); 58 cond_resched();
58 goto repeat; 59 goto repeat;
59 } 60 }
60 if (f2fs_readpage(sbi, page, index, READ_SYNC)) { 61 if (PageUptodate(page))
62 goto out;
63
64 if (f2fs_readpage(sbi, page, index, READ_SYNC))
65 goto repeat;
66
67 lock_page(page);
68 if (page->mapping != mapping) {
61 f2fs_put_page(page, 1); 69 f2fs_put_page(page, 1);
62 goto repeat; 70 goto repeat;
63 } 71 }
72out:
64 mark_page_accessed(page); 73 mark_page_accessed(page);
65
66 /* We do not allow returning an errorneous page */
67 return page; 74 return page;
68} 75}
69 76
@@ -541,54 +548,44 @@ retry:
541 */ 548 */
542static void block_operations(struct f2fs_sb_info *sbi) 549static void block_operations(struct f2fs_sb_info *sbi)
543{ 550{
544 int t;
545 struct writeback_control wbc = { 551 struct writeback_control wbc = {
546 .sync_mode = WB_SYNC_ALL, 552 .sync_mode = WB_SYNC_ALL,
547 .nr_to_write = LONG_MAX, 553 .nr_to_write = LONG_MAX,
548 .for_reclaim = 0, 554 .for_reclaim = 0,
549 }; 555 };
556 struct blk_plug plug;
550 557
551 /* Stop renaming operation */ 558 blk_start_plug(&plug);
552 mutex_lock_op(sbi, RENAME);
553 mutex_lock_op(sbi, DENTRY_OPS);
554 559
555retry_dents: 560retry_flush_dents:
556 /* write all the dirty dentry pages */ 561 mutex_lock_all(sbi);
557 sync_dirty_dir_inodes(sbi);
558 562
559 mutex_lock_op(sbi, DATA_WRITE); 563 /* write all the dirty dentry pages */
560 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 564 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
561 mutex_unlock_op(sbi, DATA_WRITE); 565 mutex_unlock_all(sbi);
562 goto retry_dents; 566 sync_dirty_dir_inodes(sbi);
567 goto retry_flush_dents;
563 } 568 }
564 569
565 /* block all the operations */
566 for (t = DATA_NEW; t <= NODE_TRUNC; t++)
567 mutex_lock_op(sbi, t);
568
569 mutex_lock(&sbi->write_inode);
570
571 /* 570 /*
572 * POR: we should ensure that there is no dirty node pages 571 * POR: we should ensure that there is no dirty node pages
573 * until finishing nat/sit flush. 572 * until finishing nat/sit flush.
574 */ 573 */
575retry: 574retry_flush_nodes:
576 sync_node_pages(sbi, 0, &wbc); 575 mutex_lock(&sbi->node_write);
577
578 mutex_lock_op(sbi, NODE_WRITE);
579 576
580 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 577 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
581 mutex_unlock_op(sbi, NODE_WRITE); 578 mutex_unlock(&sbi->node_write);
582 goto retry; 579 sync_node_pages(sbi, 0, &wbc);
580 goto retry_flush_nodes;
583 } 581 }
584 mutex_unlock(&sbi->write_inode); 582 blk_finish_plug(&plug);
585} 583}
586 584
587static void unblock_operations(struct f2fs_sb_info *sbi) 585static void unblock_operations(struct f2fs_sb_info *sbi)
588{ 586{
589 int t; 587 mutex_unlock(&sbi->node_write);
590 for (t = NODE_WRITE; t >= RENAME; t--) 588 mutex_unlock_all(sbi);
591 mutex_unlock_op(sbi, t);
592} 589}
593 590
594static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 591static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
@@ -727,9 +724,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
727 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 724 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
728 unsigned long long ckpt_ver; 725 unsigned long long ckpt_ver;
729 726
727 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
728
730 mutex_lock(&sbi->cp_mutex); 729 mutex_lock(&sbi->cp_mutex);
731 block_operations(sbi); 730 block_operations(sbi);
732 731
732 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
733
733 f2fs_submit_bio(sbi, DATA, true); 734 f2fs_submit_bio(sbi, DATA, true);
734 f2fs_submit_bio(sbi, NODE, true); 735 f2fs_submit_bio(sbi, NODE, true);
735 f2fs_submit_bio(sbi, META, true); 736 f2fs_submit_bio(sbi, META, true);
@@ -746,13 +747,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
746 flush_nat_entries(sbi); 747 flush_nat_entries(sbi);
747 flush_sit_entries(sbi); 748 flush_sit_entries(sbi);
748 749
749 reset_victim_segmap(sbi);
750
751 /* unlock all the fs_lock[] in do_checkpoint() */ 750 /* unlock all the fs_lock[] in do_checkpoint() */
752 do_checkpoint(sbi, is_umount); 751 do_checkpoint(sbi, is_umount);
753 752
754 unblock_operations(sbi); 753 unblock_operations(sbi);
755 mutex_unlock(&sbi->cp_mutex); 754 mutex_unlock(&sbi->cp_mutex);
755
756 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
756} 757}
757 758
758void init_orphan_info(struct f2fs_sb_info *sbi) 759void init_orphan_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d0ed4ba4b61b..91ff93b0b0f4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -22,6 +22,7 @@
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
24#include "segment.h" 24#include "segment.h"
25#include <trace/events/f2fs.h>
25 26
26/* 27/*
27 * Lock ordering for the change of data block address: 28 * Lock ordering for the change of data block address:
@@ -55,6 +56,8 @@ int reserve_new_block(struct dnode_of_data *dn)
55 if (!inc_valid_block_count(sbi, dn->inode, 1)) 56 if (!inc_valid_block_count(sbi, dn->inode, 1))
56 return -ENOSPC; 57 return -ENOSPC;
57 58
59 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
60
58 __set_data_blkaddr(dn, NEW_ADDR); 61 __set_data_blkaddr(dn, NEW_ADDR);
59 dn->data_blkaddr = NEW_ADDR; 62 dn->data_blkaddr = NEW_ADDR;
60 sync_inode_page(dn); 63 sync_inode_page(dn);
@@ -134,7 +137,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
134 goto end_update; 137 goto end_update;
135 } 138 }
136 139
137 /* Frone merge */ 140 /* Front merge */
138 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { 141 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
139 fi->ext.fofs--; 142 fi->ext.fofs--;
140 fi->ext.blk_addr--; 143 fi->ext.blk_addr--;
@@ -170,7 +173,7 @@ end_update:
170 return; 173 return;
171} 174}
172 175
173struct page *find_data_page(struct inode *inode, pgoff_t index) 176struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
174{ 177{
175 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 178 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
176 struct address_space *mapping = inode->i_mapping; 179 struct address_space *mapping = inode->i_mapping;
@@ -184,7 +187,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
184 f2fs_put_page(page, 0); 187 f2fs_put_page(page, 0);
185 188
186 set_new_dnode(&dn, inode, NULL, NULL, 0); 189 set_new_dnode(&dn, inode, NULL, NULL, 0);
187 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 190 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
188 if (err) 191 if (err)
189 return ERR_PTR(err); 192 return ERR_PTR(err);
190 f2fs_put_dnode(&dn); 193 f2fs_put_dnode(&dn);
@@ -200,12 +203,20 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
200 if (!page) 203 if (!page)
201 return ERR_PTR(-ENOMEM); 204 return ERR_PTR(-ENOMEM);
202 205
203 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 206 if (PageUptodate(page)) {
204 if (err) { 207 unlock_page(page);
205 f2fs_put_page(page, 1); 208 return page;
206 return ERR_PTR(err); 209 }
210
211 err = f2fs_readpage(sbi, page, dn.data_blkaddr,
212 sync ? READ_SYNC : READA);
213 if (sync) {
214 wait_on_page_locked(page);
215 if (!PageUptodate(page)) {
216 f2fs_put_page(page, 0);
217 return ERR_PTR(-EIO);
218 }
207 } 219 }
208 unlock_page(page);
209 return page; 220 return page;
210} 221}
211 222
@@ -223,14 +234,14 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
223 int err; 234 int err;
224 235
225 set_new_dnode(&dn, inode, NULL, NULL, 0); 236 set_new_dnode(&dn, inode, NULL, NULL, 0);
226 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 237 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
227 if (err) 238 if (err)
228 return ERR_PTR(err); 239 return ERR_PTR(err);
229 f2fs_put_dnode(&dn); 240 f2fs_put_dnode(&dn);
230 241
231 if (dn.data_blkaddr == NULL_ADDR) 242 if (dn.data_blkaddr == NULL_ADDR)
232 return ERR_PTR(-ENOENT); 243 return ERR_PTR(-ENOENT);
233 244repeat:
234 page = grab_cache_page(mapping, index); 245 page = grab_cache_page(mapping, index);
235 if (!page) 246 if (!page)
236 return ERR_PTR(-ENOMEM); 247 return ERR_PTR(-ENOMEM);
@@ -242,9 +253,17 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
242 BUG_ON(dn.data_blkaddr == NULL_ADDR); 253 BUG_ON(dn.data_blkaddr == NULL_ADDR);
243 254
244 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 255 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
245 if (err) { 256 if (err)
246 f2fs_put_page(page, 1);
247 return ERR_PTR(err); 257 return ERR_PTR(err);
258
259 lock_page(page);
260 if (!PageUptodate(page)) {
261 f2fs_put_page(page, 1);
262 return ERR_PTR(-EIO);
263 }
264 if (page->mapping != mapping) {
265 f2fs_put_page(page, 1);
266 goto repeat;
248 } 267 }
249 return page; 268 return page;
250} 269}
@@ -252,6 +271,9 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
252/* 271/*
253 * Caller ensures that this data page is never allocated. 272 * Caller ensures that this data page is never allocated.
254 * A new zero-filled data page is allocated in the page cache. 273 * A new zero-filled data page is allocated in the page cache.
274 *
275 * Also, caller should grab and release a mutex by calling mutex_lock_op() and
276 * mutex_unlock_op().
255 */ 277 */
256struct page *get_new_data_page(struct inode *inode, pgoff_t index, 278struct page *get_new_data_page(struct inode *inode, pgoff_t index,
257 bool new_i_size) 279 bool new_i_size)
@@ -263,7 +285,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index,
263 int err; 285 int err;
264 286
265 set_new_dnode(&dn, inode, NULL, NULL, 0); 287 set_new_dnode(&dn, inode, NULL, NULL, 0);
266 err = get_dnode_of_data(&dn, index, 0); 288 err = get_dnode_of_data(&dn, index, ALLOC_NODE);
267 if (err) 289 if (err)
268 return ERR_PTR(err); 290 return ERR_PTR(err);
269 291
@@ -274,7 +296,7 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index,
274 } 296 }
275 } 297 }
276 f2fs_put_dnode(&dn); 298 f2fs_put_dnode(&dn);
277 299repeat:
278 page = grab_cache_page(mapping, index); 300 page = grab_cache_page(mapping, index);
279 if (!page) 301 if (!page)
280 return ERR_PTR(-ENOMEM); 302 return ERR_PTR(-ENOMEM);
@@ -284,14 +306,21 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index,
284 306
285 if (dn.data_blkaddr == NEW_ADDR) { 307 if (dn.data_blkaddr == NEW_ADDR) {
286 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 308 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
309 SetPageUptodate(page);
287 } else { 310 } else {
288 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 311 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
289 if (err) { 312 if (err)
290 f2fs_put_page(page, 1);
291 return ERR_PTR(err); 313 return ERR_PTR(err);
314 lock_page(page);
315 if (!PageUptodate(page)) {
316 f2fs_put_page(page, 1);
317 return ERR_PTR(-EIO);
318 }
319 if (page->mapping != mapping) {
320 f2fs_put_page(page, 1);
321 goto repeat;
292 } 322 }
293 } 323 }
294 SetPageUptodate(page);
295 324
296 if (new_i_size && 325 if (new_i_size &&
297 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { 326 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
@@ -326,21 +355,15 @@ static void read_end_io(struct bio *bio, int err)
326 355
327/* 356/*
328 * Fill the locked page with data located in the block address. 357 * Fill the locked page with data located in the block address.
329 * Read operation is synchronous, and caller must unlock the page. 358 * Return unlocked page.
330 */ 359 */
331int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, 360int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
332 block_t blk_addr, int type) 361 block_t blk_addr, int type)
333{ 362{
334 struct block_device *bdev = sbi->sb->s_bdev; 363 struct block_device *bdev = sbi->sb->s_bdev;
335 bool sync = (type == READ_SYNC);
336 struct bio *bio; 364 struct bio *bio;
337 365
338 /* This page can be already read by other threads */ 366 trace_f2fs_readpage(page, blk_addr, type);
339 if (PageUptodate(page)) {
340 if (!sync)
341 unlock_page(page);
342 return 0;
343 }
344 367
345 down_read(&sbi->bio_sem); 368 down_read(&sbi->bio_sem);
346 369
@@ -355,18 +378,12 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
355 kfree(bio->bi_private); 378 kfree(bio->bi_private);
356 bio_put(bio); 379 bio_put(bio);
357 up_read(&sbi->bio_sem); 380 up_read(&sbi->bio_sem);
381 f2fs_put_page(page, 1);
358 return -EFAULT; 382 return -EFAULT;
359 } 383 }
360 384
361 submit_bio(type, bio); 385 submit_bio(type, bio);
362 up_read(&sbi->bio_sem); 386 up_read(&sbi->bio_sem);
363
364 /* wait for read completion if sync */
365 if (sync) {
366 lock_page(page);
367 if (PageError(page))
368 return -EIO;
369 }
370 return 0; 387 return 0;
371} 388}
372 389
@@ -388,14 +405,18 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
388 /* Get the page offset from the block offset(iblock) */ 405 /* Get the page offset from the block offset(iblock) */
389 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 406 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
390 407
391 if (check_extent_cache(inode, pgofs, bh_result)) 408 if (check_extent_cache(inode, pgofs, bh_result)) {
409 trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
392 return 0; 410 return 0;
411 }
393 412
394 /* When reading holes, we need its node page */ 413 /* When reading holes, we need its node page */
395 set_new_dnode(&dn, inode, NULL, NULL, 0); 414 set_new_dnode(&dn, inode, NULL, NULL, 0);
396 err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE); 415 err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
397 if (err) 416 if (err) {
417 trace_f2fs_get_data_block(inode, iblock, bh_result, err);
398 return (err == -ENOENT) ? 0 : err; 418 return (err == -ENOENT) ? 0 : err;
419 }
399 420
400 /* It does not support data allocation */ 421 /* It does not support data allocation */
401 BUG_ON(create); 422 BUG_ON(create);
@@ -420,6 +441,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
420 bh_result->b_size = (i << blkbits); 441 bh_result->b_size = (i << blkbits);
421 } 442 }
422 f2fs_put_dnode(&dn); 443 f2fs_put_dnode(&dn);
444 trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
423 return 0; 445 return 0;
424} 446}
425 447
@@ -438,13 +460,12 @@ static int f2fs_read_data_pages(struct file *file,
438int do_write_data_page(struct page *page) 460int do_write_data_page(struct page *page)
439{ 461{
440 struct inode *inode = page->mapping->host; 462 struct inode *inode = page->mapping->host;
441 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
442 block_t old_blk_addr, new_blk_addr; 463 block_t old_blk_addr, new_blk_addr;
443 struct dnode_of_data dn; 464 struct dnode_of_data dn;
444 int err = 0; 465 int err = 0;
445 466
446 set_new_dnode(&dn, inode, NULL, NULL, 0); 467 set_new_dnode(&dn, inode, NULL, NULL, 0);
447 err = get_dnode_of_data(&dn, page->index, RDONLY_NODE); 468 err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
448 if (err) 469 if (err)
449 return err; 470 return err;
450 471
@@ -468,8 +489,6 @@ int do_write_data_page(struct page *page)
468 write_data_page(inode, page, &dn, 489 write_data_page(inode, page, &dn,
469 old_blk_addr, &new_blk_addr); 490 old_blk_addr, &new_blk_addr);
470 update_extent_cache(new_blk_addr, &dn); 491 update_extent_cache(new_blk_addr, &dn);
471 F2FS_I(inode)->data_version =
472 le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
473 } 492 }
474out_writepage: 493out_writepage:
475 f2fs_put_dnode(&dn); 494 f2fs_put_dnode(&dn);
@@ -485,10 +504,11 @@ static int f2fs_write_data_page(struct page *page,
485 const pgoff_t end_index = ((unsigned long long) i_size) 504 const pgoff_t end_index = ((unsigned long long) i_size)
486 >> PAGE_CACHE_SHIFT; 505 >> PAGE_CACHE_SHIFT;
487 unsigned offset; 506 unsigned offset;
507 bool need_balance_fs = false;
488 int err = 0; 508 int err = 0;
489 509
490 if (page->index < end_index) 510 if (page->index < end_index)
491 goto out; 511 goto write;
492 512
493 /* 513 /*
494 * If the offset is out-of-range of file size, 514 * If the offset is out-of-range of file size,
@@ -500,50 +520,46 @@ static int f2fs_write_data_page(struct page *page,
500 dec_page_count(sbi, F2FS_DIRTY_DENTS); 520 dec_page_count(sbi, F2FS_DIRTY_DENTS);
501 inode_dec_dirty_dents(inode); 521 inode_dec_dirty_dents(inode);
502 } 522 }
503 goto unlock_out; 523 goto out;
504 } 524 }
505 525
506 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 526 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
507out: 527write:
508 if (sbi->por_doing) 528 if (sbi->por_doing) {
509 goto redirty_out; 529 err = AOP_WRITEPAGE_ACTIVATE;
510
511 if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page))
512 goto redirty_out; 530 goto redirty_out;
531 }
513 532
514 mutex_lock_op(sbi, DATA_WRITE); 533 /* Dentry blocks are controlled by checkpoint */
515 if (S_ISDIR(inode->i_mode)) { 534 if (S_ISDIR(inode->i_mode)) {
516 dec_page_count(sbi, F2FS_DIRTY_DENTS); 535 dec_page_count(sbi, F2FS_DIRTY_DENTS);
517 inode_dec_dirty_dents(inode); 536 inode_dec_dirty_dents(inode);
537 err = do_write_data_page(page);
538 } else {
539 int ilock = mutex_lock_op(sbi);
540 err = do_write_data_page(page);
541 mutex_unlock_op(sbi, ilock);
542 need_balance_fs = true;
518 } 543 }
519 err = do_write_data_page(page); 544 if (err == -ENOENT)
520 if (err && err != -ENOENT) { 545 goto out;
521 wbc->pages_skipped++; 546 else if (err)
522 set_page_dirty(page); 547 goto redirty_out;
523 }
524 mutex_unlock_op(sbi, DATA_WRITE);
525 548
526 if (wbc->for_reclaim) 549 if (wbc->for_reclaim)
527 f2fs_submit_bio(sbi, DATA, true); 550 f2fs_submit_bio(sbi, DATA, true);
528 551
529 if (err == -ENOENT)
530 goto unlock_out;
531
532 clear_cold_data(page); 552 clear_cold_data(page);
553out:
533 unlock_page(page); 554 unlock_page(page);
534 555 if (need_balance_fs)
535 if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode))
536 f2fs_balance_fs(sbi); 556 f2fs_balance_fs(sbi);
537 return 0; 557 return 0;
538 558
539unlock_out:
540 unlock_page(page);
541 return (err == -ENOENT) ? 0 : err;
542
543redirty_out: 559redirty_out:
544 wbc->pages_skipped++; 560 wbc->pages_skipped++;
545 set_page_dirty(page); 561 set_page_dirty(page);
546 return AOP_WRITEPAGE_ACTIVATE; 562 return err;
547} 563}
548 564
549#define MAX_DESIRED_PAGES_WP 4096 565#define MAX_DESIRED_PAGES_WP 4096
@@ -562,19 +578,26 @@ static int f2fs_write_data_pages(struct address_space *mapping,
562{ 578{
563 struct inode *inode = mapping->host; 579 struct inode *inode = mapping->host;
564 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 580 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
581 bool locked = false;
565 int ret; 582 int ret;
566 long excess_nrtw = 0, desired_nrtw; 583 long excess_nrtw = 0, desired_nrtw;
567 584
585 /* deal with chardevs and other special file */
586 if (!mapping->a_ops->writepage)
587 return 0;
588
568 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 589 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
569 desired_nrtw = MAX_DESIRED_PAGES_WP; 590 desired_nrtw = MAX_DESIRED_PAGES_WP;
570 excess_nrtw = desired_nrtw - wbc->nr_to_write; 591 excess_nrtw = desired_nrtw - wbc->nr_to_write;
571 wbc->nr_to_write = desired_nrtw; 592 wbc->nr_to_write = desired_nrtw;
572 } 593 }
573 594
574 if (!S_ISDIR(inode->i_mode)) 595 if (!S_ISDIR(inode->i_mode)) {
575 mutex_lock(&sbi->writepages); 596 mutex_lock(&sbi->writepages);
597 locked = true;
598 }
576 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 599 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
577 if (!S_ISDIR(inode->i_mode)) 600 if (locked)
578 mutex_unlock(&sbi->writepages); 601 mutex_unlock(&sbi->writepages);
579 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 602 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
580 603
@@ -594,39 +617,33 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
594 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 617 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
595 struct dnode_of_data dn; 618 struct dnode_of_data dn;
596 int err = 0; 619 int err = 0;
620 int ilock;
597 621
598 /* for nobh_write_end */ 622 /* for nobh_write_end */
599 *fsdata = NULL; 623 *fsdata = NULL;
600 624
601 f2fs_balance_fs(sbi); 625 f2fs_balance_fs(sbi);
602 626repeat:
603 page = grab_cache_page_write_begin(mapping, index, flags); 627 page = grab_cache_page_write_begin(mapping, index, flags);
604 if (!page) 628 if (!page)
605 return -ENOMEM; 629 return -ENOMEM;
606 *pagep = page; 630 *pagep = page;
607 631
608 mutex_lock_op(sbi, DATA_NEW); 632 ilock = mutex_lock_op(sbi);
609 633
610 set_new_dnode(&dn, inode, NULL, NULL, 0); 634 set_new_dnode(&dn, inode, NULL, NULL, 0);
611 err = get_dnode_of_data(&dn, index, 0); 635 err = get_dnode_of_data(&dn, index, ALLOC_NODE);
612 if (err) { 636 if (err)
613 mutex_unlock_op(sbi, DATA_NEW); 637 goto err;
614 f2fs_put_page(page, 1);
615 return err;
616 }
617 638
618 if (dn.data_blkaddr == NULL_ADDR) { 639 if (dn.data_blkaddr == NULL_ADDR)
619 err = reserve_new_block(&dn); 640 err = reserve_new_block(&dn);
620 if (err) { 641
621 f2fs_put_dnode(&dn);
622 mutex_unlock_op(sbi, DATA_NEW);
623 f2fs_put_page(page, 1);
624 return err;
625 }
626 }
627 f2fs_put_dnode(&dn); 642 f2fs_put_dnode(&dn);
643 if (err)
644 goto err;
628 645
629 mutex_unlock_op(sbi, DATA_NEW); 646 mutex_unlock_op(sbi, ilock);
630 647
631 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 648 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
632 return 0; 649 return 0;
@@ -637,21 +654,34 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
637 654
638 /* Reading beyond i_size is simple: memset to zero */ 655 /* Reading beyond i_size is simple: memset to zero */
639 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); 656 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
640 return 0; 657 goto out;
641 } 658 }
642 659
643 if (dn.data_blkaddr == NEW_ADDR) { 660 if (dn.data_blkaddr == NEW_ADDR) {
644 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 661 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
645 } else { 662 } else {
646 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 663 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
647 if (err) { 664 if (err)
648 f2fs_put_page(page, 1);
649 return err; 665 return err;
666 lock_page(page);
667 if (!PageUptodate(page)) {
668 f2fs_put_page(page, 1);
669 return -EIO;
670 }
671 if (page->mapping != mapping) {
672 f2fs_put_page(page, 1);
673 goto repeat;
650 } 674 }
651 } 675 }
676out:
652 SetPageUptodate(page); 677 SetPageUptodate(page);
653 clear_cold_data(page); 678 clear_cold_data(page);
654 return 0; 679 return 0;
680
681err:
682 mutex_unlock_op(sbi, ilock);
683 f2fs_put_page(page, 1);
684 return err;
655} 685}
656 686
657static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 687static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
@@ -682,7 +712,7 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
682static int f2fs_release_data_page(struct page *page, gfp_t wait) 712static int f2fs_release_data_page(struct page *page, gfp_t wait)
683{ 713{
684 ClearPagePrivate(page); 714 ClearPagePrivate(page);
685 return 0; 715 return 1;
686} 716}
687 717
688static int f2fs_set_data_page_dirty(struct page *page) 718static int f2fs_set_data_page_dirty(struct page *page)
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 025b9e2f935d..8d9943786c31 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -13,7 +13,6 @@
13 13
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
16#include <linux/proc_fs.h>
17#include <linux/f2fs_fs.h> 16#include <linux/f2fs_fs.h>
18#include <linux/blkdev.h> 17#include <linux/blkdev.h>
19#include <linux/debugfs.h> 18#include <linux/debugfs.h>
@@ -106,7 +105,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
106 } 105 }
107 } 106 }
108 mutex_unlock(&sit_i->sentry_lock); 107 mutex_unlock(&sit_i->sentry_lock);
109 dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100; 108 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
110 si->bimodal = bimodal / dist; 109 si->bimodal = bimodal / dist;
111 if (si->dirty_count) 110 if (si->dirty_count)
112 si->avg_vblocks = total_vblocks / ndirty; 111 si->avg_vblocks = total_vblocks / ndirty;
@@ -138,14 +137,13 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
138 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 137 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
139 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); 138 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
140 if (sbi->segs_per_sec > 1) 139 if (sbi->segs_per_sec > 1)
141 si->base_mem += sbi->total_sections * 140 si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry);
142 sizeof(struct sec_entry);
143 si->base_mem += __bitmap_size(sbi, SIT_BITMAP); 141 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
144 142
145 /* build free segmap */ 143 /* build free segmap */
146 si->base_mem += sizeof(struct free_segmap_info); 144 si->base_mem += sizeof(struct free_segmap_info);
147 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); 145 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
148 si->base_mem += f2fs_bitmap_size(sbi->total_sections); 146 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
149 147
150 /* build curseg */ 148 /* build curseg */
151 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; 149 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
@@ -154,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
154 /* build dirty segmap */ 152 /* build dirty segmap */
155 si->base_mem += sizeof(struct dirty_seglist_info); 153 si->base_mem += sizeof(struct dirty_seglist_info);
156 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 154 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
157 si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 155 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
158 156
159 /* buld nm */ 157 /* buld nm */
160 si->base_mem += sizeof(struct f2fs_nm_info); 158 si->base_mem += sizeof(struct f2fs_nm_info);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 1be948768e2f..1ac6b93036b7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -148,7 +148,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
148 148
149 for (; bidx < end_block; bidx++) { 149 for (; bidx < end_block; bidx++) {
150 /* no need to allocate new dentry pages to all the indices */ 150 /* no need to allocate new dentry pages to all the indices */
151 dentry_page = find_data_page(dir, bidx); 151 dentry_page = find_data_page(dir, bidx, true);
152 if (IS_ERR(dentry_page)) { 152 if (IS_ERR(dentry_page)) {
153 room = true; 153 room = true;
154 continue; 154 continue;
@@ -189,6 +189,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
189 unsigned int max_depth; 189 unsigned int max_depth;
190 unsigned int level; 190 unsigned int level;
191 191
192 if (namelen > F2FS_NAME_LEN)
193 return NULL;
194
192 if (npages == 0) 195 if (npages == 0)
193 return NULL; 196 return NULL;
194 197
@@ -246,9 +249,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
246void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, 249void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
247 struct page *page, struct inode *inode) 250 struct page *page, struct inode *inode)
248{ 251{
249 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
250
251 mutex_lock_op(sbi, DENTRY_OPS);
252 lock_page(page); 252 lock_page(page);
253 wait_on_page_writeback(page); 253 wait_on_page_writeback(page);
254 de->ino = cpu_to_le32(inode->i_ino); 254 de->ino = cpu_to_le32(inode->i_ino);
@@ -262,7 +262,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
262 F2FS_I(inode)->i_pino = dir->i_ino; 262 F2FS_I(inode)->i_pino = dir->i_ino;
263 263
264 f2fs_put_page(page, 1); 264 f2fs_put_page(page, 1);
265 mutex_unlock_op(sbi, DENTRY_OPS);
266} 265}
267 266
268void init_dent_inode(const struct qstr *name, struct page *ipage) 267void init_dent_inode(const struct qstr *name, struct page *ipage)
@@ -281,6 +280,43 @@ void init_dent_inode(const struct qstr *name, struct page *ipage)
281 set_page_dirty(ipage); 280 set_page_dirty(ipage);
282} 281}
283 282
283static int make_empty_dir(struct inode *inode, struct inode *parent)
284{
285 struct page *dentry_page;
286 struct f2fs_dentry_block *dentry_blk;
287 struct f2fs_dir_entry *de;
288 void *kaddr;
289
290 dentry_page = get_new_data_page(inode, 0, true);
291 if (IS_ERR(dentry_page))
292 return PTR_ERR(dentry_page);
293
294 kaddr = kmap_atomic(dentry_page);
295 dentry_blk = (struct f2fs_dentry_block *)kaddr;
296
297 de = &dentry_blk->dentry[0];
298 de->name_len = cpu_to_le16(1);
299 de->hash_code = 0;
300 de->ino = cpu_to_le32(inode->i_ino);
301 memcpy(dentry_blk->filename[0], ".", 1);
302 set_de_type(de, inode);
303
304 de = &dentry_blk->dentry[1];
305 de->hash_code = 0;
306 de->name_len = cpu_to_le16(2);
307 de->ino = cpu_to_le32(parent->i_ino);
308 memcpy(dentry_blk->filename[1], "..", 2);
309 set_de_type(de, inode);
310
311 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
312 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
313 kunmap_atomic(kaddr);
314
315 set_page_dirty(dentry_page);
316 f2fs_put_page(dentry_page, 1);
317 return 0;
318}
319
284static int init_inode_metadata(struct inode *inode, 320static int init_inode_metadata(struct inode *inode,
285 struct inode *dir, const struct qstr *name) 321 struct inode *dir, const struct qstr *name)
286{ 322{
@@ -291,7 +327,7 @@ static int init_inode_metadata(struct inode *inode,
291 return err; 327 return err;
292 328
293 if (S_ISDIR(inode->i_mode)) { 329 if (S_ISDIR(inode->i_mode)) {
294 err = f2fs_make_empty(inode, dir); 330 err = make_empty_dir(inode, dir);
295 if (err) { 331 if (err) {
296 remove_inode_page(inode); 332 remove_inode_page(inode);
297 return err; 333 return err;
@@ -314,7 +350,7 @@ static int init_inode_metadata(struct inode *inode,
314 } 350 }
315 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 351 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
316 inc_nlink(inode); 352 inc_nlink(inode);
317 f2fs_write_inode(inode, NULL); 353 update_inode_page(inode);
318 } 354 }
319 return 0; 355 return 0;
320} 356}
@@ -338,7 +374,7 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
338 } 374 }
339 375
340 if (need_dir_update) 376 if (need_dir_update)
341 f2fs_write_inode(dir, NULL); 377 update_inode_page(dir);
342 else 378 else
343 mark_inode_dirty(dir); 379 mark_inode_dirty(dir);
344 380
@@ -370,6 +406,10 @@ next:
370 goto next; 406 goto next;
371} 407}
372 408
409/*
410 * Caller should grab and release a mutex by calling mutex_lock_op() and
411 * mutex_unlock_op().
412 */
373int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) 413int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
374{ 414{
375 unsigned int bit_pos; 415 unsigned int bit_pos;
@@ -379,7 +419,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
379 f2fs_hash_t dentry_hash; 419 f2fs_hash_t dentry_hash;
380 struct f2fs_dir_entry *de; 420 struct f2fs_dir_entry *de;
381 unsigned int nbucket, nblock; 421 unsigned int nbucket, nblock;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 size_t namelen = name->len; 422 size_t namelen = name->len;
384 struct page *dentry_page = NULL; 423 struct page *dentry_page = NULL;
385 struct f2fs_dentry_block *dentry_blk = NULL; 424 struct f2fs_dentry_block *dentry_blk = NULL;
@@ -409,12 +448,9 @@ start:
409 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); 448 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
410 449
411 for (block = bidx; block <= (bidx + nblock - 1); block++) { 450 for (block = bidx; block <= (bidx + nblock - 1); block++) {
412 mutex_lock_op(sbi, DENTRY_OPS);
413 dentry_page = get_new_data_page(dir, block, true); 451 dentry_page = get_new_data_page(dir, block, true);
414 if (IS_ERR(dentry_page)) { 452 if (IS_ERR(dentry_page))
415 mutex_unlock_op(sbi, DENTRY_OPS);
416 return PTR_ERR(dentry_page); 453 return PTR_ERR(dentry_page);
417 }
418 454
419 dentry_blk = kmap(dentry_page); 455 dentry_blk = kmap(dentry_page);
420 bit_pos = room_for_filename(dentry_blk, slots); 456 bit_pos = room_for_filename(dentry_blk, slots);
@@ -423,7 +459,6 @@ start:
423 459
424 kunmap(dentry_page); 460 kunmap(dentry_page);
425 f2fs_put_page(dentry_page, 1); 461 f2fs_put_page(dentry_page, 1);
426 mutex_unlock_op(sbi, DENTRY_OPS);
427 } 462 }
428 463
429 /* Move to next level to find the empty slot for new dentry */ 464 /* Move to next level to find the empty slot for new dentry */
@@ -453,7 +488,6 @@ add_dentry:
453fail: 488fail:
454 kunmap(dentry_page); 489 kunmap(dentry_page);
455 f2fs_put_page(dentry_page, 1); 490 f2fs_put_page(dentry_page, 1);
456 mutex_unlock_op(sbi, DENTRY_OPS);
457 return err; 491 return err;
458} 492}
459 493
@@ -473,8 +507,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
473 void *kaddr = page_address(page); 507 void *kaddr = page_address(page);
474 int i; 508 int i;
475 509
476 mutex_lock_op(sbi, DENTRY_OPS);
477
478 lock_page(page); 510 lock_page(page);
479 wait_on_page_writeback(page); 511 wait_on_page_writeback(page);
480 512
@@ -494,7 +526,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
494 526
495 if (inode && S_ISDIR(inode->i_mode)) { 527 if (inode && S_ISDIR(inode->i_mode)) {
496 drop_nlink(dir); 528 drop_nlink(dir);
497 f2fs_write_inode(dir, NULL); 529 update_inode_page(dir);
498 } else { 530 } else {
499 mark_inode_dirty(dir); 531 mark_inode_dirty(dir);
500 } 532 }
@@ -506,7 +538,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
506 drop_nlink(inode); 538 drop_nlink(inode);
507 i_size_write(inode, 0); 539 i_size_write(inode, 0);
508 } 540 }
509 f2fs_write_inode(inode, NULL); 541 update_inode_page(inode);
542
510 if (inode->i_nlink == 0) 543 if (inode->i_nlink == 0)
511 add_orphan_inode(sbi, inode->i_ino); 544 add_orphan_inode(sbi, inode->i_ino);
512 } 545 }
@@ -519,45 +552,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
519 inode_dec_dirty_dents(dir); 552 inode_dec_dirty_dents(dir);
520 } 553 }
521 f2fs_put_page(page, 1); 554 f2fs_put_page(page, 1);
522
523 mutex_unlock_op(sbi, DENTRY_OPS);
524}
525
526int f2fs_make_empty(struct inode *inode, struct inode *parent)
527{
528 struct page *dentry_page;
529 struct f2fs_dentry_block *dentry_blk;
530 struct f2fs_dir_entry *de;
531 void *kaddr;
532
533 dentry_page = get_new_data_page(inode, 0, true);
534 if (IS_ERR(dentry_page))
535 return PTR_ERR(dentry_page);
536
537 kaddr = kmap_atomic(dentry_page);
538 dentry_blk = (struct f2fs_dentry_block *)kaddr;
539
540 de = &dentry_blk->dentry[0];
541 de->name_len = cpu_to_le16(1);
542 de->hash_code = f2fs_dentry_hash(".", 1);
543 de->ino = cpu_to_le32(inode->i_ino);
544 memcpy(dentry_blk->filename[0], ".", 1);
545 set_de_type(de, inode);
546
547 de = &dentry_blk->dentry[1];
548 de->hash_code = f2fs_dentry_hash("..", 2);
549 de->name_len = cpu_to_le16(2);
550 de->ino = cpu_to_le32(parent->i_ino);
551 memcpy(dentry_blk->filename[1], "..", 2);
552 set_de_type(de, inode);
553
554 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
555 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
556 kunmap_atomic(kaddr);
557
558 set_page_dirty(dentry_page);
559 f2fs_put_page(dentry_page, 1);
560 return 0;
561} 555}
562 556
563bool f2fs_empty_dir(struct inode *dir) 557bool f2fs_empty_dir(struct inode *dir)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 201c8d3b0f86..20aab02f2a42 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -125,11 +125,15 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
125 * file keeping -1 as its node offset to 125 * file keeping -1 as its node offset to
126 * distinguish from index node blocks. 126 * distinguish from index node blocks.
127 */ 127 */
128#define RDONLY_NODE 1 /* 128enum {
129 * specify a read-only mode when getting 129 ALLOC_NODE, /* allocate a new node page if needed */
130 * a node block. 0 is read-write mode. 130 LOOKUP_NODE, /* look up a node without readahead */
131 * used by get_dnode_of_data(). 131 LOOKUP_NODE_RA, /*
132 * look up a node with readahead called
133 * by get_datablock_ro.
132 */ 134 */
135};
136
133#define F2FS_LINK_MAX 32000 /* maximum link count per file */ 137#define F2FS_LINK_MAX 32000 /* maximum link count per file */
134 138
135/* for in-memory extent cache entry */ 139/* for in-memory extent cache entry */
@@ -144,6 +148,7 @@ struct extent_info {
144 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 148 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
145 */ 149 */
146#define FADVISE_COLD_BIT 0x01 150#define FADVISE_COLD_BIT 0x01
151#define FADVISE_CP_BIT 0x02
147 152
148struct f2fs_inode_info { 153struct f2fs_inode_info {
149 struct inode vfs_inode; /* serve a vfs inode */ 154 struct inode vfs_inode; /* serve a vfs inode */
@@ -155,7 +160,6 @@ struct f2fs_inode_info {
155 160
156 /* Use below internally in f2fs*/ 161 /* Use below internally in f2fs*/
157 unsigned long flags; /* use to pass per-file flags */ 162 unsigned long flags; /* use to pass per-file flags */
158 unsigned long long data_version;/* latest version of data for fsync */
159 atomic_t dirty_dents; /* # of dirty dentry pages */ 163 atomic_t dirty_dents; /* # of dirty dentry pages */
160 f2fs_hash_t chash; /* hash value of given file name */ 164 f2fs_hash_t chash; /* hash value of given file name */
161 unsigned int clevel; /* maximum level of given file name */ 165 unsigned int clevel; /* maximum level of given file name */
@@ -186,7 +190,6 @@ static inline void set_raw_extent(struct extent_info *ext,
186struct f2fs_nm_info { 190struct f2fs_nm_info {
187 block_t nat_blkaddr; /* base disk address of NAT */ 191 block_t nat_blkaddr; /* base disk address of NAT */
188 nid_t max_nid; /* maximum possible node ids */ 192 nid_t max_nid; /* maximum possible node ids */
189 nid_t init_scan_nid; /* the first nid to be scanned */
190 nid_t next_scan_nid; /* the next nid to be scanned */ 193 nid_t next_scan_nid; /* the next nid to be scanned */
191 194
192 /* NAT cache management */ 195 /* NAT cache management */
@@ -305,23 +308,12 @@ enum count_type {
305}; 308};
306 309
307/* 310/*
308 * FS_LOCK nesting subclasses for the lock validator: 311 * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
309 * 312 * The checkpoint procedure blocks all the locks in this fs_lock array.
310 * The locking order between these classes is 313 * Some FS operations grab free locks, and if there is no free lock,
311 * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW 314 * then wait to grab a lock in a round-robin manner.
312 * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC
313 */ 315 */
314enum lock_type { 316#define NR_GLOBAL_LOCKS 8
315 RENAME, /* for renaming operations */
316 DENTRY_OPS, /* for directory operations */
317 DATA_WRITE, /* for data write */
318 DATA_NEW, /* for data allocation */
319 DATA_TRUNC, /* for data truncate */
320 NODE_NEW, /* for node allocation */
321 NODE_TRUNC, /* for node truncate */
322 NODE_WRITE, /* for node write */
323 NR_LOCK_TYPE,
324};
325 317
326/* 318/*
327 * The below are the page types of bios used in submti_bio(). 319 * The below are the page types of bios used in submti_bio().
@@ -361,11 +353,13 @@ struct f2fs_sb_info {
361 /* for checkpoint */ 353 /* for checkpoint */
362 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 354 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
363 struct inode *meta_inode; /* cache meta blocks */ 355 struct inode *meta_inode; /* cache meta blocks */
364 struct mutex cp_mutex; /* for checkpoint procedure */ 356 struct mutex cp_mutex; /* checkpoint procedure lock */
365 struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */ 357 struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */
366 struct mutex write_inode; /* mutex for write inode */ 358 struct mutex node_write; /* locking node writes */
367 struct mutex writepages; /* mutex for writepages() */ 359 struct mutex writepages; /* mutex for writepages() */
360 unsigned char next_lock_num; /* round-robin global locks */
368 int por_doing; /* recovery is doing or not */ 361 int por_doing; /* recovery is doing or not */
362 int on_build_free_nids; /* build_free_nids is doing */
369 363
370 /* for orphan inode management */ 364 /* for orphan inode management */
371 struct list_head orphan_inode_list; /* orphan inode list */ 365 struct list_head orphan_inode_list; /* orphan inode list */
@@ -406,6 +400,7 @@ struct f2fs_sb_info {
406 /* for cleaning operations */ 400 /* for cleaning operations */
407 struct mutex gc_mutex; /* mutex for GC */ 401 struct mutex gc_mutex; /* mutex for GC */
408 struct f2fs_gc_kthread *gc_thread; /* GC thread */ 402 struct f2fs_gc_kthread *gc_thread; /* GC thread */
403 unsigned int cur_victim_sec; /* current victim section num */
409 404
410 /* 405 /*
411 * for stat information. 406 * for stat information.
@@ -498,22 +493,51 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
498 cp->ckpt_flags = cpu_to_le32(ckpt_flags); 493 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
499} 494}
500 495
501static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) 496static inline void mutex_lock_all(struct f2fs_sb_info *sbi)
502{ 497{
503 mutex_lock_nested(&sbi->fs_lock[t], t); 498 int i = 0;
499 for (; i < NR_GLOBAL_LOCKS; i++)
500 mutex_lock(&sbi->fs_lock[i]);
504} 501}
505 502
506static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t) 503static inline void mutex_unlock_all(struct f2fs_sb_info *sbi)
507{ 504{
508 mutex_unlock(&sbi->fs_lock[t]); 505 int i = 0;
506 for (; i < NR_GLOBAL_LOCKS; i++)
507 mutex_unlock(&sbi->fs_lock[i]);
508}
509
510static inline int mutex_lock_op(struct f2fs_sb_info *sbi)
511{
512 unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS;
513 int i = 0;
514
515 for (; i < NR_GLOBAL_LOCKS; i++)
516 if (mutex_trylock(&sbi->fs_lock[i]))
517 return i;
518
519 mutex_lock(&sbi->fs_lock[next_lock]);
520 sbi->next_lock_num++;
521 return next_lock;
522}
523
524static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
525{
526 if (ilock < 0)
527 return;
528 BUG_ON(ilock >= NR_GLOBAL_LOCKS);
529 mutex_unlock(&sbi->fs_lock[ilock]);
509} 530}
510 531
511/* 532/*
512 * Check whether the given nid is within node id range. 533 * Check whether the given nid is within node id range.
513 */ 534 */
514static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) 535static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
515{ 536{
516 BUG_ON((nid >= NM_I(sbi)->max_nid)); 537 WARN_ON((nid >= NM_I(sbi)->max_nid));
538 if (nid >= NM_I(sbi)->max_nid)
539 return -EINVAL;
540 return 0;
517} 541}
518 542
519#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 543#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
@@ -819,7 +843,6 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
819/* used for f2fs_inode_info->flags */ 843/* used for f2fs_inode_info->flags */
820enum { 844enum {
821 FI_NEW_INODE, /* indicate newly allocated inode */ 845 FI_NEW_INODE, /* indicate newly allocated inode */
822 FI_NEED_CP, /* need to do checkpoint during fsync */
823 FI_INC_LINK, /* need to increment i_nlink */ 846 FI_INC_LINK, /* need to increment i_nlink */
824 FI_ACL_MODE, /* indicate acl mode */ 847 FI_ACL_MODE, /* indicate acl mode */
825 FI_NO_ALLOC, /* should not allocate any blocks */ 848 FI_NO_ALLOC, /* should not allocate any blocks */
@@ -872,6 +895,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
872void f2fs_set_inode_flags(struct inode *); 895void f2fs_set_inode_flags(struct inode *);
873struct inode *f2fs_iget(struct super_block *, unsigned long); 896struct inode *f2fs_iget(struct super_block *, unsigned long);
874void update_inode(struct inode *, struct page *); 897void update_inode(struct inode *, struct page *);
898int update_inode_page(struct inode *);
875int f2fs_write_inode(struct inode *, struct writeback_control *); 899int f2fs_write_inode(struct inode *, struct writeback_control *);
876void f2fs_evict_inode(struct inode *); 900void f2fs_evict_inode(struct inode *);
877 901
@@ -973,7 +997,6 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *,
973 int, unsigned int, int); 997 int, unsigned int, int);
974void flush_sit_entries(struct f2fs_sb_info *); 998void flush_sit_entries(struct f2fs_sb_info *);
975int build_segment_manager(struct f2fs_sb_info *); 999int build_segment_manager(struct f2fs_sb_info *);
976void reset_victim_segmap(struct f2fs_sb_info *);
977void destroy_segment_manager(struct f2fs_sb_info *); 1000void destroy_segment_manager(struct f2fs_sb_info *);
978 1001
979/* 1002/*
@@ -1000,7 +1023,7 @@ void destroy_checkpoint_caches(void);
1000 */ 1023 */
1001int reserve_new_block(struct dnode_of_data *); 1024int reserve_new_block(struct dnode_of_data *);
1002void update_extent_cache(block_t, struct dnode_of_data *); 1025void update_extent_cache(block_t, struct dnode_of_data *);
1003struct page *find_data_page(struct inode *, pgoff_t); 1026struct page *find_data_page(struct inode *, pgoff_t, bool);
1004struct page *get_lock_data_page(struct inode *, pgoff_t); 1027struct page *get_lock_data_page(struct inode *, pgoff_t);
1005struct page *get_new_data_page(struct inode *, pgoff_t, bool); 1028struct page *get_new_data_page(struct inode *, pgoff_t, bool);
1006int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); 1029int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
@@ -1020,7 +1043,7 @@ void destroy_gc_caches(void);
1020/* 1043/*
1021 * recovery.c 1044 * recovery.c
1022 */ 1045 */
1023void recover_fsync_data(struct f2fs_sb_info *); 1046int recover_fsync_data(struct f2fs_sb_info *);
1024bool space_for_roll_forward(struct f2fs_sb_info *); 1047bool space_for_roll_forward(struct f2fs_sb_info *);
1025 1048
1026/* 1049/*
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index db626282d424..1cae864f8dfc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -13,6 +13,7 @@
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/blkdev.h>
16#include <linux/falloc.h> 17#include <linux/falloc.h>
17#include <linux/types.h> 18#include <linux/types.h>
18#include <linux/compat.h> 19#include <linux/compat.h>
@@ -24,6 +25,7 @@
24#include "segment.h" 25#include "segment.h"
25#include "xattr.h" 26#include "xattr.h"
26#include "acl.h" 27#include "acl.h"
28#include <trace/events/f2fs.h>
27 29
28static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, 30static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
29 struct vm_fault *vmf) 31 struct vm_fault *vmf)
@@ -33,19 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
33 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
34 block_t old_blk_addr; 36 block_t old_blk_addr;
35 struct dnode_of_data dn; 37 struct dnode_of_data dn;
36 int err; 38 int err, ilock;
37 39
38 f2fs_balance_fs(sbi); 40 f2fs_balance_fs(sbi);
39 41
40 sb_start_pagefault(inode->i_sb); 42 sb_start_pagefault(inode->i_sb);
41 43
42 mutex_lock_op(sbi, DATA_NEW);
43
44 /* block allocation */ 44 /* block allocation */
45 ilock = mutex_lock_op(sbi);
45 set_new_dnode(&dn, inode, NULL, NULL, 0); 46 set_new_dnode(&dn, inode, NULL, NULL, 0);
46 err = get_dnode_of_data(&dn, page->index, 0); 47 err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
47 if (err) { 48 if (err) {
48 mutex_unlock_op(sbi, DATA_NEW); 49 mutex_unlock_op(sbi, ilock);
49 goto out; 50 goto out;
50 } 51 }
51 52
@@ -55,13 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
55 err = reserve_new_block(&dn); 56 err = reserve_new_block(&dn);
56 if (err) { 57 if (err) {
57 f2fs_put_dnode(&dn); 58 f2fs_put_dnode(&dn);
58 mutex_unlock_op(sbi, DATA_NEW); 59 mutex_unlock_op(sbi, ilock);
59 goto out; 60 goto out;
60 } 61 }
61 } 62 }
62 f2fs_put_dnode(&dn); 63 f2fs_put_dnode(&dn);
63 64 mutex_unlock_op(sbi, ilock);
64 mutex_unlock_op(sbi, DATA_NEW);
65 65
66 lock_page(page); 66 lock_page(page);
67 if (page->mapping != inode->i_mapping || 67 if (page->mapping != inode->i_mapping ||
@@ -102,28 +102,10 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
102 .remap_pages = generic_file_remap_pages, 102 .remap_pages = generic_file_remap_pages,
103}; 103};
104 104
105static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
106{
107 struct dentry *dentry;
108 nid_t pino;
109
110 inode = igrab(inode);
111 dentry = d_find_any_alias(inode);
112 if (!dentry) {
113 iput(inode);
114 return 0;
115 }
116 pino = dentry->d_parent->d_inode->i_ino;
117 dput(dentry);
118 iput(inode);
119 return !is_checkpointed_node(sbi, pino);
120}
121
122int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 105int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
123{ 106{
124 struct inode *inode = file->f_mapping->host; 107 struct inode *inode = file->f_mapping->host;
125 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 108 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
126 unsigned long long cur_version;
127 int ret = 0; 109 int ret = 0;
128 bool need_cp = false; 110 bool need_cp = false;
129 struct writeback_control wbc = { 111 struct writeback_control wbc = {
@@ -135,9 +117,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
135 if (inode->i_sb->s_flags & MS_RDONLY) 117 if (inode->i_sb->s_flags & MS_RDONLY)
136 return 0; 118 return 0;
137 119
120 trace_f2fs_sync_file_enter(inode);
138 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 121 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
139 if (ret) 122 if (ret) {
123 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
140 return ret; 124 return ret;
125 }
141 126
142 /* guarantee free sections for fsync */ 127 /* guarantee free sections for fsync */
143 f2fs_balance_fs(sbi); 128 f2fs_balance_fs(sbi);
@@ -147,28 +132,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
147 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 132 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
148 goto out; 133 goto out;
149 134
150 mutex_lock(&sbi->cp_mutex);
151 cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
152 mutex_unlock(&sbi->cp_mutex);
153
154 if (F2FS_I(inode)->data_version != cur_version &&
155 !(inode->i_state & I_DIRTY))
156 goto out;
157 F2FS_I(inode)->data_version--;
158
159 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 135 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
160 need_cp = true; 136 need_cp = true;
161 else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) 137 else if (is_cp_file(inode))
162 need_cp = true; 138 need_cp = true;
163 else if (!space_for_roll_forward(sbi)) 139 else if (!space_for_roll_forward(sbi))
164 need_cp = true; 140 need_cp = true;
165 else if (need_to_sync_dir(sbi, inode)) 141 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
166 need_cp = true; 142 need_cp = true;
167 143
168 if (need_cp) { 144 if (need_cp) {
169 /* all the dirty node pages should be flushed for POR */ 145 /* all the dirty node pages should be flushed for POR */
170 ret = f2fs_sync_fs(inode->i_sb, 1); 146 ret = f2fs_sync_fs(inode->i_sb, 1);
171 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
172 } else { 147 } else {
173 /* if there is no written node page, write its inode page */ 148 /* if there is no written node page, write its inode page */
174 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 149 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
@@ -178,9 +153,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
178 } 153 }
179 filemap_fdatawait_range(sbi->node_inode->i_mapping, 154 filemap_fdatawait_range(sbi->node_inode->i_mapping,
180 0, LONG_MAX); 155 0, LONG_MAX);
156 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
181 } 157 }
182out: 158out:
183 mutex_unlock(&inode->i_mutex); 159 mutex_unlock(&inode->i_mutex);
160 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
184 return ret; 161 return ret;
185} 162}
186 163
@@ -216,6 +193,9 @@ static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
216 sync_inode_page(dn); 193 sync_inode_page(dn);
217 } 194 }
218 dn->ofs_in_node = ofs; 195 dn->ofs_in_node = ofs;
196
197 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
198 dn->ofs_in_node, nr_free);
219 return nr_free; 199 return nr_free;
220} 200}
221 201
@@ -232,11 +212,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
232 if (!offset) 212 if (!offset)
233 return; 213 return;
234 214
235 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT); 215 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
236 if (IS_ERR(page)) 216 if (IS_ERR(page))
237 return; 217 return;
238 218
239 lock_page(page); 219 lock_page(page);
220 if (page->mapping != inode->i_mapping) {
221 f2fs_put_page(page, 1);
222 return;
223 }
240 wait_on_page_writeback(page); 224 wait_on_page_writeback(page);
241 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 225 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
242 set_page_dirty(page); 226 set_page_dirty(page);
@@ -249,20 +233,22 @@ static int truncate_blocks(struct inode *inode, u64 from)
249 unsigned int blocksize = inode->i_sb->s_blocksize; 233 unsigned int blocksize = inode->i_sb->s_blocksize;
250 struct dnode_of_data dn; 234 struct dnode_of_data dn;
251 pgoff_t free_from; 235 pgoff_t free_from;
252 int count = 0; 236 int count = 0, ilock = -1;
253 int err; 237 int err;
254 238
239 trace_f2fs_truncate_blocks_enter(inode, from);
240
255 free_from = (pgoff_t) 241 free_from = (pgoff_t)
256 ((from + blocksize - 1) >> (sbi->log_blocksize)); 242 ((from + blocksize - 1) >> (sbi->log_blocksize));
257 243
258 mutex_lock_op(sbi, DATA_TRUNC); 244 ilock = mutex_lock_op(sbi);
259
260 set_new_dnode(&dn, inode, NULL, NULL, 0); 245 set_new_dnode(&dn, inode, NULL, NULL, 0);
261 err = get_dnode_of_data(&dn, free_from, RDONLY_NODE); 246 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
262 if (err) { 247 if (err) {
263 if (err == -ENOENT) 248 if (err == -ENOENT)
264 goto free_next; 249 goto free_next;
265 mutex_unlock_op(sbi, DATA_TRUNC); 250 mutex_unlock_op(sbi, ilock);
251 trace_f2fs_truncate_blocks_exit(inode, err);
266 return err; 252 return err;
267 } 253 }
268 254
@@ -273,6 +259,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
273 259
274 count -= dn.ofs_in_node; 260 count -= dn.ofs_in_node;
275 BUG_ON(count < 0); 261 BUG_ON(count < 0);
262
276 if (dn.ofs_in_node || IS_INODE(dn.node_page)) { 263 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
277 truncate_data_blocks_range(&dn, count); 264 truncate_data_blocks_range(&dn, count);
278 free_from += count; 265 free_from += count;
@@ -281,11 +268,12 @@ static int truncate_blocks(struct inode *inode, u64 from)
281 f2fs_put_dnode(&dn); 268 f2fs_put_dnode(&dn);
282free_next: 269free_next:
283 err = truncate_inode_blocks(inode, free_from); 270 err = truncate_inode_blocks(inode, free_from);
284 mutex_unlock_op(sbi, DATA_TRUNC); 271 mutex_unlock_op(sbi, ilock);
285 272
286 /* lastly zero out the first data page */ 273 /* lastly zero out the first data page */
287 truncate_partial_data_page(inode, from); 274 truncate_partial_data_page(inode, from);
288 275
276 trace_f2fs_truncate_blocks_exit(inode, err);
289 return err; 277 return err;
290} 278}
291 279
@@ -295,6 +283,8 @@ void f2fs_truncate(struct inode *inode)
295 S_ISLNK(inode->i_mode))) 283 S_ISLNK(inode->i_mode)))
296 return; 284 return;
297 285
286 trace_f2fs_truncate(inode);
287
298 if (!truncate_blocks(inode, i_size_read(inode))) { 288 if (!truncate_blocks(inode, i_size_read(inode))) {
299 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 289 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
300 mark_inode_dirty(inode); 290 mark_inode_dirty(inode);
@@ -389,15 +379,16 @@ static void fill_zero(struct inode *inode, pgoff_t index,
389{ 379{
390 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 380 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
391 struct page *page; 381 struct page *page;
382 int ilock;
392 383
393 if (!len) 384 if (!len)
394 return; 385 return;
395 386
396 f2fs_balance_fs(sbi); 387 f2fs_balance_fs(sbi);
397 388
398 mutex_lock_op(sbi, DATA_NEW); 389 ilock = mutex_lock_op(sbi);
399 page = get_new_data_page(inode, index, false); 390 page = get_new_data_page(inode, index, false);
400 mutex_unlock_op(sbi, DATA_NEW); 391 mutex_unlock_op(sbi, ilock);
401 392
402 if (!IS_ERR(page)) { 393 if (!IS_ERR(page)) {
403 wait_on_page_writeback(page); 394 wait_on_page_writeback(page);
@@ -414,15 +405,10 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
414 405
415 for (index = pg_start; index < pg_end; index++) { 406 for (index = pg_start; index < pg_end; index++) {
416 struct dnode_of_data dn; 407 struct dnode_of_data dn;
417 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
418
419 f2fs_balance_fs(sbi);
420 408
421 mutex_lock_op(sbi, DATA_TRUNC);
422 set_new_dnode(&dn, inode, NULL, NULL, 0); 409 set_new_dnode(&dn, inode, NULL, NULL, 0);
423 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 410 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
424 if (err) { 411 if (err) {
425 mutex_unlock_op(sbi, DATA_TRUNC);
426 if (err == -ENOENT) 412 if (err == -ENOENT)
427 continue; 413 continue;
428 return err; 414 return err;
@@ -431,7 +417,6 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
431 if (dn.data_blkaddr != NULL_ADDR) 417 if (dn.data_blkaddr != NULL_ADDR)
432 truncate_data_blocks_range(&dn, 1); 418 truncate_data_blocks_range(&dn, 1);
433 f2fs_put_dnode(&dn); 419 f2fs_put_dnode(&dn);
434 mutex_unlock_op(sbi, DATA_TRUNC);
435 } 420 }
436 return 0; 421 return 0;
437} 422}
@@ -461,12 +446,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
461 if (pg_start < pg_end) { 446 if (pg_start < pg_end) {
462 struct address_space *mapping = inode->i_mapping; 447 struct address_space *mapping = inode->i_mapping;
463 loff_t blk_start, blk_end; 448 loff_t blk_start, blk_end;
449 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
450 int ilock;
451
452 f2fs_balance_fs(sbi);
464 453
465 blk_start = pg_start << PAGE_CACHE_SHIFT; 454 blk_start = pg_start << PAGE_CACHE_SHIFT;
466 blk_end = pg_end << PAGE_CACHE_SHIFT; 455 blk_end = pg_end << PAGE_CACHE_SHIFT;
467 truncate_inode_pages_range(mapping, blk_start, 456 truncate_inode_pages_range(mapping, blk_start,
468 blk_end - 1); 457 blk_end - 1);
458
459 ilock = mutex_lock_op(sbi);
469 ret = truncate_hole(inode, pg_start, pg_end); 460 ret = truncate_hole(inode, pg_start, pg_end);
461 mutex_unlock_op(sbi, ilock);
470 } 462 }
471 } 463 }
472 464
@@ -500,13 +492,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
500 492
501 for (index = pg_start; index <= pg_end; index++) { 493 for (index = pg_start; index <= pg_end; index++) {
502 struct dnode_of_data dn; 494 struct dnode_of_data dn;
495 int ilock;
503 496
504 mutex_lock_op(sbi, DATA_NEW); 497 ilock = mutex_lock_op(sbi);
505
506 set_new_dnode(&dn, inode, NULL, NULL, 0); 498 set_new_dnode(&dn, inode, NULL, NULL, 0);
507 ret = get_dnode_of_data(&dn, index, 0); 499 ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
508 if (ret) { 500 if (ret) {
509 mutex_unlock_op(sbi, DATA_NEW); 501 mutex_unlock_op(sbi, ilock);
510 break; 502 break;
511 } 503 }
512 504
@@ -514,13 +506,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
514 ret = reserve_new_block(&dn); 506 ret = reserve_new_block(&dn);
515 if (ret) { 507 if (ret) {
516 f2fs_put_dnode(&dn); 508 f2fs_put_dnode(&dn);
517 mutex_unlock_op(sbi, DATA_NEW); 509 mutex_unlock_op(sbi, ilock);
518 break; 510 break;
519 } 511 }
520 } 512 }
521 f2fs_put_dnode(&dn); 513 f2fs_put_dnode(&dn);
522 514 mutex_unlock_op(sbi, ilock);
523 mutex_unlock_op(sbi, DATA_NEW);
524 515
525 if (pg_start == pg_end) 516 if (pg_start == pg_end)
526 new_size = offset + len; 517 new_size = offset + len;
@@ -559,6 +550,7 @@ static long f2fs_fallocate(struct file *file, int mode,
559 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 550 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
560 mark_inode_dirty(inode); 551 mark_inode_dirty(inode);
561 } 552 }
553 trace_f2fs_fallocate(inode, mode, offset, len, ret);
562 return ret; 554 return ret;
563} 555}
564 556
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2e3eb2d4fc30..14961593e93c 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -11,7 +11,6 @@
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/backing-dev.h> 13#include <linux/backing-dev.h>
14#include <linux/proc_fs.h>
15#include <linux/init.h> 14#include <linux/init.h>
16#include <linux/f2fs_fs.h> 15#include <linux/f2fs_fs.h>
17#include <linux/kthread.h> 16#include <linux/kthread.h>
@@ -23,6 +22,7 @@
23#include "node.h" 22#include "node.h"
24#include "segment.h" 23#include "segment.h"
25#include "gc.h" 24#include "gc.h"
25#include <trace/events/f2fs.h>
26 26
27static struct kmem_cache *winode_slab; 27static struct kmem_cache *winode_slab;
28 28
@@ -81,9 +81,6 @@ static int gc_thread_func(void *data)
81 /* if return value is not zero, no victim was selected */ 81 /* if return value is not zero, no victim was selected */
82 if (f2fs_gc(sbi)) 82 if (f2fs_gc(sbi))
83 wait_ms = GC_THREAD_NOGC_SLEEP_TIME; 83 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
84 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
85 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
86
87 } while (!kthread_should_stop()); 84 } while (!kthread_should_stop());
88 return 0; 85 return 0;
89} 86}
@@ -131,7 +128,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
131{ 128{
132 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 129 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
133 130
134 if (p->alloc_mode) { 131 if (p->alloc_mode == SSR) {
135 p->gc_mode = GC_GREEDY; 132 p->gc_mode = GC_GREEDY;
136 p->dirty_segmap = dirty_i->dirty_segmap[type]; 133 p->dirty_segmap = dirty_i->dirty_segmap[type];
137 p->ofs_unit = 1; 134 p->ofs_unit = 1;
@@ -160,18 +157,21 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
160static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 157static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
161{ 158{
162 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 159 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
163 unsigned int segno; 160 unsigned int hint = 0;
161 unsigned int secno;
164 162
165 /* 163 /*
166 * If the gc_type is FG_GC, we can select victim segments 164 * If the gc_type is FG_GC, we can select victim segments
167 * selected by background GC before. 165 * selected by background GC before.
168 * Those segments guarantee they have small valid blocks. 166 * Those segments guarantee they have small valid blocks.
169 */ 167 */
170 segno = find_next_bit(dirty_i->victim_segmap[BG_GC], 168next:
171 TOTAL_SEGS(sbi), 0); 169 secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
172 if (segno < TOTAL_SEGS(sbi)) { 170 if (secno < TOTAL_SECS(sbi)) {
173 clear_bit(segno, dirty_i->victim_segmap[BG_GC]); 171 if (sec_usage_check(sbi, secno))
174 return segno; 172 goto next;
173 clear_bit(secno, dirty_i->victim_secmap);
174 return secno * sbi->segs_per_sec;
175 } 175 }
176 return NULL_SEGNO; 176 return NULL_SEGNO;
177} 177}
@@ -234,7 +234,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
234{ 234{
235 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 235 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
236 struct victim_sel_policy p; 236 struct victim_sel_policy p;
237 unsigned int segno; 237 unsigned int secno;
238 int nsearched = 0; 238 int nsearched = 0;
239 239
240 p.alloc_mode = alloc_mode; 240 p.alloc_mode = alloc_mode;
@@ -253,6 +253,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
253 253
254 while (1) { 254 while (1) {
255 unsigned long cost; 255 unsigned long cost;
256 unsigned int segno;
256 257
257 segno = find_next_bit(p.dirty_segmap, 258 segno = find_next_bit(p.dirty_segmap,
258 TOTAL_SEGS(sbi), p.offset); 259 TOTAL_SEGS(sbi), p.offset);
@@ -265,13 +266,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
265 break; 266 break;
266 } 267 }
267 p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; 268 p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
269 secno = GET_SECNO(sbi, segno);
268 270
269 if (test_bit(segno, dirty_i->victim_segmap[FG_GC])) 271 if (sec_usage_check(sbi, secno))
270 continue;
271 if (gc_type == BG_GC &&
272 test_bit(segno, dirty_i->victim_segmap[BG_GC]))
273 continue; 272 continue;
274 if (IS_CURSEC(sbi, GET_SECNO(sbi, segno))) 273 if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
275 continue; 274 continue;
276 275
277 cost = get_gc_cost(sbi, segno, &p); 276 cost = get_gc_cost(sbi, segno, &p);
@@ -291,13 +290,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
291 } 290 }
292got_it: 291got_it:
293 if (p.min_segno != NULL_SEGNO) { 292 if (p.min_segno != NULL_SEGNO) {
294 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
295 if (p.alloc_mode == LFS) { 293 if (p.alloc_mode == LFS) {
296 int i; 294 secno = GET_SECNO(sbi, p.min_segno);
297 for (i = 0; i < p.ofs_unit; i++) 295 if (gc_type == FG_GC)
298 set_bit(*result + i, 296 sbi->cur_victim_sec = secno;
299 dirty_i->victim_segmap[gc_type]); 297 else
298 set_bit(secno, dirty_i->victim_secmap);
300 } 299 }
300 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
301
302 trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
303 sbi->cur_victim_sec,
304 prefree_segments(sbi), free_segments(sbi));
301 } 305 }
302 mutex_unlock(&dirty_i->seglist_lock); 306 mutex_unlock(&dirty_i->seglist_lock);
303 307
@@ -381,6 +385,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
381 385
382next_step: 386next_step:
383 entry = sum; 387 entry = sum;
388
384 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 389 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
385 nid_t nid = le32_to_cpu(entry->nid); 390 nid_t nid = le32_to_cpu(entry->nid);
386 struct page *node_page; 391 struct page *node_page;
@@ -401,11 +406,18 @@ next_step:
401 continue; 406 continue;
402 407
403 /* set page dirty and write it */ 408 /* set page dirty and write it */
404 if (!PageWriteback(node_page)) 409 if (gc_type == FG_GC) {
410 f2fs_submit_bio(sbi, NODE, true);
411 wait_on_page_writeback(node_page);
405 set_page_dirty(node_page); 412 set_page_dirty(node_page);
413 } else {
414 if (!PageWriteback(node_page))
415 set_page_dirty(node_page);
416 }
406 f2fs_put_page(node_page, 1); 417 f2fs_put_page(node_page, 1);
407 stat_inc_node_blk_count(sbi, 1); 418 stat_inc_node_blk_count(sbi, 1);
408 } 419 }
420
409 if (initial) { 421 if (initial) {
410 initial = false; 422 initial = false;
411 goto next_step; 423 goto next_step;
@@ -418,6 +430,13 @@ next_step:
418 .for_reclaim = 0, 430 .for_reclaim = 0,
419 }; 431 };
420 sync_node_pages(sbi, 0, &wbc); 432 sync_node_pages(sbi, 0, &wbc);
433
434 /*
435 * In the case of FG_GC, it'd be better to reclaim this victim
436 * completely.
437 */
438 if (get_valid_blocks(sbi, segno, 1) != 0)
439 goto next_step;
421 } 440 }
422} 441}
423 442
@@ -481,21 +500,19 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
481 500
482static void move_data_page(struct inode *inode, struct page *page, int gc_type) 501static void move_data_page(struct inode *inode, struct page *page, int gc_type)
483{ 502{
484 if (page->mapping != inode->i_mapping)
485 goto out;
486
487 if (inode != page->mapping->host)
488 goto out;
489
490 if (PageWriteback(page))
491 goto out;
492
493 if (gc_type == BG_GC) { 503 if (gc_type == BG_GC) {
504 if (PageWriteback(page))
505 goto out;
494 set_page_dirty(page); 506 set_page_dirty(page);
495 set_cold_data(page); 507 set_cold_data(page);
496 } else { 508 } else {
497 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 509 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
498 mutex_lock_op(sbi, DATA_WRITE); 510
511 if (PageWriteback(page)) {
512 f2fs_submit_bio(sbi, DATA, true);
513 wait_on_page_writeback(page);
514 }
515
499 if (clear_page_dirty_for_io(page) && 516 if (clear_page_dirty_for_io(page) &&
500 S_ISDIR(inode->i_mode)) { 517 S_ISDIR(inode->i_mode)) {
501 dec_page_count(sbi, F2FS_DIRTY_DENTS); 518 dec_page_count(sbi, F2FS_DIRTY_DENTS);
@@ -503,7 +520,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
503 } 520 }
504 set_cold_data(page); 521 set_cold_data(page);
505 do_write_data_page(page); 522 do_write_data_page(page);
506 mutex_unlock_op(sbi, DATA_WRITE);
507 clear_cold_data(page); 523 clear_cold_data(page);
508 } 524 }
509out: 525out:
@@ -530,6 +546,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
530 546
531next_step: 547next_step:
532 entry = sum; 548 entry = sum;
549
533 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 550 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
534 struct page *data_page; 551 struct page *data_page;
535 struct inode *inode; 552 struct inode *inode;
@@ -567,7 +584,7 @@ next_step:
567 continue; 584 continue;
568 585
569 data_page = find_data_page(inode, 586 data_page = find_data_page(inode,
570 start_bidx + ofs_in_node); 587 start_bidx + ofs_in_node, false);
571 if (IS_ERR(data_page)) 588 if (IS_ERR(data_page))
572 goto next_iput; 589 goto next_iput;
573 590
@@ -588,11 +605,22 @@ next_step:
588next_iput: 605next_iput:
589 iput(inode); 606 iput(inode);
590 } 607 }
608
591 if (++phase < 4) 609 if (++phase < 4)
592 goto next_step; 610 goto next_step;
593 611
594 if (gc_type == FG_GC) 612 if (gc_type == FG_GC) {
595 f2fs_submit_bio(sbi, DATA, true); 613 f2fs_submit_bio(sbi, DATA, true);
614
615 /*
616 * In the case of FG_GC, it'd be better to reclaim this victim
617 * completely.
618 */
619 if (get_valid_blocks(sbi, segno, 1) != 0) {
620 phase = 2;
621 goto next_step;
622 }
623 }
596} 624}
597 625
598static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 626static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -611,18 +639,15 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
611{ 639{
612 struct page *sum_page; 640 struct page *sum_page;
613 struct f2fs_summary_block *sum; 641 struct f2fs_summary_block *sum;
642 struct blk_plug plug;
614 643
615 /* read segment summary of victim */ 644 /* read segment summary of victim */
616 sum_page = get_sum_page(sbi, segno); 645 sum_page = get_sum_page(sbi, segno);
617 if (IS_ERR(sum_page)) 646 if (IS_ERR(sum_page))
618 return; 647 return;
619 648
620 /* 649 blk_start_plug(&plug);
621 * CP needs to lock sum_page. In this time, we don't need 650
622 * to lock this page, because this summary page is not gone anywhere.
623 * Also, this page is not gonna be updated before GC is done.
624 */
625 unlock_page(sum_page);
626 sum = page_address(sum_page); 651 sum = page_address(sum_page);
627 652
628 switch (GET_SUM_TYPE((&sum->footer))) { 653 switch (GET_SUM_TYPE((&sum->footer))) {
@@ -633,10 +658,12 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
633 gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); 658 gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
634 break; 659 break;
635 } 660 }
661 blk_finish_plug(&plug);
662
636 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 663 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
637 stat_inc_call_count(sbi->stat_info); 664 stat_inc_call_count(sbi->stat_info);
638 665
639 f2fs_put_page(sum_page, 0); 666 f2fs_put_page(sum_page, 1);
640} 667}
641 668
642int f2fs_gc(struct f2fs_sb_info *sbi) 669int f2fs_gc(struct f2fs_sb_info *sbi)
@@ -652,8 +679,10 @@ gc_more:
652 if (!(sbi->sb->s_flags & MS_ACTIVE)) 679 if (!(sbi->sb->s_flags & MS_ACTIVE))
653 goto stop; 680 goto stop;
654 681
655 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) 682 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
656 gc_type = FG_GC; 683 gc_type = FG_GC;
684 write_checkpoint(sbi, false);
685 }
657 686
658 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 687 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
659 goto stop; 688 goto stop;
@@ -662,9 +691,11 @@ gc_more:
662 for (i = 0; i < sbi->segs_per_sec; i++) 691 for (i = 0; i < sbi->segs_per_sec; i++)
663 do_garbage_collect(sbi, segno + i, &ilist, gc_type); 692 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
664 693
665 if (gc_type == FG_GC && 694 if (gc_type == FG_GC) {
666 get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) 695 sbi->cur_victim_sec = NULL_SEGNO;
667 nfree++; 696 nfree++;
697 WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
698 }
668 699
669 if (has_not_enough_free_secs(sbi, nfree)) 700 if (has_not_enough_free_secs(sbi, nfree))
670 goto gc_more; 701 goto gc_more;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 30b2db003acd..2c6a6bd08322 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -13,9 +13,9 @@
13 * whether IO subsystem is idle 13 * whether IO subsystem is idle
14 * or not 14 * or not
15 */ 15 */
16#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */ 16#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
17#define GC_THREAD_MAX_SLEEP_TIME 30000 17#define GC_THREAD_MAX_SLEEP_TIME 60000
18#define GC_THREAD_NOGC_SLEEP_TIME 10000 18#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
19#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ 19#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
21 21
@@ -58,6 +58,9 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
58 58
59static inline long increase_sleep_time(long wait) 59static inline long increase_sleep_time(long wait)
60{ 60{
61 if (wait == GC_THREAD_NOGC_SLEEP_TIME)
62 return wait;
63
61 wait += GC_THREAD_MIN_SLEEP_TIME; 64 wait += GC_THREAD_MIN_SLEEP_TIME;
62 if (wait > GC_THREAD_MAX_SLEEP_TIME) 65 if (wait > GC_THREAD_MAX_SLEEP_TIME)
63 wait = GC_THREAD_MAX_SLEEP_TIME; 66 wait = GC_THREAD_MAX_SLEEP_TIME;
@@ -66,6 +69,9 @@ static inline long increase_sleep_time(long wait)
66 69
67static inline long decrease_sleep_time(long wait) 70static inline long decrease_sleep_time(long wait)
68{ 71{
72 if (wait == GC_THREAD_NOGC_SLEEP_TIME)
73 wait = GC_THREAD_MAX_SLEEP_TIME;
74
69 wait -= GC_THREAD_MIN_SLEEP_TIME; 75 wait -= GC_THREAD_MIN_SLEEP_TIME;
70 if (wait <= GC_THREAD_MIN_SLEEP_TIME) 76 if (wait <= GC_THREAD_MIN_SLEEP_TIME)
71 wait = GC_THREAD_MIN_SLEEP_TIME; 77 wait = GC_THREAD_MIN_SLEEP_TIME;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ddae412d30c8..91ac7f9d88ee 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,6 +16,8 @@
16#include "f2fs.h" 16#include "f2fs.h"
17#include "node.h" 17#include "node.h"
18 18
19#include <trace/events/f2fs.h>
20
19void f2fs_set_inode_flags(struct inode *inode) 21void f2fs_set_inode_flags(struct inode *inode)
20{ 22{
21 unsigned int flags = F2FS_I(inode)->i_flags; 23 unsigned int flags = F2FS_I(inode)->i_flags;
@@ -44,7 +46,11 @@ static int do_read_inode(struct inode *inode)
44 struct f2fs_inode *ri; 46 struct f2fs_inode *ri;
45 47
46 /* Check if ino is within scope */ 48 /* Check if ino is within scope */
47 check_nid_range(sbi, inode->i_ino); 49 if (check_nid_range(sbi, inode->i_ino)) {
50 f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
51 (unsigned long) inode->i_ino);
52 return -EINVAL;
53 }
48 54
49 node_page = get_node_page(sbi, inode->i_ino); 55 node_page = get_node_page(sbi, inode->i_ino);
50 if (IS_ERR(node_page)) 56 if (IS_ERR(node_page))
@@ -76,7 +82,6 @@ static int do_read_inode(struct inode *inode)
76 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); 82 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
77 fi->i_flags = le32_to_cpu(ri->i_flags); 83 fi->i_flags = le32_to_cpu(ri->i_flags);
78 fi->flags = 0; 84 fi->flags = 0;
79 fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
80 fi->i_advise = ri->i_advise; 85 fi->i_advise = ri->i_advise;
81 fi->i_pino = le32_to_cpu(ri->i_pino); 86 fi->i_pino = le32_to_cpu(ri->i_pino);
82 get_extent_info(&fi->ext, ri->i_ext); 87 get_extent_info(&fi->ext, ri->i_ext);
@@ -88,13 +93,16 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
88{ 93{
89 struct f2fs_sb_info *sbi = F2FS_SB(sb); 94 struct f2fs_sb_info *sbi = F2FS_SB(sb);
90 struct inode *inode; 95 struct inode *inode;
91 int ret; 96 int ret = 0;
92 97
93 inode = iget_locked(sb, ino); 98 inode = iget_locked(sb, ino);
94 if (!inode) 99 if (!inode)
95 return ERR_PTR(-ENOMEM); 100 return ERR_PTR(-ENOMEM);
96 if (!(inode->i_state & I_NEW)) 101
102 if (!(inode->i_state & I_NEW)) {
103 trace_f2fs_iget(inode);
97 return inode; 104 return inode;
105 }
98 if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) 106 if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
99 goto make_now; 107 goto make_now;
100 108
@@ -136,11 +144,12 @@ make_now:
136 goto bad_inode; 144 goto bad_inode;
137 } 145 }
138 unlock_new_inode(inode); 146 unlock_new_inode(inode);
139 147 trace_f2fs_iget(inode);
140 return inode; 148 return inode;
141 149
142bad_inode: 150bad_inode:
143 iget_failed(inode); 151 iget_failed(inode);
152 trace_f2fs_iget_exit(inode, ret);
144 return ERR_PTR(ret); 153 return ERR_PTR(ret);
145} 154}
146 155
@@ -192,47 +201,51 @@ void update_inode(struct inode *inode, struct page *node_page)
192 set_page_dirty(node_page); 201 set_page_dirty(node_page);
193} 202}
194 203
195int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 204int update_inode_page(struct inode *inode)
196{ 205{
197 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 206 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
198 struct page *node_page; 207 struct page *node_page;
199 bool need_lock = false;
200
201 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
202 inode->i_ino == F2FS_META_INO(sbi))
203 return 0;
204
205 if (wbc)
206 f2fs_balance_fs(sbi);
207 208
208 node_page = get_node_page(sbi, inode->i_ino); 209 node_page = get_node_page(sbi, inode->i_ino);
209 if (IS_ERR(node_page)) 210 if (IS_ERR(node_page))
210 return PTR_ERR(node_page); 211 return PTR_ERR(node_page);
211 212
212 if (!PageDirty(node_page)) {
213 need_lock = true;
214 f2fs_put_page(node_page, 1);
215 mutex_lock(&sbi->write_inode);
216 node_page = get_node_page(sbi, inode->i_ino);
217 if (IS_ERR(node_page)) {
218 mutex_unlock(&sbi->write_inode);
219 return PTR_ERR(node_page);
220 }
221 }
222 update_inode(inode, node_page); 213 update_inode(inode, node_page);
223 f2fs_put_page(node_page, 1); 214 f2fs_put_page(node_page, 1);
224 if (need_lock)
225 mutex_unlock(&sbi->write_inode);
226 return 0; 215 return 0;
227} 216}
228 217
218int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
219{
220 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
221 int ret, ilock;
222
223 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
224 inode->i_ino == F2FS_META_INO(sbi))
225 return 0;
226
227 if (wbc)
228 f2fs_balance_fs(sbi);
229
230 /*
231 * We need to lock here to prevent from producing dirty node pages
232 * during the urgent cleaning time when runing out of free sections.
233 */
234 ilock = mutex_lock_op(sbi);
235 ret = update_inode_page(inode);
236 mutex_unlock_op(sbi, ilock);
237 return ret;
238}
239
229/* 240/*
230 * Called at the last iput() if i_nlink is zero 241 * Called at the last iput() if i_nlink is zero
231 */ 242 */
232void f2fs_evict_inode(struct inode *inode) 243void f2fs_evict_inode(struct inode *inode)
233{ 244{
234 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 245 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
246 int ilock;
235 247
248 trace_f2fs_evict_inode(inode);
236 truncate_inode_pages(&inode->i_data, 0); 249 truncate_inode_pages(&inode->i_data, 0);
237 250
238 if (inode->i_ino == F2FS_NODE_INO(sbi) || 251 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
@@ -252,7 +265,10 @@ void f2fs_evict_inode(struct inode *inode)
252 if (F2FS_HAS_BLOCKS(inode)) 265 if (F2FS_HAS_BLOCKS(inode))
253 f2fs_truncate(inode); 266 f2fs_truncate(inode);
254 267
268 ilock = mutex_lock_op(sbi);
255 remove_inode_page(inode); 269 remove_inode_page(inode);
270 mutex_unlock_op(sbi, ilock);
271
256 sb_end_intwrite(inode->i_sb); 272 sb_end_intwrite(inode->i_sb);
257no_delete: 273no_delete:
258 clear_inode(inode); 274 clear_inode(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1a49b881bac0..47abc9722b17 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -15,8 +15,10 @@
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16 16
17#include "f2fs.h" 17#include "f2fs.h"
18#include "node.h"
18#include "xattr.h" 19#include "xattr.h"
19#include "acl.h" 20#include "acl.h"
21#include <trace/events/f2fs.h>
20 22
21static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 23static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
22{ 24{
@@ -25,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
25 nid_t ino; 27 nid_t ino;
26 struct inode *inode; 28 struct inode *inode;
27 bool nid_free = false; 29 bool nid_free = false;
28 int err; 30 int err, ilock;
29 31
30 inode = new_inode(sb); 32 inode = new_inode(sb);
31 if (!inode) 33 if (!inode)
32 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
33 35
34 mutex_lock_op(sbi, NODE_NEW); 36 ilock = mutex_lock_op(sbi);
35 if (!alloc_nid(sbi, &ino)) { 37 if (!alloc_nid(sbi, &ino)) {
36 mutex_unlock_op(sbi, NODE_NEW); 38 mutex_unlock_op(sbi, ilock);
37 err = -ENOSPC; 39 err = -ENOSPC;
38 goto fail; 40 goto fail;
39 } 41 }
40 mutex_unlock_op(sbi, NODE_NEW); 42 mutex_unlock_op(sbi, ilock);
41 43
42 inode->i_uid = current_fsuid(); 44 inode->i_uid = current_fsuid();
43 45
@@ -61,7 +63,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
61 nid_free = true; 63 nid_free = true;
62 goto out; 64 goto out;
63 } 65 }
64 66 trace_f2fs_new_inode(inode, 0);
65 mark_inode_dirty(inode); 67 mark_inode_dirty(inode);
66 return inode; 68 return inode;
67 69
@@ -69,6 +71,8 @@ out:
69 clear_nlink(inode); 71 clear_nlink(inode);
70 unlock_new_inode(inode); 72 unlock_new_inode(inode);
71fail: 73fail:
74 trace_f2fs_new_inode(inode, err);
75 make_bad_inode(inode);
72 iput(inode); 76 iput(inode);
73 if (nid_free) 77 if (nid_free)
74 alloc_nid_failed(sbi, ino); 78 alloc_nid_failed(sbi, ino);
@@ -82,7 +86,7 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
82 int ret; 86 int ret;
83 87
84 if (sublen > slen) 88 if (sublen > slen)
85 return 1; 89 return 0;
86 90
87 ret = memcmp(s + slen - sublen, sub, sublen); 91 ret = memcmp(s + slen - sublen, sub, sublen);
88 if (ret) { /* compare upper case */ 92 if (ret) { /* compare upper case */
@@ -90,16 +94,16 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
90 char upper_sub[8]; 94 char upper_sub[8];
91 for (i = 0; i < sublen && i < sizeof(upper_sub); i++) 95 for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
92 upper_sub[i] = toupper(sub[i]); 96 upper_sub[i] = toupper(sub[i]);
93 return memcmp(s + slen - sublen, upper_sub, sublen); 97 return !memcmp(s + slen - sublen, upper_sub, sublen);
94 } 98 }
95 99
96 return ret; 100 return !ret;
97} 101}
98 102
99/* 103/*
100 * Set multimedia files as cold files for hot/cold data separation 104 * Set multimedia files as cold files for hot/cold data separation
101 */ 105 */
102static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode, 106static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
103 const unsigned char *name) 107 const unsigned char *name)
104{ 108{
105 int i; 109 int i;
@@ -107,8 +111,8 @@ static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
107 111
108 int count = le32_to_cpu(sbi->raw_super->extension_count); 112 int count = le32_to_cpu(sbi->raw_super->extension_count);
109 for (i = 0; i < count; i++) { 113 for (i = 0; i < count; i++) {
110 if (!is_multimedia_file(name, extlist[i])) { 114 if (is_multimedia_file(name, extlist[i])) {
111 F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; 115 set_cold_file(inode);
112 break; 116 break;
113 } 117 }
114 } 118 }
@@ -121,7 +125,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
121 struct f2fs_sb_info *sbi = F2FS_SB(sb); 125 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 struct inode *inode; 126 struct inode *inode;
123 nid_t ino = 0; 127 nid_t ino = 0;
124 int err; 128 int err, ilock;
125 129
126 f2fs_balance_fs(sbi); 130 f2fs_balance_fs(sbi);
127 131
@@ -130,14 +134,16 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
130 return PTR_ERR(inode); 134 return PTR_ERR(inode);
131 135
132 if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) 136 if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
133 set_cold_file(sbi, inode, dentry->d_name.name); 137 set_cold_files(sbi, inode, dentry->d_name.name);
134 138
135 inode->i_op = &f2fs_file_inode_operations; 139 inode->i_op = &f2fs_file_inode_operations;
136 inode->i_fop = &f2fs_file_operations; 140 inode->i_fop = &f2fs_file_operations;
137 inode->i_mapping->a_ops = &f2fs_dblock_aops; 141 inode->i_mapping->a_ops = &f2fs_dblock_aops;
138 ino = inode->i_ino; 142 ino = inode->i_ino;
139 143
144 ilock = mutex_lock_op(sbi);
140 err = f2fs_add_link(dentry, inode); 145 err = f2fs_add_link(dentry, inode);
146 mutex_unlock_op(sbi, ilock);
141 if (err) 147 if (err)
142 goto out; 148 goto out;
143 149
@@ -150,6 +156,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
150out: 156out:
151 clear_nlink(inode); 157 clear_nlink(inode);
152 unlock_new_inode(inode); 158 unlock_new_inode(inode);
159 make_bad_inode(inode);
153 iput(inode); 160 iput(inode);
154 alloc_nid_failed(sbi, ino); 161 alloc_nid_failed(sbi, ino);
155 return err; 162 return err;
@@ -161,7 +168,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
161 struct inode *inode = old_dentry->d_inode; 168 struct inode *inode = old_dentry->d_inode;
162 struct super_block *sb = dir->i_sb; 169 struct super_block *sb = dir->i_sb;
163 struct f2fs_sb_info *sbi = F2FS_SB(sb); 170 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err; 171 int err, ilock;
165 172
166 f2fs_balance_fs(sbi); 173 f2fs_balance_fs(sbi);
167 174
@@ -169,14 +176,23 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
169 atomic_inc(&inode->i_count); 176 atomic_inc(&inode->i_count);
170 177
171 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 178 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
179 ilock = mutex_lock_op(sbi);
172 err = f2fs_add_link(dentry, inode); 180 err = f2fs_add_link(dentry, inode);
181 mutex_unlock_op(sbi, ilock);
173 if (err) 182 if (err)
174 goto out; 183 goto out;
175 184
185 /*
186 * This file should be checkpointed during fsync.
187 * We lost i_pino from now on.
188 */
189 set_cp_file(inode);
190
176 d_instantiate(dentry, inode); 191 d_instantiate(dentry, inode);
177 return 0; 192 return 0;
178out: 193out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 194 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
195 make_bad_inode(inode);
180 iput(inode); 196 iput(inode);
181 return err; 197 return err;
182} 198}
@@ -197,7 +213,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
197 struct f2fs_dir_entry *de; 213 struct f2fs_dir_entry *de;
198 struct page *page; 214 struct page *page;
199 215
200 if (dentry->d_name.len > F2FS_MAX_NAME_LEN) 216 if (dentry->d_name.len > F2FS_NAME_LEN)
201 return ERR_PTR(-ENAMETOOLONG); 217 return ERR_PTR(-ENAMETOOLONG);
202 218
203 de = f2fs_find_entry(dir, &dentry->d_name, &page); 219 de = f2fs_find_entry(dir, &dentry->d_name, &page);
@@ -222,7 +238,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
222 struct f2fs_dir_entry *de; 238 struct f2fs_dir_entry *de;
223 struct page *page; 239 struct page *page;
224 int err = -ENOENT; 240 int err = -ENOENT;
241 int ilock;
225 242
243 trace_f2fs_unlink_enter(dir, dentry);
226 f2fs_balance_fs(sbi); 244 f2fs_balance_fs(sbi);
227 245
228 de = f2fs_find_entry(dir, &dentry->d_name, &page); 246 de = f2fs_find_entry(dir, &dentry->d_name, &page);
@@ -236,11 +254,14 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
236 goto fail; 254 goto fail;
237 } 255 }
238 256
257 ilock = mutex_lock_op(sbi);
239 f2fs_delete_entry(de, page, inode); 258 f2fs_delete_entry(de, page, inode);
259 mutex_unlock_op(sbi, ilock);
240 260
241 /* In order to evict this inode, we set it dirty */ 261 /* In order to evict this inode, we set it dirty */
242 mark_inode_dirty(inode); 262 mark_inode_dirty(inode);
243fail: 263fail:
264 trace_f2fs_unlink_exit(inode, err);
244 return err; 265 return err;
245} 266}
246 267
@@ -251,7 +272,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
251 struct f2fs_sb_info *sbi = F2FS_SB(sb); 272 struct f2fs_sb_info *sbi = F2FS_SB(sb);
252 struct inode *inode; 273 struct inode *inode;
253 size_t symlen = strlen(symname) + 1; 274 size_t symlen = strlen(symname) + 1;
254 int err; 275 int err, ilock;
255 276
256 f2fs_balance_fs(sbi); 277 f2fs_balance_fs(sbi);
257 278
@@ -262,7 +283,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
262 inode->i_op = &f2fs_symlink_inode_operations; 283 inode->i_op = &f2fs_symlink_inode_operations;
263 inode->i_mapping->a_ops = &f2fs_dblock_aops; 284 inode->i_mapping->a_ops = &f2fs_dblock_aops;
264 285
286 ilock = mutex_lock_op(sbi);
265 err = f2fs_add_link(dentry, inode); 287 err = f2fs_add_link(dentry, inode);
288 mutex_unlock_op(sbi, ilock);
266 if (err) 289 if (err)
267 goto out; 290 goto out;
268 291
@@ -275,6 +298,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
275out: 298out:
276 clear_nlink(inode); 299 clear_nlink(inode);
277 unlock_new_inode(inode); 300 unlock_new_inode(inode);
301 make_bad_inode(inode);
278 iput(inode); 302 iput(inode);
279 alloc_nid_failed(sbi, inode->i_ino); 303 alloc_nid_failed(sbi, inode->i_ino);
280 return err; 304 return err;
@@ -284,7 +308,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
284{ 308{
285 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 309 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
286 struct inode *inode; 310 struct inode *inode;
287 int err; 311 int err, ilock;
288 312
289 f2fs_balance_fs(sbi); 313 f2fs_balance_fs(sbi);
290 314
@@ -298,7 +322,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
298 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); 322 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
299 323
300 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 324 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
325 ilock = mutex_lock_op(sbi);
301 err = f2fs_add_link(dentry, inode); 326 err = f2fs_add_link(dentry, inode);
327 mutex_unlock_op(sbi, ilock);
302 if (err) 328 if (err)
303 goto out_fail; 329 goto out_fail;
304 330
@@ -313,6 +339,7 @@ out_fail:
313 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 339 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
314 clear_nlink(inode); 340 clear_nlink(inode);
315 unlock_new_inode(inode); 341 unlock_new_inode(inode);
342 make_bad_inode(inode);
316 iput(inode); 343 iput(inode);
317 alloc_nid_failed(sbi, inode->i_ino); 344 alloc_nid_failed(sbi, inode->i_ino);
318 return err; 345 return err;
@@ -333,6 +360,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
333 struct f2fs_sb_info *sbi = F2FS_SB(sb); 360 struct f2fs_sb_info *sbi = F2FS_SB(sb);
334 struct inode *inode; 361 struct inode *inode;
335 int err = 0; 362 int err = 0;
363 int ilock;
336 364
337 if (!new_valid_dev(rdev)) 365 if (!new_valid_dev(rdev))
338 return -EINVAL; 366 return -EINVAL;
@@ -346,7 +374,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
346 init_special_inode(inode, inode->i_mode, rdev); 374 init_special_inode(inode, inode->i_mode, rdev);
347 inode->i_op = &f2fs_special_inode_operations; 375 inode->i_op = &f2fs_special_inode_operations;
348 376
377 ilock = mutex_lock_op(sbi);
349 err = f2fs_add_link(dentry, inode); 378 err = f2fs_add_link(dentry, inode);
379 mutex_unlock_op(sbi, ilock);
350 if (err) 380 if (err)
351 goto out; 381 goto out;
352 382
@@ -357,6 +387,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
357out: 387out:
358 clear_nlink(inode); 388 clear_nlink(inode);
359 unlock_new_inode(inode); 389 unlock_new_inode(inode);
390 make_bad_inode(inode);
360 iput(inode); 391 iput(inode);
361 alloc_nid_failed(sbi, inode->i_ino); 392 alloc_nid_failed(sbi, inode->i_ino);
362 return err; 393 return err;
@@ -374,7 +405,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
374 struct f2fs_dir_entry *old_dir_entry = NULL; 405 struct f2fs_dir_entry *old_dir_entry = NULL;
375 struct f2fs_dir_entry *old_entry; 406 struct f2fs_dir_entry *old_entry;
376 struct f2fs_dir_entry *new_entry; 407 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT; 408 int err = -ENOENT, ilock = -1;
378 409
379 f2fs_balance_fs(sbi); 410 f2fs_balance_fs(sbi);
380 411
@@ -389,7 +420,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
389 goto out_old; 420 goto out_old;
390 } 421 }
391 422
392 mutex_lock_op(sbi, RENAME); 423 ilock = mutex_lock_op(sbi);
393 424
394 if (new_inode) { 425 if (new_inode) {
395 struct page *new_page; 426 struct page *new_page;
@@ -412,7 +443,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
412 drop_nlink(new_inode); 443 drop_nlink(new_inode);
413 if (!new_inode->i_nlink) 444 if (!new_inode->i_nlink)
414 add_orphan_inode(sbi, new_inode->i_ino); 445 add_orphan_inode(sbi, new_inode->i_ino);
415 f2fs_write_inode(new_inode, NULL); 446 update_inode_page(new_inode);
416 } else { 447 } else {
417 err = f2fs_add_link(new_dentry, old_inode); 448 err = f2fs_add_link(new_dentry, old_inode);
418 if (err) 449 if (err)
@@ -420,12 +451,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
420 451
421 if (old_dir_entry) { 452 if (old_dir_entry) {
422 inc_nlink(new_dir); 453 inc_nlink(new_dir);
423 f2fs_write_inode(new_dir, NULL); 454 update_inode_page(new_dir);
424 } 455 }
425 } 456 }
426 457
427 old_inode->i_ctime = CURRENT_TIME; 458 old_inode->i_ctime = CURRENT_TIME;
428 set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
429 mark_inode_dirty(old_inode); 459 mark_inode_dirty(old_inode);
430 460
431 f2fs_delete_entry(old_entry, old_page, NULL); 461 f2fs_delete_entry(old_entry, old_page, NULL);
@@ -439,10 +469,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
439 f2fs_put_page(old_dir_page, 0); 469 f2fs_put_page(old_dir_page, 0);
440 } 470 }
441 drop_nlink(old_dir); 471 drop_nlink(old_dir);
442 f2fs_write_inode(old_dir, NULL); 472 update_inode_page(old_dir);
443 } 473 }
444 474
445 mutex_unlock_op(sbi, RENAME); 475 mutex_unlock_op(sbi, ilock);
446 return 0; 476 return 0;
447 477
448out_dir: 478out_dir:
@@ -450,7 +480,7 @@ out_dir:
450 kunmap(old_dir_page); 480 kunmap(old_dir_page);
451 f2fs_put_page(old_dir_page, 0); 481 f2fs_put_page(old_dir_page, 0);
452 } 482 }
453 mutex_unlock_op(sbi, RENAME); 483 mutex_unlock_op(sbi, ilock);
454out_old: 484out_old:
455 kunmap(old_page); 485 kunmap(old_page);
456 f2fs_put_page(old_page, 0); 486 f2fs_put_page(old_page, 0);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e275218904ed..3df43b4efd89 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -19,6 +19,7 @@
19#include "f2fs.h" 19#include "f2fs.h"
20#include "node.h" 20#include "node.h"
21#include "segment.h" 21#include "segment.h"
22#include <trace/events/f2fs.h>
22 23
23static struct kmem_cache *nat_entry_slab; 24static struct kmem_cache *nat_entry_slab;
24static struct kmem_cache *free_nid_slab; 25static struct kmem_cache *free_nid_slab;
@@ -88,10 +89,13 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
88{ 89{
89 struct address_space *mapping = sbi->meta_inode->i_mapping; 90 struct address_space *mapping = sbi->meta_inode->i_mapping;
90 struct f2fs_nm_info *nm_i = NM_I(sbi); 91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct blk_plug plug;
91 struct page *page; 93 struct page *page;
92 pgoff_t index; 94 pgoff_t index;
93 int i; 95 int i;
94 96
97 blk_start_plug(&plug);
98
95 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { 99 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
96 if (nid >= nm_i->max_nid) 100 if (nid >= nm_i->max_nid)
97 nid = 0; 101 nid = 0;
@@ -100,12 +104,16 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
100 page = grab_cache_page(mapping, index); 104 page = grab_cache_page(mapping, index);
101 if (!page) 105 if (!page)
102 continue; 106 continue;
103 if (f2fs_readpage(sbi, page, index, READ)) { 107 if (PageUptodate(page)) {
104 f2fs_put_page(page, 1); 108 f2fs_put_page(page, 1);
105 continue; 109 continue;
106 } 110 }
111 if (f2fs_readpage(sbi, page, index, READ))
112 continue;
113
107 f2fs_put_page(page, 0); 114 f2fs_put_page(page, 0);
108 } 115 }
116 blk_finish_plug(&plug);
109} 117}
110 118
111static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 119static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
@@ -236,7 +244,7 @@ static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
236{ 244{
237 struct f2fs_nm_info *nm_i = NM_I(sbi); 245 struct f2fs_nm_info *nm_i = NM_I(sbi);
238 246
239 if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD) 247 if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD)
240 return 0; 248 return 0;
241 249
242 write_lock(&nm_i->nat_tree_lock); 250 write_lock(&nm_i->nat_tree_lock);
@@ -320,15 +328,14 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
320 noffset[0] = 0; 328 noffset[0] = 0;
321 329
322 if (block < direct_index) { 330 if (block < direct_index) {
323 offset[n++] = block; 331 offset[n] = block;
324 level = 0;
325 goto got; 332 goto got;
326 } 333 }
327 block -= direct_index; 334 block -= direct_index;
328 if (block < direct_blks) { 335 if (block < direct_blks) {
329 offset[n++] = NODE_DIR1_BLOCK; 336 offset[n++] = NODE_DIR1_BLOCK;
330 noffset[n] = 1; 337 noffset[n] = 1;
331 offset[n++] = block; 338 offset[n] = block;
332 level = 1; 339 level = 1;
333 goto got; 340 goto got;
334 } 341 }
@@ -336,7 +343,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
336 if (block < direct_blks) { 343 if (block < direct_blks) {
337 offset[n++] = NODE_DIR2_BLOCK; 344 offset[n++] = NODE_DIR2_BLOCK;
338 noffset[n] = 2; 345 noffset[n] = 2;
339 offset[n++] = block; 346 offset[n] = block;
340 level = 1; 347 level = 1;
341 goto got; 348 goto got;
342 } 349 }
@@ -346,7 +353,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
346 noffset[n] = 3; 353 noffset[n] = 3;
347 offset[n++] = block / direct_blks; 354 offset[n++] = block / direct_blks;
348 noffset[n] = 4 + offset[n - 1]; 355 noffset[n] = 4 + offset[n - 1];
349 offset[n++] = block % direct_blks; 356 offset[n] = block % direct_blks;
350 level = 2; 357 level = 2;
351 goto got; 358 goto got;
352 } 359 }
@@ -356,7 +363,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
356 noffset[n] = 4 + dptrs_per_blk; 363 noffset[n] = 4 + dptrs_per_blk;
357 offset[n++] = block / direct_blks; 364 offset[n++] = block / direct_blks;
358 noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; 365 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
359 offset[n++] = block % direct_blks; 366 offset[n] = block % direct_blks;
360 level = 2; 367 level = 2;
361 goto got; 368 goto got;
362 } 369 }
@@ -371,7 +378,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4])
371 noffset[n] = 7 + (dptrs_per_blk * 2) + 378 noffset[n] = 7 + (dptrs_per_blk * 2) +
372 offset[n - 2] * (dptrs_per_blk + 1) + 379 offset[n - 2] * (dptrs_per_blk + 1) +
373 offset[n - 1]; 380 offset[n - 1];
374 offset[n++] = block % direct_blks; 381 offset[n] = block % direct_blks;
375 level = 3; 382 level = 3;
376 goto got; 383 goto got;
377 } else { 384 } else {
@@ -383,8 +390,11 @@ got:
383 390
384/* 391/*
385 * Caller should call f2fs_put_dnode(dn). 392 * Caller should call f2fs_put_dnode(dn).
393 * Also, it should grab and release a mutex by calling mutex_lock_op() and
394 * mutex_unlock_op() only if ro is not set RDONLY_NODE.
395 * In the case of RDONLY_NODE, we don't need to care about mutex.
386 */ 396 */
387int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) 397int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
388{ 398{
389 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 399 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
390 struct page *npage[4]; 400 struct page *npage[4];
@@ -403,7 +413,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
403 return PTR_ERR(npage[0]); 413 return PTR_ERR(npage[0]);
404 414
405 parent = npage[0]; 415 parent = npage[0];
406 nids[1] = get_nid(parent, offset[0], true); 416 if (level != 0)
417 nids[1] = get_nid(parent, offset[0], true);
407 dn->inode_page = npage[0]; 418 dn->inode_page = npage[0];
408 dn->inode_page_locked = true; 419 dn->inode_page_locked = true;
409 420
@@ -411,12 +422,9 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
411 for (i = 1; i <= level; i++) { 422 for (i = 1; i <= level; i++) {
412 bool done = false; 423 bool done = false;
413 424
414 if (!nids[i] && !ro) { 425 if (!nids[i] && mode == ALLOC_NODE) {
415 mutex_lock_op(sbi, NODE_NEW);
416
417 /* alloc new node */ 426 /* alloc new node */
418 if (!alloc_nid(sbi, &(nids[i]))) { 427 if (!alloc_nid(sbi, &(nids[i]))) {
419 mutex_unlock_op(sbi, NODE_NEW);
420 err = -ENOSPC; 428 err = -ENOSPC;
421 goto release_pages; 429 goto release_pages;
422 } 430 }
@@ -425,16 +433,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
425 npage[i] = new_node_page(dn, noffset[i]); 433 npage[i] = new_node_page(dn, noffset[i]);
426 if (IS_ERR(npage[i])) { 434 if (IS_ERR(npage[i])) {
427 alloc_nid_failed(sbi, nids[i]); 435 alloc_nid_failed(sbi, nids[i]);
428 mutex_unlock_op(sbi, NODE_NEW);
429 err = PTR_ERR(npage[i]); 436 err = PTR_ERR(npage[i]);
430 goto release_pages; 437 goto release_pages;
431 } 438 }
432 439
433 set_nid(parent, offset[i - 1], nids[i], i == 1); 440 set_nid(parent, offset[i - 1], nids[i], i == 1);
434 alloc_nid_done(sbi, nids[i]); 441 alloc_nid_done(sbi, nids[i]);
435 mutex_unlock_op(sbi, NODE_NEW);
436 done = true; 442 done = true;
437 } else if (ro && i == level && level > 1) { 443 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
438 npage[i] = get_node_page_ra(parent, offset[i - 1]); 444 npage[i] = get_node_page_ra(parent, offset[i - 1]);
439 if (IS_ERR(npage[i])) { 445 if (IS_ERR(npage[i])) {
440 err = PTR_ERR(npage[i]); 446 err = PTR_ERR(npage[i]);
@@ -507,6 +513,7 @@ invalidate:
507 513
508 f2fs_put_page(dn->node_page, 1); 514 f2fs_put_page(dn->node_page, 1);
509 dn->node_page = NULL; 515 dn->node_page = NULL;
516 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
510} 517}
511 518
512static int truncate_dnode(struct dnode_of_data *dn) 519static int truncate_dnode(struct dnode_of_data *dn)
@@ -547,9 +554,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
547 if (dn->nid == 0) 554 if (dn->nid == 0)
548 return NIDS_PER_BLOCK + 1; 555 return NIDS_PER_BLOCK + 1;
549 556
557 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
558
550 page = get_node_page(sbi, dn->nid); 559 page = get_node_page(sbi, dn->nid);
551 if (IS_ERR(page)) 560 if (IS_ERR(page)) {
561 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
552 return PTR_ERR(page); 562 return PTR_ERR(page);
563 }
553 564
554 rn = (struct f2fs_node *)page_address(page); 565 rn = (struct f2fs_node *)page_address(page);
555 if (depth < 3) { 566 if (depth < 3) {
@@ -591,10 +602,12 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
591 } else { 602 } else {
592 f2fs_put_page(page, 1); 603 f2fs_put_page(page, 1);
593 } 604 }
605 trace_f2fs_truncate_nodes_exit(dn->inode, freed);
594 return freed; 606 return freed;
595 607
596out_err: 608out_err:
597 f2fs_put_page(page, 1); 609 f2fs_put_page(page, 1);
610 trace_f2fs_truncate_nodes_exit(dn->inode, ret);
598 return ret; 611 return ret;
599} 612}
600 613
@@ -649,6 +662,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
649fail: 662fail:
650 for (i = depth - 3; i >= 0; i--) 663 for (i = depth - 3; i >= 0; i--)
651 f2fs_put_page(pages[i], 1); 664 f2fs_put_page(pages[i], 1);
665
666 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
667
652 return err; 668 return err;
653} 669}
654 670
@@ -658,6 +674,7 @@ fail:
658int truncate_inode_blocks(struct inode *inode, pgoff_t from) 674int truncate_inode_blocks(struct inode *inode, pgoff_t from)
659{ 675{
660 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 676 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
677 struct address_space *node_mapping = sbi->node_inode->i_mapping;
661 int err = 0, cont = 1; 678 int err = 0, cont = 1;
662 int level, offset[4], noffset[4]; 679 int level, offset[4], noffset[4];
663 unsigned int nofs = 0; 680 unsigned int nofs = 0;
@@ -665,11 +682,15 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
665 struct dnode_of_data dn; 682 struct dnode_of_data dn;
666 struct page *page; 683 struct page *page;
667 684
668 level = get_node_path(from, offset, noffset); 685 trace_f2fs_truncate_inode_blocks_enter(inode, from);
669 686
687 level = get_node_path(from, offset, noffset);
688restart:
670 page = get_node_page(sbi, inode->i_ino); 689 page = get_node_page(sbi, inode->i_ino);
671 if (IS_ERR(page)) 690 if (IS_ERR(page)) {
691 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
672 return PTR_ERR(page); 692 return PTR_ERR(page);
693 }
673 694
674 set_new_dnode(&dn, inode, page, NULL, 0); 695 set_new_dnode(&dn, inode, page, NULL, 0);
675 unlock_page(page); 696 unlock_page(page);
@@ -728,6 +749,10 @@ skip_partial:
728 if (offset[1] == 0 && 749 if (offset[1] == 0 &&
729 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { 750 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
730 lock_page(page); 751 lock_page(page);
752 if (page->mapping != node_mapping) {
753 f2fs_put_page(page, 1);
754 goto restart;
755 }
731 wait_on_page_writeback(page); 756 wait_on_page_writeback(page);
732 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 757 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
733 set_page_dirty(page); 758 set_page_dirty(page);
@@ -739,9 +764,14 @@ skip_partial:
739 } 764 }
740fail: 765fail:
741 f2fs_put_page(page, 0); 766 f2fs_put_page(page, 0);
767 trace_f2fs_truncate_inode_blocks_exit(inode, err);
742 return err > 0 ? 0 : err; 768 return err > 0 ? 0 : err;
743} 769}
744 770
771/*
772 * Caller should grab and release a mutex by calling mutex_lock_op() and
773 * mutex_unlock_op().
774 */
745int remove_inode_page(struct inode *inode) 775int remove_inode_page(struct inode *inode)
746{ 776{
747 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 777 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -749,21 +779,16 @@ int remove_inode_page(struct inode *inode)
749 nid_t ino = inode->i_ino; 779 nid_t ino = inode->i_ino;
750 struct dnode_of_data dn; 780 struct dnode_of_data dn;
751 781
752 mutex_lock_op(sbi, NODE_TRUNC);
753 page = get_node_page(sbi, ino); 782 page = get_node_page(sbi, ino);
754 if (IS_ERR(page)) { 783 if (IS_ERR(page))
755 mutex_unlock_op(sbi, NODE_TRUNC);
756 return PTR_ERR(page); 784 return PTR_ERR(page);
757 }
758 785
759 if (F2FS_I(inode)->i_xattr_nid) { 786 if (F2FS_I(inode)->i_xattr_nid) {
760 nid_t nid = F2FS_I(inode)->i_xattr_nid; 787 nid_t nid = F2FS_I(inode)->i_xattr_nid;
761 struct page *npage = get_node_page(sbi, nid); 788 struct page *npage = get_node_page(sbi, nid);
762 789
763 if (IS_ERR(npage)) { 790 if (IS_ERR(npage))
764 mutex_unlock_op(sbi, NODE_TRUNC);
765 return PTR_ERR(npage); 791 return PTR_ERR(npage);
766 }
767 792
768 F2FS_I(inode)->i_xattr_nid = 0; 793 F2FS_I(inode)->i_xattr_nid = 0;
769 set_new_dnode(&dn, inode, page, npage, nid); 794 set_new_dnode(&dn, inode, page, npage, nid);
@@ -775,23 +800,18 @@ int remove_inode_page(struct inode *inode)
775 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); 800 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
776 set_new_dnode(&dn, inode, page, page, ino); 801 set_new_dnode(&dn, inode, page, page, ino);
777 truncate_node(&dn); 802 truncate_node(&dn);
778
779 mutex_unlock_op(sbi, NODE_TRUNC);
780 return 0; 803 return 0;
781} 804}
782 805
783int new_inode_page(struct inode *inode, const struct qstr *name) 806int new_inode_page(struct inode *inode, const struct qstr *name)
784{ 807{
785 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
786 struct page *page; 808 struct page *page;
787 struct dnode_of_data dn; 809 struct dnode_of_data dn;
788 810
789 /* allocate inode page for new inode */ 811 /* allocate inode page for new inode */
790 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 812 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
791 mutex_lock_op(sbi, NODE_NEW);
792 page = new_node_page(&dn, 0); 813 page = new_node_page(&dn, 0);
793 init_dent_inode(name, page); 814 init_dent_inode(name, page);
794 mutex_unlock_op(sbi, NODE_NEW);
795 if (IS_ERR(page)) 815 if (IS_ERR(page))
796 return PTR_ERR(page); 816 return PTR_ERR(page);
797 f2fs_put_page(page, 1); 817 f2fs_put_page(page, 1);
@@ -844,6 +864,12 @@ fail:
844 return ERR_PTR(err); 864 return ERR_PTR(err);
845} 865}
846 866
867/*
868 * Caller should do after getting the following values.
869 * 0: f2fs_put_page(page, 0)
870 * LOCKED_PAGE: f2fs_put_page(page, 1)
871 * error: nothing
872 */
847static int read_node_page(struct page *page, int type) 873static int read_node_page(struct page *page, int type)
848{ 874{
849 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 875 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
@@ -851,8 +877,14 @@ static int read_node_page(struct page *page, int type)
851 877
852 get_node_info(sbi, page->index, &ni); 878 get_node_info(sbi, page->index, &ni);
853 879
854 if (ni.blk_addr == NULL_ADDR) 880 if (ni.blk_addr == NULL_ADDR) {
881 f2fs_put_page(page, 1);
855 return -ENOENT; 882 return -ENOENT;
883 }
884
885 if (PageUptodate(page))
886 return LOCKED_PAGE;
887
856 return f2fs_readpage(sbi, page, ni.blk_addr, type); 888 return f2fs_readpage(sbi, page, ni.blk_addr, type);
857} 889}
858 890
@@ -863,40 +895,53 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
863{ 895{
864 struct address_space *mapping = sbi->node_inode->i_mapping; 896 struct address_space *mapping = sbi->node_inode->i_mapping;
865 struct page *apage; 897 struct page *apage;
898 int err;
866 899
867 apage = find_get_page(mapping, nid); 900 apage = find_get_page(mapping, nid);
868 if (apage && PageUptodate(apage)) 901 if (apage && PageUptodate(apage)) {
869 goto release_out; 902 f2fs_put_page(apage, 0);
903 return;
904 }
870 f2fs_put_page(apage, 0); 905 f2fs_put_page(apage, 0);
871 906
872 apage = grab_cache_page(mapping, nid); 907 apage = grab_cache_page(mapping, nid);
873 if (!apage) 908 if (!apage)
874 return; 909 return;
875 910
876 if (read_node_page(apage, READA)) 911 err = read_node_page(apage, READA);
877 unlock_page(apage); 912 if (err == 0)
878 913 f2fs_put_page(apage, 0);
879release_out: 914 else if (err == LOCKED_PAGE)
880 f2fs_put_page(apage, 0); 915 f2fs_put_page(apage, 1);
881 return; 916 return;
882} 917}
883 918
884struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 919struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
885{ 920{
886 int err;
887 struct page *page;
888 struct address_space *mapping = sbi->node_inode->i_mapping; 921 struct address_space *mapping = sbi->node_inode->i_mapping;
889 922 struct page *page;
923 int err;
924repeat:
890 page = grab_cache_page(mapping, nid); 925 page = grab_cache_page(mapping, nid);
891 if (!page) 926 if (!page)
892 return ERR_PTR(-ENOMEM); 927 return ERR_PTR(-ENOMEM);
893 928
894 err = read_node_page(page, READ_SYNC); 929 err = read_node_page(page, READ_SYNC);
895 if (err) { 930 if (err < 0)
896 f2fs_put_page(page, 1);
897 return ERR_PTR(err); 931 return ERR_PTR(err);
898 } 932 else if (err == LOCKED_PAGE)
933 goto got_it;
899 934
935 lock_page(page);
936 if (!PageUptodate(page)) {
937 f2fs_put_page(page, 1);
938 return ERR_PTR(-EIO);
939 }
940 if (page->mapping != mapping) {
941 f2fs_put_page(page, 1);
942 goto repeat;
943 }
944got_it:
900 BUG_ON(nid != nid_of_node(page)); 945 BUG_ON(nid != nid_of_node(page));
901 mark_page_accessed(page); 946 mark_page_accessed(page);
902 return page; 947 return page;
@@ -910,31 +955,27 @@ struct page *get_node_page_ra(struct page *parent, int start)
910{ 955{
911 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); 956 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
912 struct address_space *mapping = sbi->node_inode->i_mapping; 957 struct address_space *mapping = sbi->node_inode->i_mapping;
913 int i, end; 958 struct blk_plug plug;
914 int err = 0;
915 nid_t nid;
916 struct page *page; 959 struct page *page;
960 int err, i, end;
961 nid_t nid;
917 962
918 /* First, try getting the desired direct node. */ 963 /* First, try getting the desired direct node. */
919 nid = get_nid(parent, start, false); 964 nid = get_nid(parent, start, false);
920 if (!nid) 965 if (!nid)
921 return ERR_PTR(-ENOENT); 966 return ERR_PTR(-ENOENT);
922
923 page = find_get_page(mapping, nid);
924 if (page && PageUptodate(page))
925 goto page_hit;
926 f2fs_put_page(page, 0);
927
928repeat: 967repeat:
929 page = grab_cache_page(mapping, nid); 968 page = grab_cache_page(mapping, nid);
930 if (!page) 969 if (!page)
931 return ERR_PTR(-ENOMEM); 970 return ERR_PTR(-ENOMEM);
932 971
933 err = read_node_page(page, READA); 972 err = read_node_page(page, READ_SYNC);
934 if (err) { 973 if (err < 0)
935 f2fs_put_page(page, 1);
936 return ERR_PTR(err); 974 return ERR_PTR(err);
937 } 975 else if (err == LOCKED_PAGE)
976 goto page_hit;
977
978 blk_start_plug(&plug);
938 979
939 /* Then, try readahead for siblings of the desired node */ 980 /* Then, try readahead for siblings of the desired node */
940 end = start + MAX_RA_NODE; 981 end = start + MAX_RA_NODE;
@@ -946,18 +987,19 @@ repeat:
946 ra_node_page(sbi, nid); 987 ra_node_page(sbi, nid);
947 } 988 }
948 989
949page_hit: 990 blk_finish_plug(&plug);
950 lock_page(page);
951 if (PageError(page)) {
952 f2fs_put_page(page, 1);
953 return ERR_PTR(-EIO);
954 }
955 991
956 /* Has the page been truncated? */ 992 lock_page(page);
957 if (page->mapping != mapping) { 993 if (page->mapping != mapping) {
958 f2fs_put_page(page, 1); 994 f2fs_put_page(page, 1);
959 goto repeat; 995 goto repeat;
960 } 996 }
997page_hit:
998 if (!PageUptodate(page)) {
999 f2fs_put_page(page, 1);
1000 return ERR_PTR(-EIO);
1001 }
1002 mark_page_accessed(page);
961 return page; 1003 return page;
962} 1004}
963 1005
@@ -972,7 +1014,7 @@ void sync_inode_page(struct dnode_of_data *dn)
972 if (!dn->inode_page_locked) 1014 if (!dn->inode_page_locked)
973 unlock_page(dn->inode_page); 1015 unlock_page(dn->inode_page);
974 } else { 1016 } else {
975 f2fs_write_inode(dn->inode, NULL); 1017 update_inode_page(dn->inode);
976 } 1018 }
977} 1019}
978 1020
@@ -1087,17 +1129,8 @@ static int f2fs_write_node_page(struct page *page,
1087 block_t new_addr; 1129 block_t new_addr;
1088 struct node_info ni; 1130 struct node_info ni;
1089 1131
1090 if (wbc->for_reclaim) {
1091 dec_page_count(sbi, F2FS_DIRTY_NODES);
1092 wbc->pages_skipped++;
1093 set_page_dirty(page);
1094 return AOP_WRITEPAGE_ACTIVATE;
1095 }
1096
1097 wait_on_page_writeback(page); 1132 wait_on_page_writeback(page);
1098 1133
1099 mutex_lock_op(sbi, NODE_WRITE);
1100
1101 /* get old block addr of this node page */ 1134 /* get old block addr of this node page */
1102 nid = nid_of_node(page); 1135 nid = nid_of_node(page);
1103 BUG_ON(page->index != nid); 1136 BUG_ON(page->index != nid);
@@ -1105,17 +1138,25 @@ static int f2fs_write_node_page(struct page *page,
1105 get_node_info(sbi, nid, &ni); 1138 get_node_info(sbi, nid, &ni);
1106 1139
1107 /* This page is already truncated */ 1140 /* This page is already truncated */
1108 if (ni.blk_addr == NULL_ADDR) 1141 if (ni.blk_addr == NULL_ADDR) {
1142 dec_page_count(sbi, F2FS_DIRTY_NODES);
1143 unlock_page(page);
1109 return 0; 1144 return 0;
1145 }
1110 1146
1111 set_page_writeback(page); 1147 if (wbc->for_reclaim) {
1148 dec_page_count(sbi, F2FS_DIRTY_NODES);
1149 wbc->pages_skipped++;
1150 set_page_dirty(page);
1151 return AOP_WRITEPAGE_ACTIVATE;
1152 }
1112 1153
1113 /* insert node offset */ 1154 mutex_lock(&sbi->node_write);
1155 set_page_writeback(page);
1114 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); 1156 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
1115 set_node_addr(sbi, &ni, new_addr); 1157 set_node_addr(sbi, &ni, new_addr);
1116 dec_page_count(sbi, F2FS_DIRTY_NODES); 1158 dec_page_count(sbi, F2FS_DIRTY_NODES);
1117 1159 mutex_unlock(&sbi->node_write);
1118 mutex_unlock_op(sbi, NODE_WRITE);
1119 unlock_page(page); 1160 unlock_page(page);
1120 return 0; 1161 return 0;
1121} 1162}
@@ -1130,12 +1171,11 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1130 struct writeback_control *wbc) 1171 struct writeback_control *wbc)
1131{ 1172{
1132 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1173 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1133 struct block_device *bdev = sbi->sb->s_bdev;
1134 long nr_to_write = wbc->nr_to_write; 1174 long nr_to_write = wbc->nr_to_write;
1135 1175
1136 /* First check balancing cached NAT entries */ 1176 /* First check balancing cached NAT entries */
1137 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1177 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1138 write_checkpoint(sbi, false); 1178 f2fs_sync_fs(sbi->sb, true);
1139 return 0; 1179 return 0;
1140 } 1180 }
1141 1181
@@ -1144,10 +1184,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1144 return 0; 1184 return 0;
1145 1185
1146 /* if mounting is failed, skip writing node pages */ 1186 /* if mounting is failed, skip writing node pages */
1147 wbc->nr_to_write = bio_get_nr_vecs(bdev); 1187 wbc->nr_to_write = max_hw_blocks(sbi);
1148 sync_node_pages(sbi, 0, wbc); 1188 sync_node_pages(sbi, 0, wbc);
1149 wbc->nr_to_write = nr_to_write - 1189 wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write);
1150 (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
1151 return 0; 1190 return 0;
1152} 1191}
1153 1192
@@ -1178,7 +1217,7 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
1178static int f2fs_release_node_page(struct page *page, gfp_t wait) 1217static int f2fs_release_node_page(struct page *page, gfp_t wait)
1179{ 1218{
1180 ClearPagePrivate(page); 1219 ClearPagePrivate(page);
1181 return 0; 1220 return 1;
1182} 1221}
1183 1222
1184/* 1223/*
@@ -1195,14 +1234,13 @@ const struct address_space_operations f2fs_node_aops = {
1195static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) 1234static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
1196{ 1235{
1197 struct list_head *this; 1236 struct list_head *this;
1198 struct free_nid *i = NULL; 1237 struct free_nid *i;
1199 list_for_each(this, head) { 1238 list_for_each(this, head) {
1200 i = list_entry(this, struct free_nid, list); 1239 i = list_entry(this, struct free_nid, list);
1201 if (i->nid == n) 1240 if (i->nid == n)
1202 break; 1241 return i;
1203 i = NULL;
1204 } 1242 }
1205 return i; 1243 return NULL;
1206} 1244}
1207 1245
1208static void __del_from_free_nid_list(struct free_nid *i) 1246static void __del_from_free_nid_list(struct free_nid *i)
@@ -1211,11 +1249,29 @@ static void __del_from_free_nid_list(struct free_nid *i)
1211 kmem_cache_free(free_nid_slab, i); 1249 kmem_cache_free(free_nid_slab, i);
1212} 1250}
1213 1251
1214static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1252static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1215{ 1253{
1216 struct free_nid *i; 1254 struct free_nid *i;
1255 struct nat_entry *ne;
1256 bool allocated = false;
1217 1257
1218 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) 1258 if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
1259 return -1;
1260
1261 /* 0 nid should not be used */
1262 if (nid == 0)
1263 return 0;
1264
1265 if (!build)
1266 goto retry;
1267
1268 /* do not add allocated nids */
1269 read_lock(&nm_i->nat_tree_lock);
1270 ne = __lookup_nat_cache(nm_i, nid);
1271 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1272 allocated = true;
1273 read_unlock(&nm_i->nat_tree_lock);
1274 if (allocated)
1219 return 0; 1275 return 0;
1220retry: 1276retry:
1221 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1277 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
@@ -1250,63 +1306,59 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1250 spin_unlock(&nm_i->free_nid_list_lock); 1306 spin_unlock(&nm_i->free_nid_list_lock);
1251} 1307}
1252 1308
1253static int scan_nat_page(struct f2fs_nm_info *nm_i, 1309static void scan_nat_page(struct f2fs_nm_info *nm_i,
1254 struct page *nat_page, nid_t start_nid) 1310 struct page *nat_page, nid_t start_nid)
1255{ 1311{
1256 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1312 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1257 block_t blk_addr; 1313 block_t blk_addr;
1258 int fcnt = 0;
1259 int i; 1314 int i;
1260 1315
1261 /* 0 nid should not be used */
1262 if (start_nid == 0)
1263 ++start_nid;
1264
1265 i = start_nid % NAT_ENTRY_PER_BLOCK; 1316 i = start_nid % NAT_ENTRY_PER_BLOCK;
1266 1317
1267 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1318 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1268 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1319
1320 if (start_nid >= nm_i->max_nid)
1321 break;
1322
1323 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1269 BUG_ON(blk_addr == NEW_ADDR); 1324 BUG_ON(blk_addr == NEW_ADDR);
1270 if (blk_addr == NULL_ADDR) 1325 if (blk_addr == NULL_ADDR) {
1271 fcnt += add_free_nid(nm_i, start_nid); 1326 if (add_free_nid(nm_i, start_nid, true) < 0)
1327 break;
1328 }
1272 } 1329 }
1273 return fcnt;
1274} 1330}
1275 1331
1276static void build_free_nids(struct f2fs_sb_info *sbi) 1332static void build_free_nids(struct f2fs_sb_info *sbi)
1277{ 1333{
1278 struct free_nid *fnid, *next_fnid;
1279 struct f2fs_nm_info *nm_i = NM_I(sbi); 1334 struct f2fs_nm_info *nm_i = NM_I(sbi);
1280 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1335 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1281 struct f2fs_summary_block *sum = curseg->sum_blk; 1336 struct f2fs_summary_block *sum = curseg->sum_blk;
1282 nid_t nid = 0; 1337 int i = 0;
1283 bool is_cycled = false; 1338 nid_t nid = nm_i->next_scan_nid;
1284 int fcnt = 0;
1285 int i;
1286 1339
1287 nid = nm_i->next_scan_nid; 1340 /* Enough entries */
1288 nm_i->init_scan_nid = nid; 1341 if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
1342 return;
1289 1343
1344 /* readahead nat pages to be scanned */
1290 ra_nat_pages(sbi, nid); 1345 ra_nat_pages(sbi, nid);
1291 1346
1292 while (1) { 1347 while (1) {
1293 struct page *page = get_current_nat_page(sbi, nid); 1348 struct page *page = get_current_nat_page(sbi, nid);
1294 1349
1295 fcnt += scan_nat_page(nm_i, page, nid); 1350 scan_nat_page(nm_i, page, nid);
1296 f2fs_put_page(page, 1); 1351 f2fs_put_page(page, 1);
1297 1352
1298 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1353 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1299 1354 if (nid >= nm_i->max_nid)
1300 if (nid >= nm_i->max_nid) {
1301 nid = 0; 1355 nid = 0;
1302 is_cycled = true; 1356
1303 } 1357 if (i++ == FREE_NID_PAGES)
1304 if (fcnt > MAX_FREE_NIDS)
1305 break;
1306 if (is_cycled && nm_i->init_scan_nid <= nid)
1307 break; 1358 break;
1308 } 1359 }
1309 1360
1361 /* go to the next free nat pages to find free nids abundantly */
1310 nm_i->next_scan_nid = nid; 1362 nm_i->next_scan_nid = nid;
1311 1363
1312 /* find free nids from current sum_pages */ 1364 /* find free nids from current sum_pages */
@@ -1315,22 +1367,11 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1315 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); 1367 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1316 nid = le32_to_cpu(nid_in_journal(sum, i)); 1368 nid = le32_to_cpu(nid_in_journal(sum, i));
1317 if (addr == NULL_ADDR) 1369 if (addr == NULL_ADDR)
1318 add_free_nid(nm_i, nid); 1370 add_free_nid(nm_i, nid, true);
1319 else 1371 else
1320 remove_free_nid(nm_i, nid); 1372 remove_free_nid(nm_i, nid);
1321 } 1373 }
1322 mutex_unlock(&curseg->curseg_mutex); 1374 mutex_unlock(&curseg->curseg_mutex);
1323
1324 /* remove the free nids from current allocated nids */
1325 list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
1326 struct nat_entry *ne;
1327
1328 read_lock(&nm_i->nat_tree_lock);
1329 ne = __lookup_nat_cache(nm_i, fnid->nid);
1330 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1331 remove_free_nid(nm_i, fnid->nid);
1332 read_unlock(&nm_i->nat_tree_lock);
1333 }
1334} 1375}
1335 1376
1336/* 1377/*
@@ -1344,41 +1385,36 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1344 struct free_nid *i = NULL; 1385 struct free_nid *i = NULL;
1345 struct list_head *this; 1386 struct list_head *this;
1346retry: 1387retry:
1347 mutex_lock(&nm_i->build_lock); 1388 if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
1348 if (!nm_i->fcnt) { 1389 return false;
1349 /* scan NAT in order to build free nid list */
1350 build_free_nids(sbi);
1351 if (!nm_i->fcnt) {
1352 mutex_unlock(&nm_i->build_lock);
1353 return false;
1354 }
1355 }
1356 mutex_unlock(&nm_i->build_lock);
1357 1390
1358 /*
1359 * We check fcnt again since previous check is racy as
1360 * we didn't hold free_nid_list_lock. So other thread
1361 * could consume all of free nids.
1362 */
1363 spin_lock(&nm_i->free_nid_list_lock); 1391 spin_lock(&nm_i->free_nid_list_lock);
1364 if (!nm_i->fcnt) {
1365 spin_unlock(&nm_i->free_nid_list_lock);
1366 goto retry;
1367 }
1368 1392
1369 BUG_ON(list_empty(&nm_i->free_nid_list)); 1393 /* We should not use stale free nids created by build_free_nids */
1370 list_for_each(this, &nm_i->free_nid_list) { 1394 if (nm_i->fcnt && !sbi->on_build_free_nids) {
1371 i = list_entry(this, struct free_nid, list); 1395 BUG_ON(list_empty(&nm_i->free_nid_list));
1372 if (i->state == NID_NEW) 1396 list_for_each(this, &nm_i->free_nid_list) {
1373 break; 1397 i = list_entry(this, struct free_nid, list);
1374 } 1398 if (i->state == NID_NEW)
1399 break;
1400 }
1375 1401
1376 BUG_ON(i->state != NID_NEW); 1402 BUG_ON(i->state != NID_NEW);
1377 *nid = i->nid; 1403 *nid = i->nid;
1378 i->state = NID_ALLOC; 1404 i->state = NID_ALLOC;
1379 nm_i->fcnt--; 1405 nm_i->fcnt--;
1406 spin_unlock(&nm_i->free_nid_list_lock);
1407 return true;
1408 }
1380 spin_unlock(&nm_i->free_nid_list_lock); 1409 spin_unlock(&nm_i->free_nid_list_lock);
1381 return true; 1410
1411 /* Let's scan nat pages and its caches to get free nids */
1412 mutex_lock(&nm_i->build_lock);
1413 sbi->on_build_free_nids = 1;
1414 build_free_nids(sbi);
1415 sbi->on_build_free_nids = 0;
1416 mutex_unlock(&nm_i->build_lock);
1417 goto retry;
1382} 1418}
1383 1419
1384/* 1420/*
@@ -1391,10 +1427,8 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1391 1427
1392 spin_lock(&nm_i->free_nid_list_lock); 1428 spin_lock(&nm_i->free_nid_list_lock);
1393 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1429 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1394 if (i) { 1430 BUG_ON(!i || i->state != NID_ALLOC);
1395 BUG_ON(i->state != NID_ALLOC); 1431 __del_from_free_nid_list(i);
1396 __del_from_free_nid_list(i);
1397 }
1398 spin_unlock(&nm_i->free_nid_list_lock); 1432 spin_unlock(&nm_i->free_nid_list_lock);
1399} 1433}
1400 1434
@@ -1403,8 +1437,19 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1403 */ 1437 */
1404void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) 1438void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1405{ 1439{
1406 alloc_nid_done(sbi, nid); 1440 struct f2fs_nm_info *nm_i = NM_I(sbi);
1407 add_free_nid(NM_I(sbi), nid); 1441 struct free_nid *i;
1442
1443 spin_lock(&nm_i->free_nid_list_lock);
1444 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1445 BUG_ON(!i || i->state != NID_ALLOC);
1446 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
1447 __del_from_free_nid_list(i);
1448 } else {
1449 i->state = NID_NEW;
1450 nm_i->fcnt++;
1451 }
1452 spin_unlock(&nm_i->free_nid_list_lock);
1408} 1453}
1409 1454
1410void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, 1455void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1475,23 +1520,24 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1475 sum_entry = &sum->entries[0]; 1520 sum_entry = &sum->entries[0];
1476 1521
1477 for (i = 0; i < last_offset; i++, sum_entry++) { 1522 for (i = 0; i < last_offset; i++, sum_entry++) {
1523 /*
1524 * In order to read next node page,
1525 * we must clear PageUptodate flag.
1526 */
1527 ClearPageUptodate(page);
1528
1478 if (f2fs_readpage(sbi, page, addr, READ_SYNC)) 1529 if (f2fs_readpage(sbi, page, addr, READ_SYNC))
1479 goto out; 1530 goto out;
1480 1531
1532 lock_page(page);
1481 rn = (struct f2fs_node *)page_address(page); 1533 rn = (struct f2fs_node *)page_address(page);
1482 sum_entry->nid = rn->footer.nid; 1534 sum_entry->nid = rn->footer.nid;
1483 sum_entry->version = 0; 1535 sum_entry->version = 0;
1484 sum_entry->ofs_in_node = 0; 1536 sum_entry->ofs_in_node = 0;
1485 addr++; 1537 addr++;
1486
1487 /*
1488 * In order to read next node page,
1489 * we must clear PageUptodate flag.
1490 */
1491 ClearPageUptodate(page);
1492 } 1538 }
1493out:
1494 unlock_page(page); 1539 unlock_page(page);
1540out:
1495 __free_pages(page, 0); 1541 __free_pages(page, 0);
1496 return 0; 1542 return 0;
1497} 1543}
@@ -1614,13 +1660,11 @@ flush_now:
1614 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1660 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1615 } 1661 }
1616 1662
1617 if (nat_get_blkaddr(ne) == NULL_ADDR) { 1663 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1664 add_free_nid(NM_I(sbi), nid, false) <= 0) {
1618 write_lock(&nm_i->nat_tree_lock); 1665 write_lock(&nm_i->nat_tree_lock);
1619 __del_from_nat_cache(nm_i, ne); 1666 __del_from_nat_cache(nm_i, ne);
1620 write_unlock(&nm_i->nat_tree_lock); 1667 write_unlock(&nm_i->nat_tree_lock);
1621
1622 /* We can reuse this freed nid at this point */
1623 add_free_nid(NM_I(sbi), nid);
1624 } else { 1668 } else {
1625 write_lock(&nm_i->nat_tree_lock); 1669 write_lock(&nm_i->nat_tree_lock);
1626 __clear_nat_cache_dirty(nm_i, ne); 1670 __clear_nat_cache_dirty(nm_i, ne);
@@ -1661,19 +1705,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1661 spin_lock_init(&nm_i->free_nid_list_lock); 1705 spin_lock_init(&nm_i->free_nid_list_lock);
1662 rwlock_init(&nm_i->nat_tree_lock); 1706 rwlock_init(&nm_i->nat_tree_lock);
1663 1707
1664 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1665 nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1666 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 1708 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1667 1709 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1668 nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
1669 if (!nm_i->nat_bitmap)
1670 return -ENOMEM;
1671 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); 1710 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1672 if (!version_bitmap) 1711 if (!version_bitmap)
1673 return -EFAULT; 1712 return -EFAULT;
1674 1713
1675 /* copy version bitmap */ 1714 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
1676 memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size); 1715 GFP_KERNEL);
1716 if (!nm_i->nat_bitmap)
1717 return -ENOMEM;
1677 return 0; 1718 return 0;
1678} 1719}
1679 1720
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index afdb130f782e..0a2d72f0024d 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -29,6 +29,9 @@
29/* vector size for gang look-up from nat cache that consists of radix tree */ 29/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64 30#define NATVEC_SIZE 64
31 31
32/* return value for read_node_page */
33#define LOCKED_PAGE 1
34
32/* 35/*
33 * For node information 36 * For node information
34 */ 37 */
@@ -239,7 +242,7 @@ static inline bool IS_DNODE(struct page *node_page)
239 return false; 242 return false;
240 if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { 243 if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
241 ofs -= 6 + 2 * NIDS_PER_BLOCK; 244 ofs -= 6 + 2 * NIDS_PER_BLOCK;
242 if ((long int)ofs % (NIDS_PER_BLOCK + 1)) 245 if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
243 return false; 246 return false;
244 } 247 }
245 return true; 248 return true;
@@ -277,6 +280,21 @@ static inline int is_cold_file(struct inode *inode)
277 return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; 280 return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
278} 281}
279 282
283static inline void set_cold_file(struct inode *inode)
284{
285 F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
286}
287
288static inline int is_cp_file(struct inode *inode)
289{
290 return F2FS_I(inode)->i_advise & FADVISE_CP_BIT;
291}
292
293static inline void set_cp_file(struct inode *inode)
294{
295 F2FS_I(inode)->i_advise |= FADVISE_CP_BIT;
296}
297
280static inline int is_cold_data(struct page *page) 298static inline int is_cold_data(struct page *page)
281{ 299{
282 return PageChecked(page); 300 return PageChecked(page);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b235215ac138..60c8a5097058 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -53,7 +53,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
53 53
54 dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); 54 dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
55 if (IS_ERR(dir)) { 55 if (IS_ERR(dir)) {
56 err = -EINVAL; 56 err = PTR_ERR(dir);
57 goto out; 57 goto out;
58 } 58 }
59 59
@@ -112,11 +112,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
112 while (1) { 112 while (1) {
113 struct fsync_inode_entry *entry; 113 struct fsync_inode_entry *entry;
114 114
115 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) 115 err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
116 if (err)
116 goto out; 117 goto out;
117 118
119 lock_page(page);
120
118 if (cp_ver != cpver_of_node(page)) 121 if (cp_ver != cpver_of_node(page))
119 goto out; 122 goto unlock_out;
120 123
121 if (!is_fsync_dnode(page)) 124 if (!is_fsync_dnode(page))
122 goto next; 125 goto next;
@@ -129,24 +132,23 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
129 FI_INC_LINK); 132 FI_INC_LINK);
130 } else { 133 } else {
131 if (IS_INODE(page) && is_dent_dnode(page)) { 134 if (IS_INODE(page) && is_dent_dnode(page)) {
132 if (recover_inode_page(sbi, page)) { 135 err = recover_inode_page(sbi, page);
133 err = -ENOMEM; 136 if (err)
134 goto out; 137 goto unlock_out;
135 }
136 } 138 }
137 139
138 /* add this fsync inode to the list */ 140 /* add this fsync inode to the list */
139 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); 141 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
140 if (!entry) { 142 if (!entry) {
141 err = -ENOMEM; 143 err = -ENOMEM;
142 goto out; 144 goto unlock_out;
143 } 145 }
144 146
145 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 147 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
146 if (IS_ERR(entry->inode)) { 148 if (IS_ERR(entry->inode)) {
147 err = PTR_ERR(entry->inode); 149 err = PTR_ERR(entry->inode);
148 kmem_cache_free(fsync_entry_slab, entry); 150 kmem_cache_free(fsync_entry_slab, entry);
149 goto out; 151 goto unlock_out;
150 } 152 }
151 153
152 list_add_tail(&entry->list, head); 154 list_add_tail(&entry->list, head);
@@ -154,16 +156,20 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
154 } 156 }
155 if (IS_INODE(page)) { 157 if (IS_INODE(page)) {
156 err = recover_inode(entry->inode, page); 158 err = recover_inode(entry->inode, page);
157 if (err) 159 if (err == -ENOENT) {
158 goto out; 160 goto next;
161 } else if (err) {
162 err = -EINVAL;
163 goto unlock_out;
164 }
159 } 165 }
160next: 166next:
161 /* check next segment */ 167 /* check next segment */
162 blkaddr = next_blkaddr_of_node(page); 168 blkaddr = next_blkaddr_of_node(page);
163 ClearPageUptodate(page);
164 } 169 }
165out: 170unlock_out:
166 unlock_page(page); 171 unlock_page(page);
172out:
167 __free_pages(page, 0); 173 __free_pages(page, 0);
168 return err; 174 return err;
169} 175}
@@ -232,13 +238,15 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
232 iput(inode); 238 iput(inode);
233} 239}
234 240
235static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, 241static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
236 struct page *page, block_t blkaddr) 242 struct page *page, block_t blkaddr)
237{ 243{
238 unsigned int start, end; 244 unsigned int start, end;
239 struct dnode_of_data dn; 245 struct dnode_of_data dn;
240 struct f2fs_summary sum; 246 struct f2fs_summary sum;
241 struct node_info ni; 247 struct node_info ni;
248 int err = 0;
249 int ilock;
242 250
243 start = start_bidx_of_node(ofs_of_node(page)); 251 start = start_bidx_of_node(ofs_of_node(page));
244 if (IS_INODE(page)) 252 if (IS_INODE(page))
@@ -246,9 +254,14 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
246 else 254 else
247 end = start + ADDRS_PER_BLOCK; 255 end = start + ADDRS_PER_BLOCK;
248 256
257 ilock = mutex_lock_op(sbi);
249 set_new_dnode(&dn, inode, NULL, NULL, 0); 258 set_new_dnode(&dn, inode, NULL, NULL, 0);
250 if (get_dnode_of_data(&dn, start, 0)) 259
251 return; 260 err = get_dnode_of_data(&dn, start, ALLOC_NODE);
261 if (err) {
262 mutex_unlock_op(sbi, ilock);
263 return err;
264 }
252 265
253 wait_on_page_writeback(dn.node_page); 266 wait_on_page_writeback(dn.node_page);
254 267
@@ -293,14 +306,17 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
293 306
294 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); 307 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
295 f2fs_put_dnode(&dn); 308 f2fs_put_dnode(&dn);
309 mutex_unlock_op(sbi, ilock);
310 return 0;
296} 311}
297 312
298static void recover_data(struct f2fs_sb_info *sbi, 313static int recover_data(struct f2fs_sb_info *sbi,
299 struct list_head *head, int type) 314 struct list_head *head, int type)
300{ 315{
301 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); 316 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
302 struct curseg_info *curseg; 317 struct curseg_info *curseg;
303 struct page *page; 318 struct page *page;
319 int err = 0;
304 block_t blkaddr; 320 block_t blkaddr;
305 321
306 /* get node pages in the current segment */ 322 /* get node pages in the current segment */
@@ -310,23 +326,29 @@ static void recover_data(struct f2fs_sb_info *sbi,
310 /* read node page */ 326 /* read node page */
311 page = alloc_page(GFP_NOFS | __GFP_ZERO); 327 page = alloc_page(GFP_NOFS | __GFP_ZERO);
312 if (IS_ERR(page)) 328 if (IS_ERR(page))
313 return; 329 return -ENOMEM;
330
314 lock_page(page); 331 lock_page(page);
315 332
316 while (1) { 333 while (1) {
317 struct fsync_inode_entry *entry; 334 struct fsync_inode_entry *entry;
318 335
319 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC)) 336 err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
337 if (err)
320 goto out; 338 goto out;
321 339
340 lock_page(page);
341
322 if (cp_ver != cpver_of_node(page)) 342 if (cp_ver != cpver_of_node(page))
323 goto out; 343 goto unlock_out;
324 344
325 entry = get_fsync_inode(head, ino_of_node(page)); 345 entry = get_fsync_inode(head, ino_of_node(page));
326 if (!entry) 346 if (!entry)
327 goto next; 347 goto next;
328 348
329 do_recover_data(sbi, entry->inode, page, blkaddr); 349 err = do_recover_data(sbi, entry->inode, page, blkaddr);
350 if (err)
351 goto out;
330 352
331 if (entry->blkaddr == blkaddr) { 353 if (entry->blkaddr == blkaddr) {
332 iput(entry->inode); 354 iput(entry->inode);
@@ -336,28 +358,32 @@ static void recover_data(struct f2fs_sb_info *sbi,
336next: 358next:
337 /* check next segment */ 359 /* check next segment */
338 blkaddr = next_blkaddr_of_node(page); 360 blkaddr = next_blkaddr_of_node(page);
339 ClearPageUptodate(page);
340 } 361 }
341out: 362unlock_out:
342 unlock_page(page); 363 unlock_page(page);
364out:
343 __free_pages(page, 0); 365 __free_pages(page, 0);
344 366
345 allocate_new_segments(sbi); 367 if (!err)
368 allocate_new_segments(sbi);
369 return err;
346} 370}
347 371
348void recover_fsync_data(struct f2fs_sb_info *sbi) 372int recover_fsync_data(struct f2fs_sb_info *sbi)
349{ 373{
350 struct list_head inode_list; 374 struct list_head inode_list;
375 int err;
351 376
352 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 377 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
353 sizeof(struct fsync_inode_entry), NULL); 378 sizeof(struct fsync_inode_entry), NULL);
354 if (unlikely(!fsync_entry_slab)) 379 if (unlikely(!fsync_entry_slab))
355 return; 380 return -ENOMEM;
356 381
357 INIT_LIST_HEAD(&inode_list); 382 INIT_LIST_HEAD(&inode_list);
358 383
359 /* step #1: find fsynced inode numbers */ 384 /* step #1: find fsynced inode numbers */
360 if (find_fsync_dnodes(sbi, &inode_list)) 385 err = find_fsync_dnodes(sbi, &inode_list);
386 if (err)
361 goto out; 387 goto out;
362 388
363 if (list_empty(&inode_list)) 389 if (list_empty(&inode_list))
@@ -365,11 +391,12 @@ void recover_fsync_data(struct f2fs_sb_info *sbi)
365 391
366 /* step #2: recover data */ 392 /* step #2: recover data */
367 sbi->por_doing = 1; 393 sbi->por_doing = 1;
368 recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 394 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
369 sbi->por_doing = 0; 395 sbi->por_doing = 0;
370 BUG_ON(!list_empty(&inode_list)); 396 BUG_ON(!list_empty(&inode_list));
371out: 397out:
372 destroy_fsync_dnodes(sbi, &inode_list); 398 destroy_fsync_dnodes(sbi, &inode_list);
373 kmem_cache_destroy(fsync_entry_slab); 399 kmem_cache_destroy(fsync_entry_slab);
374 write_checkpoint(sbi, false); 400 write_checkpoint(sbi, false);
401 return err;
375} 402}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 777f17e496e6..d8e84e49a5c3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -18,6 +18,7 @@
18#include "f2fs.h" 18#include "f2fs.h"
19#include "segment.h" 19#include "segment.h"
20#include "node.h" 20#include "node.h"
21#include <trace/events/f2fs.h>
21 22
22/* 23/*
23 * This function balances dirty node and dentry pages. 24 * This function balances dirty node and dentry pages.
@@ -49,9 +50,20 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
49 50
50 if (dirty_type == DIRTY) { 51 if (dirty_type == DIRTY) {
51 struct seg_entry *sentry = get_seg_entry(sbi, segno); 52 struct seg_entry *sentry = get_seg_entry(sbi, segno);
53 enum dirty_type t = DIRTY_HOT_DATA;
54
52 dirty_type = sentry->type; 55 dirty_type = sentry->type;
56
53 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) 57 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
54 dirty_i->nr_dirty[dirty_type]++; 58 dirty_i->nr_dirty[dirty_type]++;
59
60 /* Only one bitmap should be set */
61 for (; t <= DIRTY_COLD_NODE; t++) {
62 if (t == dirty_type)
63 continue;
64 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
65 dirty_i->nr_dirty[t]--;
66 }
55 } 67 }
56} 68}
57 69
@@ -64,13 +76,16 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
64 dirty_i->nr_dirty[dirty_type]--; 76 dirty_i->nr_dirty[dirty_type]--;
65 77
66 if (dirty_type == DIRTY) { 78 if (dirty_type == DIRTY) {
67 struct seg_entry *sentry = get_seg_entry(sbi, segno); 79 enum dirty_type t = DIRTY_HOT_DATA;
68 dirty_type = sentry->type; 80
69 if (test_and_clear_bit(segno, 81 /* clear all the bitmaps */
70 dirty_i->dirty_segmap[dirty_type])) 82 for (; t <= DIRTY_COLD_NODE; t++)
71 dirty_i->nr_dirty[dirty_type]--; 83 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
72 clear_bit(segno, dirty_i->victim_segmap[FG_GC]); 84 dirty_i->nr_dirty[t]--;
73 clear_bit(segno, dirty_i->victim_segmap[BG_GC]); 85
86 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
87 clear_bit(GET_SECNO(sbi, segno),
88 dirty_i->victim_secmap);
74 } 89 }
75} 90}
76 91
@@ -296,13 +311,12 @@ static void write_sum_page(struct f2fs_sb_info *sbi,
296 f2fs_put_page(page, 1); 311 f2fs_put_page(page, 1);
297} 312}
298 313
299static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, 314static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type)
300 int ofs_unit, int type)
301{ 315{
302 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 316 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
303 unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; 317 unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
304 unsigned int segno, next_segno, i; 318 unsigned int segno;
305 int ofs = 0; 319 unsigned int ofs = 0;
306 320
307 /* 321 /*
308 * If there is not enough reserved sections, 322 * If there is not enough reserved sections,
@@ -318,28 +332,46 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
318 if (IS_NODESEG(type)) 332 if (IS_NODESEG(type))
319 return NULL_SEGNO; 333 return NULL_SEGNO;
320next: 334next:
321 segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++); 335 segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs);
322 ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit; 336 ofs += sbi->segs_per_sec;
337
323 if (segno < TOTAL_SEGS(sbi)) { 338 if (segno < TOTAL_SEGS(sbi)) {
339 int i;
340
324 /* skip intermediate segments in a section */ 341 /* skip intermediate segments in a section */
325 if (segno % ofs_unit) 342 if (segno % sbi->segs_per_sec)
326 goto next; 343 goto next;
327 344
328 /* skip if whole section is not prefree */ 345 /* skip if the section is currently used */
329 next_segno = find_next_zero_bit(prefree_segmap, 346 if (sec_usage_check(sbi, GET_SECNO(sbi, segno)))
330 TOTAL_SEGS(sbi), segno + 1);
331 if (next_segno - segno < ofs_unit)
332 goto next; 347 goto next;
333 348
349 /* skip if whole section is not prefree */
350 for (i = 1; i < sbi->segs_per_sec; i++)
351 if (!test_bit(segno + i, prefree_segmap))
352 goto next;
353
334 /* skip if whole section was not free at the last checkpoint */ 354 /* skip if whole section was not free at the last checkpoint */
335 for (i = 0; i < ofs_unit; i++) 355 for (i = 0; i < sbi->segs_per_sec; i++)
336 if (get_seg_entry(sbi, segno)->ckpt_valid_blocks) 356 if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks)
337 goto next; 357 goto next;
358
338 return segno; 359 return segno;
339 } 360 }
340 return NULL_SEGNO; 361 return NULL_SEGNO;
341} 362}
342 363
364static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
365{
366 struct curseg_info *curseg = CURSEG_I(sbi, type);
367 unsigned int segno = curseg->segno;
368 struct free_segmap_info *free_i = FREE_I(sbi);
369
370 if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec)
371 return !test_bit(segno + 1, free_i->free_segmap);
372 return 0;
373}
374
343/* 375/*
344 * Find a new segment from the free segments bitmap to right order 376 * Find a new segment from the free segments bitmap to right order
345 * This function should be returned with success, otherwise BUG 377 * This function should be returned with success, otherwise BUG
@@ -348,9 +380,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
348 unsigned int *newseg, bool new_sec, int dir) 380 unsigned int *newseg, bool new_sec, int dir)
349{ 381{
350 struct free_segmap_info *free_i = FREE_I(sbi); 382 struct free_segmap_info *free_i = FREE_I(sbi);
351 unsigned int total_secs = sbi->total_sections;
352 unsigned int segno, secno, zoneno; 383 unsigned int segno, secno, zoneno;
353 unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone; 384 unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone;
354 unsigned int hint = *newseg / sbi->segs_per_sec; 385 unsigned int hint = *newseg / sbi->segs_per_sec;
355 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); 386 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
356 unsigned int left_start = hint; 387 unsigned int left_start = hint;
@@ -363,16 +394,17 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
363 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 394 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
364 segno = find_next_zero_bit(free_i->free_segmap, 395 segno = find_next_zero_bit(free_i->free_segmap,
365 TOTAL_SEGS(sbi), *newseg + 1); 396 TOTAL_SEGS(sbi), *newseg + 1);
366 if (segno < TOTAL_SEGS(sbi)) 397 if (segno - *newseg < sbi->segs_per_sec -
398 (*newseg % sbi->segs_per_sec))
367 goto got_it; 399 goto got_it;
368 } 400 }
369find_other_zone: 401find_other_zone:
370 secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint); 402 secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint);
371 if (secno >= total_secs) { 403 if (secno >= TOTAL_SECS(sbi)) {
372 if (dir == ALLOC_RIGHT) { 404 if (dir == ALLOC_RIGHT) {
373 secno = find_next_zero_bit(free_i->free_secmap, 405 secno = find_next_zero_bit(free_i->free_secmap,
374 total_secs, 0); 406 TOTAL_SECS(sbi), 0);
375 BUG_ON(secno >= total_secs); 407 BUG_ON(secno >= TOTAL_SECS(sbi));
376 } else { 408 } else {
377 go_left = 1; 409 go_left = 1;
378 left_start = hint - 1; 410 left_start = hint - 1;
@@ -387,8 +419,8 @@ find_other_zone:
387 continue; 419 continue;
388 } 420 }
389 left_start = find_next_zero_bit(free_i->free_secmap, 421 left_start = find_next_zero_bit(free_i->free_secmap,
390 total_secs, 0); 422 TOTAL_SECS(sbi), 0);
391 BUG_ON(left_start >= total_secs); 423 BUG_ON(left_start >= TOTAL_SECS(sbi));
392 break; 424 break;
393 } 425 }
394 secno = left_start; 426 secno = left_start;
@@ -561,20 +593,20 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
561 int type, bool force) 593 int type, bool force)
562{ 594{
563 struct curseg_info *curseg = CURSEG_I(sbi, type); 595 struct curseg_info *curseg = CURSEG_I(sbi, type);
564 unsigned int ofs_unit;
565 596
566 if (force) { 597 if (force) {
567 new_curseg(sbi, type, true); 598 new_curseg(sbi, type, true);
568 goto out; 599 goto out;
569 } 600 }
570 601
571 ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec; 602 curseg->next_segno = check_prefree_segments(sbi, type);
572 curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
573 603
574 if (curseg->next_segno != NULL_SEGNO) 604 if (curseg->next_segno != NULL_SEGNO)
575 change_curseg(sbi, type, false); 605 change_curseg(sbi, type, false);
576 else if (type == CURSEG_WARM_NODE) 606 else if (type == CURSEG_WARM_NODE)
577 new_curseg(sbi, type, false); 607 new_curseg(sbi, type, false);
608 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
609 new_curseg(sbi, type, false);
578 else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) 610 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
579 change_curseg(sbi, type, true); 611 change_curseg(sbi, type, true);
580 else 612 else
@@ -656,10 +688,16 @@ static void do_submit_bio(struct f2fs_sb_info *sbi,
656 if (type >= META_FLUSH) 688 if (type >= META_FLUSH)
657 rw = WRITE_FLUSH_FUA; 689 rw = WRITE_FLUSH_FUA;
658 690
691 if (btype == META)
692 rw |= REQ_META;
693
659 if (sbi->bio[btype]) { 694 if (sbi->bio[btype]) {
660 struct bio_private *p = sbi->bio[btype]->bi_private; 695 struct bio_private *p = sbi->bio[btype]->bi_private;
661 p->sbi = sbi; 696 p->sbi = sbi;
662 sbi->bio[btype]->bi_end_io = f2fs_end_io_write; 697 sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
698
699 trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
700
663 if (type == META_FLUSH) { 701 if (type == META_FLUSH) {
664 DECLARE_COMPLETION_ONSTACK(wait); 702 DECLARE_COMPLETION_ONSTACK(wait);
665 p->is_sync = true; 703 p->is_sync = true;
@@ -696,7 +734,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
696 do_submit_bio(sbi, type, false); 734 do_submit_bio(sbi, type, false);
697alloc_new: 735alloc_new:
698 if (sbi->bio[type] == NULL) { 736 if (sbi->bio[type] == NULL) {
699 sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev)); 737 sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi));
700 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 738 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
701 /* 739 /*
702 * The end_io will be assigned at the sumbission phase. 740 * The end_io will be assigned at the sumbission phase.
@@ -714,6 +752,7 @@ alloc_new:
714 sbi->last_block_in_bio[type] = blk_addr; 752 sbi->last_block_in_bio[type] = blk_addr;
715 753
716 up_write(&sbi->bio_sem); 754 up_write(&sbi->bio_sem);
755 trace_f2fs_submit_write_page(page, blk_addr, type);
717} 756}
718 757
719static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 758static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
@@ -1390,7 +1429,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1390 } 1429 }
1391 1430
1392 if (sbi->segs_per_sec > 1) { 1431 if (sbi->segs_per_sec > 1) {
1393 sit_i->sec_entries = vzalloc(sbi->total_sections * 1432 sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) *
1394 sizeof(struct sec_entry)); 1433 sizeof(struct sec_entry));
1395 if (!sit_i->sec_entries) 1434 if (!sit_i->sec_entries)
1396 return -ENOMEM; 1435 return -ENOMEM;
@@ -1403,10 +1442,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1403 bitmap_size = __bitmap_size(sbi, SIT_BITMAP); 1442 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1404 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); 1443 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1405 1444
1406 dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL); 1445 dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1407 if (!dst_bitmap) 1446 if (!dst_bitmap)
1408 return -ENOMEM; 1447 return -ENOMEM;
1409 memcpy(dst_bitmap, src_bitmap, bitmap_size);
1410 1448
1411 /* init SIT information */ 1449 /* init SIT information */
1412 sit_i->s_ops = &default_salloc_ops; 1450 sit_i->s_ops = &default_salloc_ops;
@@ -1442,7 +1480,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1442 if (!free_i->free_segmap) 1480 if (!free_i->free_segmap)
1443 return -ENOMEM; 1481 return -ENOMEM;
1444 1482
1445 sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections); 1483 sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi));
1446 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); 1484 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1447 if (!free_i->free_secmap) 1485 if (!free_i->free_secmap)
1448 return -ENOMEM; 1486 return -ENOMEM;
@@ -1559,14 +1597,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1559 } 1597 }
1560} 1598}
1561 1599
1562static int init_victim_segmap(struct f2fs_sb_info *sbi) 1600static int init_victim_secmap(struct f2fs_sb_info *sbi)
1563{ 1601{
1564 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1602 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1565 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); 1603 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi));
1566 1604
1567 dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL); 1605 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
1568 dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL); 1606 if (!dirty_i->victim_secmap)
1569 if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1570 return -ENOMEM; 1607 return -ENOMEM;
1571 return 0; 1608 return 0;
1572} 1609}
@@ -1593,7 +1630,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1593 } 1630 }
1594 1631
1595 init_dirty_segmap(sbi); 1632 init_dirty_segmap(sbi);
1596 return init_victim_segmap(sbi); 1633 return init_victim_secmap(sbi);
1597} 1634}
1598 1635
1599/* 1636/*
@@ -1680,18 +1717,10 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1680 mutex_unlock(&dirty_i->seglist_lock); 1717 mutex_unlock(&dirty_i->seglist_lock);
1681} 1718}
1682 1719
1683void reset_victim_segmap(struct f2fs_sb_info *sbi) 1720static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
1684{
1685 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1686 memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1687}
1688
1689static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1690{ 1721{
1691 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 1722 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1692 1723 kfree(dirty_i->victim_secmap);
1693 kfree(dirty_i->victim_segmap[FG_GC]);
1694 kfree(dirty_i->victim_segmap[BG_GC]);
1695} 1724}
1696 1725
1697static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) 1726static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
@@ -1706,7 +1735,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1706 for (i = 0; i < NR_DIRTY_TYPE; i++) 1735 for (i = 0; i < NR_DIRTY_TYPE; i++)
1707 discard_dirty_segmap(sbi, i); 1736 discard_dirty_segmap(sbi, i);
1708 1737
1709 destroy_victim_segmap(sbi); 1738 destroy_victim_secmap(sbi);
1710 SM_I(sbi)->dirty_info = NULL; 1739 SM_I(sbi)->dirty_info = NULL;
1711 kfree(dirty_i); 1740 kfree(dirty_i);
1712} 1741}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 552dadbb2327..062424a0e4c3 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -8,10 +8,13 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#include <linux/blkdev.h>
12
11/* constant macro */ 13/* constant macro */
12#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0))
13 16
14/* V: Logical segment # in volume, R: Relative segment # in main area */ 17/* L: Logical segment # in volume, R: Relative segment # in main area */
15#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 18#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
16#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) 19#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
17 20
@@ -23,13 +26,13 @@
23 ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ 26 ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
24 (t == CURSEG_WARM_NODE)) 27 (t == CURSEG_WARM_NODE))
25 28
26#define IS_CURSEG(sbi, segno) \ 29#define IS_CURSEG(sbi, seg) \
27 ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ 30 ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
28 (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ 31 (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
29 (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ 32 (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
30 (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ 33 (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
31 (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ 34 (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
32 (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) 35 (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
33 36
34#define IS_CURSEC(sbi, secno) \ 37#define IS_CURSEC(sbi, secno) \
35 ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ 38 ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -81,9 +84,12 @@
81#define f2fs_bitmap_size(nr) \ 84#define f2fs_bitmap_size(nr) \
82 (BITS_TO_LONGS(nr) * sizeof(unsigned long)) 85 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
83#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) 86#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
87#define TOTAL_SECS(sbi) (sbi->total_sections)
84 88
85#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 89#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
86 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 90 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
91#define SECTOR_TO_BLOCK(sbi, sectors) \
92 (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
87 93
88/* during checkpoint, bio_private is used to synchronize the last bio */ 94/* during checkpoint, bio_private is used to synchronize the last bio */
89struct bio_private { 95struct bio_private {
@@ -213,7 +219,7 @@ struct dirty_seglist_info {
213 unsigned long *dirty_segmap[NR_DIRTY_TYPE]; 219 unsigned long *dirty_segmap[NR_DIRTY_TYPE];
214 struct mutex seglist_lock; /* lock for segment bitmaps */ 220 struct mutex seglist_lock; /* lock for segment bitmaps */
215 int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ 221 int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
216 unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ 222 unsigned long *victim_secmap; /* background GC victims */
217}; 223};
218 224
219/* victim selection function for cleaning and SSR */ 225/* victim selection function for cleaning and SSR */
@@ -464,8 +470,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
464 470
465static inline int utilization(struct f2fs_sb_info *sbi) 471static inline int utilization(struct f2fs_sb_info *sbi)
466{ 472{
467 return (long int)valid_user_blocks(sbi) * 100 / 473 return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count);
468 (long int)sbi->user_block_count;
469} 474}
470 475
471/* 476/*
@@ -616,3 +621,17 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
616 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) 621 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
617 - (base + 1) + type; 622 - (base + 1) + type;
618} 623}
624
625static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
626{
627 if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
628 return true;
629 return false;
630}
631
632static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
633{
634 struct block_device *bdev = sbi->sb->s_bdev;
635 struct request_queue *q = bdev_get_queue(bdev);
636 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
637}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 62e017743af6..8555f7df82c7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -12,7 +12,6 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/statfs.h> 14#include <linux/statfs.h>
15#include <linux/proc_fs.h>
16#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
18#include <linux/kthread.h> 17#include <linux/kthread.h>
@@ -21,12 +20,17 @@
21#include <linux/seq_file.h> 20#include <linux/seq_file.h>
22#include <linux/random.h> 21#include <linux/random.h>
23#include <linux/exportfs.h> 22#include <linux/exportfs.h>
23#include <linux/blkdev.h>
24#include <linux/f2fs_fs.h> 24#include <linux/f2fs_fs.h>
25 25
26#include "f2fs.h" 26#include "f2fs.h"
27#include "node.h" 27#include "node.h"
28#include "segment.h"
28#include "xattr.h" 29#include "xattr.h"
29 30
31#define CREATE_TRACE_POINTS
32#include <trace/events/f2fs.h>
33
30static struct kmem_cache *f2fs_inode_cachep; 34static struct kmem_cache *f2fs_inode_cachep;
31 35
32enum { 36enum {
@@ -94,6 +98,20 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
94 return &fi->vfs_inode; 98 return &fi->vfs_inode;
95} 99}
96 100
101static int f2fs_drop_inode(struct inode *inode)
102{
103 /*
104 * This is to avoid a deadlock condition like below.
105 * writeback_single_inode(inode)
106 * - f2fs_write_data_page
107 * - f2fs_gc -> iput -> evict
108 * - inode_wait_for_writeback(inode)
109 */
110 if (!inode_unhashed(inode) && inode->i_state & I_SYNC)
111 return 0;
112 return generic_drop_inode(inode);
113}
114
97static void f2fs_i_callback(struct rcu_head *head) 115static void f2fs_i_callback(struct rcu_head *head)
98{ 116{
99 struct inode *inode = container_of(head, struct inode, i_rcu); 117 struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -132,13 +150,18 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
132{ 150{
133 struct f2fs_sb_info *sbi = F2FS_SB(sb); 151 struct f2fs_sb_info *sbi = F2FS_SB(sb);
134 152
153 trace_f2fs_sync_fs(sb, sync);
154
135 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) 155 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
136 return 0; 156 return 0;
137 157
138 if (sync) 158 if (sync) {
159 mutex_lock(&sbi->gc_mutex);
139 write_checkpoint(sbi, false); 160 write_checkpoint(sbi, false);
140 else 161 mutex_unlock(&sbi->gc_mutex);
162 } else {
141 f2fs_balance_fs(sbi); 163 f2fs_balance_fs(sbi);
164 }
142 165
143 return 0; 166 return 0;
144} 167}
@@ -180,7 +203,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
180 buf->f_files = sbi->total_node_count; 203 buf->f_files = sbi->total_node_count;
181 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); 204 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
182 205
183 buf->f_namelen = F2FS_MAX_NAME_LEN; 206 buf->f_namelen = F2FS_NAME_LEN;
184 buf->f_fsid.val[0] = (u32)id; 207 buf->f_fsid.val[0] = (u32)id;
185 buf->f_fsid.val[1] = (u32)(id >> 32); 208 buf->f_fsid.val[1] = (u32)(id >> 32);
186 209
@@ -223,6 +246,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
223 246
224static struct super_operations f2fs_sops = { 247static struct super_operations f2fs_sops = {
225 .alloc_inode = f2fs_alloc_inode, 248 .alloc_inode = f2fs_alloc_inode,
249 .drop_inode = f2fs_drop_inode,
226 .destroy_inode = f2fs_destroy_inode, 250 .destroy_inode = f2fs_destroy_inode,
227 .write_inode = f2fs_write_inode, 251 .write_inode = f2fs_write_inode,
228 .show_options = f2fs_show_options, 252 .show_options = f2fs_show_options,
@@ -457,6 +481,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
457 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); 481 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
458 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); 482 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
459 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); 483 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
484 sbi->cur_victim_sec = NULL_SECNO;
460 485
461 for (i = 0; i < NR_COUNT_TYPE; i++) 486 for (i = 0; i < NR_COUNT_TYPE; i++)
462 atomic_set(&sbi->nr_pages[i], 0); 487 atomic_set(&sbi->nr_pages[i], 0);
@@ -473,7 +498,7 @@ static int validate_superblock(struct super_block *sb,
473 if (!*raw_super_buf) { 498 if (!*raw_super_buf) {
474 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", 499 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
475 super); 500 super);
476 return 1; 501 return -EIO;
477 } 502 }
478 503
479 *raw_super = (struct f2fs_super_block *) 504 *raw_super = (struct f2fs_super_block *)
@@ -485,7 +510,7 @@ static int validate_superblock(struct super_block *sb,
485 510
486 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " 511 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
487 "in %s superblock", super); 512 "in %s superblock", super);
488 return 1; 513 return -EINVAL;
489} 514}
490 515
491static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 516static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -508,9 +533,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
508 goto free_sbi; 533 goto free_sbi;
509 } 534 }
510 535
511 if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) { 536 err = validate_superblock(sb, &raw_super, &raw_super_buf, 0);
537 if (err) {
512 brelse(raw_super_buf); 538 brelse(raw_super_buf);
513 if (validate_superblock(sb, &raw_super, &raw_super_buf, 1)) 539 /* check secondary superblock when primary failed */
540 err = validate_superblock(sb, &raw_super, &raw_super_buf, 1);
541 if (err)
514 goto free_sb_buf; 542 goto free_sb_buf;
515 } 543 }
516 /* init some FS parameters */ 544 /* init some FS parameters */
@@ -525,7 +553,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
525 set_opt(sbi, POSIX_ACL); 553 set_opt(sbi, POSIX_ACL);
526#endif 554#endif
527 /* parse mount options */ 555 /* parse mount options */
528 if (parse_options(sb, sbi, (char *)data)) 556 err = parse_options(sb, sbi, (char *)data);
557 if (err)
529 goto free_sb_buf; 558 goto free_sb_buf;
530 559
531 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 560 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
@@ -547,11 +576,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
547 sbi->raw_super = raw_super; 576 sbi->raw_super = raw_super;
548 sbi->raw_super_buf = raw_super_buf; 577 sbi->raw_super_buf = raw_super_buf;
549 mutex_init(&sbi->gc_mutex); 578 mutex_init(&sbi->gc_mutex);
550 mutex_init(&sbi->write_inode);
551 mutex_init(&sbi->writepages); 579 mutex_init(&sbi->writepages);
552 mutex_init(&sbi->cp_mutex); 580 mutex_init(&sbi->cp_mutex);
553 for (i = 0; i < NR_LOCK_TYPE; i++) 581 for (i = 0; i < NR_GLOBAL_LOCKS; i++)
554 mutex_init(&sbi->fs_lock[i]); 582 mutex_init(&sbi->fs_lock[i]);
583 mutex_init(&sbi->node_write);
555 sbi->por_doing = 0; 584 sbi->por_doing = 0;
556 spin_lock_init(&sbi->stat_lock); 585 spin_lock_init(&sbi->stat_lock);
557 init_rwsem(&sbi->bio_sem); 586 init_rwsem(&sbi->bio_sem);
@@ -638,8 +667,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
638 } 667 }
639 668
640 /* recover fsynced data */ 669 /* recover fsynced data */
641 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) 670 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
642 recover_fsync_data(sbi); 671 err = recover_fsync_data(sbi);
672 if (err)
673 f2fs_msg(sb, KERN_ERR,
674 "Cannot recover all fsync data errno=%ld", err);
675 }
643 676
644 /* After POR, we can run background GC thread */ 677 /* After POR, we can run background GC thread */
645 err = start_gc_thread(sbi); 678 err = start_gc_thread(sbi);
@@ -650,6 +683,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
650 if (err) 683 if (err)
651 goto fail; 684 goto fail;
652 685
686 if (test_opt(sbi, DISCARD)) {
687 struct request_queue *q = bdev_get_queue(sb->s_bdev);
688 if (!blk_queue_discard(q))
689 f2fs_msg(sb, KERN_WARNING,
690 "mounting with \"discard\" option, but "
691 "the device does not support discard");
692 }
693
653 return 0; 694 return 0;
654fail: 695fail:
655 stop_gc_thread(sbi); 696 stop_gc_thread(sbi);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8038c0496504..0b02dce31356 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -307,27 +307,30 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
307 int error, found, free, newsize; 307 int error, found, free, newsize;
308 size_t name_len; 308 size_t name_len;
309 char *pval; 309 char *pval;
310 int ilock;
310 311
311 if (name == NULL) 312 if (name == NULL)
312 return -EINVAL; 313 return -EINVAL;
313 name_len = strlen(name);
314 314
315 if (value == NULL) 315 if (value == NULL)
316 value_len = 0; 316 value_len = 0;
317 317
318 if (name_len > 255 || value_len > MAX_VALUE_LEN) 318 name_len = strlen(name);
319
320 if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN)
319 return -ERANGE; 321 return -ERANGE;
320 322
321 f2fs_balance_fs(sbi); 323 f2fs_balance_fs(sbi);
322 324
323 mutex_lock_op(sbi, NODE_NEW); 325 ilock = mutex_lock_op(sbi);
326
324 if (!fi->i_xattr_nid) { 327 if (!fi->i_xattr_nid) {
325 /* Allocate new attribute block */ 328 /* Allocate new attribute block */
326 struct dnode_of_data dn; 329 struct dnode_of_data dn;
327 330
328 if (!alloc_nid(sbi, &fi->i_xattr_nid)) { 331 if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
329 mutex_unlock_op(sbi, NODE_NEW); 332 error = -ENOSPC;
330 return -ENOSPC; 333 goto exit;
331 } 334 }
332 set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); 335 set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
333 mark_inode_dirty(inode); 336 mark_inode_dirty(inode);
@@ -336,8 +339,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
336 if (IS_ERR(page)) { 339 if (IS_ERR(page)) {
337 alloc_nid_failed(sbi, fi->i_xattr_nid); 340 alloc_nid_failed(sbi, fi->i_xattr_nid);
338 fi->i_xattr_nid = 0; 341 fi->i_xattr_nid = 0;
339 mutex_unlock_op(sbi, NODE_NEW); 342 error = PTR_ERR(page);
340 return PTR_ERR(page); 343 goto exit;
341 } 344 }
342 345
343 alloc_nid_done(sbi, fi->i_xattr_nid); 346 alloc_nid_done(sbi, fi->i_xattr_nid);
@@ -349,8 +352,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
349 /* The inode already has an extended attribute block. */ 352 /* The inode already has an extended attribute block. */
350 page = get_node_page(sbi, fi->i_xattr_nid); 353 page = get_node_page(sbi, fi->i_xattr_nid);
351 if (IS_ERR(page)) { 354 if (IS_ERR(page)) {
352 mutex_unlock_op(sbi, NODE_NEW); 355 error = PTR_ERR(page);
353 return PTR_ERR(page); 356 goto exit;
354 } 357 }
355 358
356 base_addr = page_address(page); 359 base_addr = page_address(page);
@@ -432,12 +435,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
432 inode->i_ctime = CURRENT_TIME; 435 inode->i_ctime = CURRENT_TIME;
433 clear_inode_flag(fi, FI_ACL_MODE); 436 clear_inode_flag(fi, FI_ACL_MODE);
434 } 437 }
435 f2fs_write_inode(inode, NULL); 438 update_inode_page(inode);
436 mutex_unlock_op(sbi, NODE_NEW); 439 mutex_unlock_op(sbi, ilock);
437 440
438 return 0; 441 return 0;
439cleanup: 442cleanup:
440 f2fs_put_page(page, 1); 443 f2fs_put_page(page, 1);
441 mutex_unlock_op(sbi, NODE_NEW); 444exit:
445 mutex_unlock_op(sbi, ilock);
442 return error; 446 return error;
443} 447}