aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 15:10:21 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-16 15:10:21 -0500
commita02cd4229e298aadbe8f5cf286edee8058d87116 (patch)
treebf22338b0280b9c5d638c9277e9cb8d96d4746f9
parent487e2c9f44c4b5ea23bfe87bb34679f7297a0bce (diff)
parentead710b7d82dc9e8184e10871c155a3ed8b3f673 (diff)
Merge tag 'f2fs-for-4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we introduce sysfile-based quota support which is required for Android by default. In addition, we allow that users are able to reserve some blocks in runtime to mitigate performance drops in low free space. Enhancements: - assign proper data segments according to write_hints given by user - issue cache_flush on dirty devices only among multiple devices - exploit cp_error flag and add more faults to enhance fault injection test - conduct more readaheads during f2fs_readdir - add a range for discard commands Bug fixes: - fix zero stat->st_blocks when inline_data is set - drop crypto key and free stale memory pointer while evict_inode is failing - fix some corner cases in free space and segment management - fix wrong last_disk_size This series includes lots of clean-ups and code enhancement in terms of xattr operations, discard/flush command control. In addition, it adds versatile debugfs entries to monitor f2fs status" * tag 'f2fs-for-4.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (75 commits) f2fs: deny accessing encryption policy if encryption is off f2fs: inject fault in inc_valid_node_count f2fs: fix to clear FI_NO_PREALLOC f2fs: expose quota information in debugfs f2fs: separate nat entry mem alloc from nat_tree_lock f2fs: validate before set/clear free nat bitmap f2fs: avoid opened loop codes in __add_ino_entry f2fs: apply write hints to select the type of segments for buffered write f2fs: introduce scan_curseg_cache for cleanup f2fs: optimize the way of traversing free_nid_bitmap f2fs: keep scanning until enough free nids are acquired f2fs: trace checkpoint reason in fsync() f2fs: keep isize once block is reserved cross EOF f2fs: avoid race in between GC and block exchange f2fs: save a multiplication for last_nid calculation f2fs: fix summary info corruption f2fs: remove dead code in update_meta_page f2fs: remove unneeded semicolon f2fs: don't bother with inode->i_version f2fs: 
check curseg space before foreground GC ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs37
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/checkpoint.c64
-rw-r--r--fs/f2fs/data.c37
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/f2fs.h222
-rw-r--r--fs/f2fs/file.c123
-rw-r--r--fs/f2fs/gc.c37
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c26
-rw-r--r--fs/f2fs/namei.c101
-rw-r--r--fs/f2fs/node.c410
-rw-r--r--fs/f2fs/node.h16
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c509
-rw-r--r--fs/f2fs/segment.h39
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c219
-rw-r--r--fs/f2fs/sysfs.c53
-rw-r--r--fs/f2fs/xattr.c174
-rw-r--r--include/linux/f2fs_fs.h10
-rw-r--r--include/trace/events/f2fs.h116
23 files changed, 1648 insertions, 622 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 11b7f4ebea7c..a7799c2fca28 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,18 @@ Description:
51 Controls the dirty page count condition for the in-place-update 51 Controls the dirty page count condition for the in-place-update
52 policies. 52 policies.
53 53
54What: /sys/fs/f2fs/<disk>/min_hot_blocks
55Date: March 2017
56Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
57Description:
58 Controls the dirty page count condition for redefining hot data.
59
60What: /sys/fs/f2fs/<disk>/min_ssr_sections
61Date: October 2017
62Contact: "Chao Yu" <yuchao0@huawei.com>
63Description:
 64 Controls the free section threshold to trigger SSR allocation.
65
54What: /sys/fs/f2fs/<disk>/max_small_discards 66What: /sys/fs/f2fs/<disk>/max_small_discards
55Date: November 2013 67Date: November 2013
56Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> 68Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -102,6 +114,12 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
102Description: 114Description:
103 Controls the idle timing. 115 Controls the idle timing.
104 116
117What: /sys/fs/f2fs/<disk>/iostat_enable
118Date: August 2017
119Contact: "Chao Yu" <yuchao0@huawei.com>
120Description:
121 Controls to enable/disable IO stat.
122
105What: /sys/fs/f2fs/<disk>/ra_nid_pages 123What: /sys/fs/f2fs/<disk>/ra_nid_pages
106Date: October 2015 124Date: October 2015
107Contact: "Chao Yu" <chao2.yu@samsung.com> 125Contact: "Chao Yu" <chao2.yu@samsung.com>
@@ -122,6 +140,12 @@ Contact: "Shuoran Liu" <liushuoran@huawei.com>
122Description: 140Description:
123 Shows total written kbytes issued to disk. 141 Shows total written kbytes issued to disk.
124 142
143What: /sys/fs/f2fs/<disk>/feature
144Date: July 2017
145Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
146Description:
147 Shows all enabled features in current device.
148
125What: /sys/fs/f2fs/<disk>/inject_rate 149What: /sys/fs/f2fs/<disk>/inject_rate
126Date: May 2016 150Date: May 2016
127Contact: "Sheng Yong" <shengyong1@huawei.com> 151Contact: "Sheng Yong" <shengyong1@huawei.com>
@@ -138,7 +162,18 @@ What: /sys/fs/f2fs/<disk>/reserved_blocks
138Date: June 2017 162Date: June 2017
139Contact: "Chao Yu" <yuchao0@huawei.com> 163Contact: "Chao Yu" <yuchao0@huawei.com>
140Description: 164Description:
141 Controls current reserved blocks in system. 165 Controls target reserved blocks in system, the threshold
166 is soft, it could exceed current available user space.
167
168What: /sys/fs/f2fs/<disk>/current_reserved_blocks
169Date: October 2017
170Contact: "Yunlong Song" <yunlong.song@huawei.com>
171Contact: "Chao Yu" <yuchao0@huawei.com>
172Description:
173 Shows current reserved blocks in system, it may be temporarily
174 smaller than target_reserved_blocks, but will gradually
175 increase to target_reserved_blocks when more free blocks are
176 freed by user later.
142 177
143What: /sys/fs/f2fs/<disk>/gc_urgent 178What: /sys/fs/f2fs/<disk>/gc_urgent
144Date: August 2017 179Date: August 2017
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 436b3a1464d9..2bb7c9fc5144 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -250,6 +250,9 @@ static int __f2fs_set_acl(struct inode *inode, int type,
250 250
251int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 251int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
252{ 252{
253 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
254 return -EIO;
255
253 return __f2fs_set_acl(inode, type, acl, NULL); 256 return __f2fs_set_acl(inode, type, acl, NULL);
254} 257}
255 258
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0bb8e2c022d3..dd2e73e10857 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab;
29void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) 29void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
30{ 30{
31 set_ckpt_flags(sbi, CP_ERROR_FLAG); 31 set_ckpt_flags(sbi, CP_ERROR_FLAG);
32 sbi->sb->s_flags |= MS_RDONLY;
33 if (!end_io) 32 if (!end_io)
34 f2fs_flush_merged_writes(sbi); 33 f2fs_flush_merged_writes(sbi);
35} 34}
@@ -398,24 +397,23 @@ const struct address_space_operations f2fs_meta_aops = {
398#endif 397#endif
399}; 398};
400 399
401static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 400static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
401 unsigned int devidx, int type)
402{ 402{
403 struct inode_management *im = &sbi->im[type]; 403 struct inode_management *im = &sbi->im[type];
404 struct ino_entry *e, *tmp; 404 struct ino_entry *e, *tmp;
405 405
406 tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); 406 tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
407retry: 407
408 radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); 408 radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
409 409
410 spin_lock(&im->ino_lock); 410 spin_lock(&im->ino_lock);
411 e = radix_tree_lookup(&im->ino_root, ino); 411 e = radix_tree_lookup(&im->ino_root, ino);
412 if (!e) { 412 if (!e) {
413 e = tmp; 413 e = tmp;
414 if (radix_tree_insert(&im->ino_root, ino, e)) { 414 if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
415 spin_unlock(&im->ino_lock); 415 f2fs_bug_on(sbi, 1);
416 radix_tree_preload_end(); 416
417 goto retry;
418 }
419 memset(e, 0, sizeof(struct ino_entry)); 417 memset(e, 0, sizeof(struct ino_entry));
420 e->ino = ino; 418 e->ino = ino;
421 419
@@ -423,6 +421,10 @@ retry:
423 if (type != ORPHAN_INO) 421 if (type != ORPHAN_INO)
424 im->ino_num++; 422 im->ino_num++;
425 } 423 }
424
425 if (type == FLUSH_INO)
426 f2fs_set_bit(devidx, (char *)&e->dirty_device);
427
426 spin_unlock(&im->ino_lock); 428 spin_unlock(&im->ino_lock);
427 radix_tree_preload_end(); 429 radix_tree_preload_end();
428 430
@@ -451,7 +453,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
451void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 453void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
452{ 454{
453 /* add new dirty ino entry into list */ 455 /* add new dirty ino entry into list */
454 __add_ino_entry(sbi, ino, type); 456 __add_ino_entry(sbi, ino, 0, type);
455} 457}
456 458
457void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 459void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -477,7 +479,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
477 struct ino_entry *e, *tmp; 479 struct ino_entry *e, *tmp;
478 int i; 480 int i;
479 481
480 for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { 482 for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
481 struct inode_management *im = &sbi->im[i]; 483 struct inode_management *im = &sbi->im[i];
482 484
483 spin_lock(&im->ino_lock); 485 spin_lock(&im->ino_lock);
@@ -491,6 +493,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
491 } 493 }
492} 494}
493 495
496void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
497 unsigned int devidx, int type)
498{
499 __add_ino_entry(sbi, ino, devidx, type);
500}
501
502bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
503 unsigned int devidx, int type)
504{
505 struct inode_management *im = &sbi->im[type];
506 struct ino_entry *e;
507 bool is_dirty = false;
508
509 spin_lock(&im->ino_lock);
510 e = radix_tree_lookup(&im->ino_root, ino);
511 if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
512 is_dirty = true;
513 spin_unlock(&im->ino_lock);
514 return is_dirty;
515}
516
494int acquire_orphan_inode(struct f2fs_sb_info *sbi) 517int acquire_orphan_inode(struct f2fs_sb_info *sbi)
495{ 518{
496 struct inode_management *im = &sbi->im[ORPHAN_INO]; 519 struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -527,7 +550,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
527void add_orphan_inode(struct inode *inode) 550void add_orphan_inode(struct inode *inode)
528{ 551{
529 /* add new orphan ino entry into list */ 552 /* add new orphan ino entry into list */
530 __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); 553 __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
531 update_inode_page(inode); 554 update_inode_page(inode);
532} 555}
533 556
@@ -551,7 +574,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
551 return err; 574 return err;
552 } 575 }
553 576
554 __add_ino_entry(sbi, ino, ORPHAN_INO); 577 __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
555 578
556 inode = f2fs_iget_retry(sbi->sb, ino); 579 inode = f2fs_iget_retry(sbi->sb, ino);
557 if (IS_ERR(inode)) { 580 if (IS_ERR(inode)) {
@@ -587,6 +610,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
587 block_t start_blk, orphan_blocks, i, j; 610 block_t start_blk, orphan_blocks, i, j;
588 unsigned int s_flags = sbi->sb->s_flags; 611 unsigned int s_flags = sbi->sb->s_flags;
589 int err = 0; 612 int err = 0;
613#ifdef CONFIG_QUOTA
614 int quota_enabled;
615#endif
590 616
591 if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) 617 if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
592 return 0; 618 return 0;
@@ -599,8 +625,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
599#ifdef CONFIG_QUOTA 625#ifdef CONFIG_QUOTA
600 /* Needed for iput() to work correctly and not trash data */ 626 /* Needed for iput() to work correctly and not trash data */
601 sbi->sb->s_flags |= MS_ACTIVE; 627 sbi->sb->s_flags |= MS_ACTIVE;
628
602 /* Turn on quotas so that they are updated correctly */ 629 /* Turn on quotas so that they are updated correctly */
603 f2fs_enable_quota_files(sbi); 630 quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
604#endif 631#endif
605 632
606 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); 633 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -628,7 +655,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
628out: 655out:
629#ifdef CONFIG_QUOTA 656#ifdef CONFIG_QUOTA
630 /* Turn quotas off */ 657 /* Turn quotas off */
631 f2fs_quota_off_umount(sbi->sb); 658 if (quota_enabled)
659 f2fs_quota_off_umount(sbi->sb);
632#endif 660#endif
633 sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 661 sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
634 662
@@ -983,7 +1011,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
983 update_inode_page(inode); 1011 update_inode_page(inode);
984 iput(inode); 1012 iput(inode);
985 } 1013 }
986 }; 1014 }
987 return 0; 1015 return 0;
988} 1016}
989 1017
@@ -1143,6 +1171,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1143 struct super_block *sb = sbi->sb; 1171 struct super_block *sb = sbi->sb;
1144 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); 1172 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1145 u64 kbytes_written; 1173 u64 kbytes_written;
1174 int err;
1146 1175
1147 /* Flush all the NAT/SIT pages */ 1176 /* Flush all the NAT/SIT pages */
1148 while (get_pages(sbi, F2FS_DIRTY_META)) { 1177 while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1236,6 +1265,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1236 if (unlikely(f2fs_cp_error(sbi))) 1265 if (unlikely(f2fs_cp_error(sbi)))
1237 return -EIO; 1266 return -EIO;
1238 1267
1268 /* flush all device cache */
1269 err = f2fs_flush_device_cache(sbi);
1270 if (err)
1271 return err;
1272
1239 /* write out checkpoint buffer at block 0 */ 1273 /* write out checkpoint buffer at block 0 */
1240 update_meta_page(sbi, ckpt, start_blk++); 1274 update_meta_page(sbi, ckpt, start_blk++);
1241 1275
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7b3ad5d8e2e9..516fa0d3ff9c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -173,7 +173,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
173{ 173{
174 struct bio *bio; 174 struct bio *bio;
175 175
176 bio = f2fs_bio_alloc(npages); 176 bio = f2fs_bio_alloc(sbi, npages, true);
177 177
178 f2fs_target_device(sbi, blk_addr, bio); 178 f2fs_target_device(sbi, blk_addr, bio);
179 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 179 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
@@ -418,8 +418,8 @@ next:
418 418
419 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; 419 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
420 420
421 /* set submitted = 1 as a return value */ 421 /* set submitted = true as a return value */
422 fio->submitted = 1; 422 fio->submitted = true;
423 423
424 inc_page_count(sbi, WB_DATA_TYPE(bio_page)); 424 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
425 425
@@ -473,7 +473,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
473 f2fs_wait_on_block_writeback(sbi, blkaddr); 473 f2fs_wait_on_block_writeback(sbi, blkaddr);
474 } 474 }
475 475
476 bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); 476 bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
477 if (!bio) { 477 if (!bio) {
478 if (ctx) 478 if (ctx)
479 fscrypt_release_ctx(ctx); 479 fscrypt_release_ctx(ctx);
@@ -833,6 +833,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
833 struct f2fs_map_blocks map; 833 struct f2fs_map_blocks map;
834 int err = 0; 834 int err = 0;
835 835
836 /* convert inline data for Direct I/O*/
837 if (iocb->ki_flags & IOCB_DIRECT) {
838 err = f2fs_convert_inline_inode(inode);
839 if (err)
840 return err;
841 }
842
836 if (is_inode_flag_set(inode, FI_NO_PREALLOC)) 843 if (is_inode_flag_set(inode, FI_NO_PREALLOC))
837 return 0; 844 return 0;
838 845
@@ -845,15 +852,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
845 852
846 map.m_next_pgofs = NULL; 853 map.m_next_pgofs = NULL;
847 854
848 if (iocb->ki_flags & IOCB_DIRECT) { 855 if (iocb->ki_flags & IOCB_DIRECT)
849 err = f2fs_convert_inline_inode(inode);
850 if (err)
851 return err;
852 return f2fs_map_blocks(inode, &map, 1, 856 return f2fs_map_blocks(inode, &map, 1,
853 __force_buffered_io(inode, WRITE) ? 857 __force_buffered_io(inode, WRITE) ?
854 F2FS_GET_BLOCK_PRE_AIO : 858 F2FS_GET_BLOCK_PRE_AIO :
855 F2FS_GET_BLOCK_PRE_DIO); 859 F2FS_GET_BLOCK_PRE_DIO);
856 }
857 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { 860 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
858 err = f2fs_convert_inline_inode(inode); 861 err = f2fs_convert_inline_inode(inode);
859 if (err) 862 if (err)
@@ -1334,7 +1337,7 @@ static int f2fs_read_data_pages(struct file *file,
1334 struct address_space *mapping, 1337 struct address_space *mapping,
1335 struct list_head *pages, unsigned nr_pages) 1338 struct list_head *pages, unsigned nr_pages)
1336{ 1339{
1337 struct inode *inode = file->f_mapping->host; 1340 struct inode *inode = mapping->host;
1338 struct page *page = list_last_entry(pages, struct page, lru); 1341 struct page *page = list_last_entry(pages, struct page, lru);
1339 1342
1340 trace_f2fs_readpages(inode, page, nr_pages); 1343 trace_f2fs_readpages(inode, page, nr_pages);
@@ -1495,6 +1498,7 @@ static int __write_data_page(struct page *page, bool *submitted,
1495 int err = 0; 1498 int err = 0;
1496 struct f2fs_io_info fio = { 1499 struct f2fs_io_info fio = {
1497 .sbi = sbi, 1500 .sbi = sbi,
1501 .ino = inode->i_ino,
1498 .type = DATA, 1502 .type = DATA,
1499 .op = REQ_OP_WRITE, 1503 .op = REQ_OP_WRITE,
1500 .op_flags = wbc_to_write_flags(wbc), 1504 .op_flags = wbc_to_write_flags(wbc),
@@ -1566,8 +1570,11 @@ write:
1566 err = do_write_data_page(&fio); 1570 err = do_write_data_page(&fio);
1567 } 1571 }
1568 } 1572 }
1573
1574 down_write(&F2FS_I(inode)->i_sem);
1569 if (F2FS_I(inode)->last_disk_size < psize) 1575 if (F2FS_I(inode)->last_disk_size < psize)
1570 F2FS_I(inode)->last_disk_size = psize; 1576 F2FS_I(inode)->last_disk_size = psize;
1577 up_write(&F2FS_I(inode)->i_sem);
1571 1578
1572done: 1579done:
1573 if (err && err != -ENOENT) 1580 if (err && err != -ENOENT)
@@ -1932,6 +1939,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1932 1939
1933 trace_f2fs_write_begin(inode, pos, len, flags); 1940 trace_f2fs_write_begin(inode, pos, len, flags);
1934 1941
1942 if (f2fs_is_atomic_file(inode) &&
1943 !available_free_memory(sbi, INMEM_PAGES)) {
1944 err = -ENOMEM;
1945 goto fail;
1946 }
1947
1935 /* 1948 /*
1936 * We should check this at this moment to avoid deadlock on inode page 1949 * We should check this at this moment to avoid deadlock on inode page
1937 * and #0 page. The locking rule for inline_data conversion should be: 1950 * and #0 page. The locking rule for inline_data conversion should be:
@@ -1947,7 +1960,7 @@ repeat:
1947 * Do not use grab_cache_page_write_begin() to avoid deadlock due to 1960 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
1948 * wait_for_stable_page. Will wait that below with our IO control. 1961 * wait_for_stable_page. Will wait that below with our IO control.
1949 */ 1962 */
1950 page = pagecache_get_page(mapping, index, 1963 page = f2fs_pagecache_get_page(mapping, index,
1951 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); 1964 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
1952 if (!page) { 1965 if (!page) {
1953 err = -ENOMEM; 1966 err = -ENOMEM;
@@ -2009,6 +2022,8 @@ repeat:
2009fail: 2022fail:
2010 f2fs_put_page(page, 1); 2023 f2fs_put_page(page, 1);
2011 f2fs_write_failed(mapping, pos + len); 2024 f2fs_write_failed(mapping, pos + len);
2025 if (f2fs_is_atomic_file(inode))
2026 drop_inmem_pages_all(sbi);
2012 return err; 2027 return err;
2013} 2028}
2014 2029
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 87f449845f5f..ecada8425268 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
45 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); 45 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
46 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); 46 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
47 si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); 47 si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
48 si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
48 si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); 49 si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
49 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; 50 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
50 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 51 si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
52
53 si->nquota_files = 0;
54 if (f2fs_sb_has_quota_ino(sbi->sb)) {
55 for (i = 0; i < MAXQUOTAS; i++) {
56 if (f2fs_qf_ino(sbi->sb, i))
57 si->nquota_files++;
58 }
59 }
51 si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; 60 si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
52 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 61 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
53 si->aw_cnt = atomic_read(&sbi->aw_cnt); 62 si->aw_cnt = atomic_read(&sbi->aw_cnt);
@@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
61 atomic_read(&SM_I(sbi)->fcc_info->issued_flush); 70 atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
62 si->nr_flushing = 71 si->nr_flushing =
63 atomic_read(&SM_I(sbi)->fcc_info->issing_flush); 72 atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
73 si->flush_list_empty =
74 llist_empty(&SM_I(sbi)->fcc_info->issue_list);
64 } 75 }
65 if (SM_I(sbi) && SM_I(sbi)->dcc_info) { 76 if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
66 si->nr_discarded = 77 si->nr_discarded =
@@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
96 si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; 107 si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
97 si->sits = MAIN_SEGS(sbi); 108 si->sits = MAIN_SEGS(sbi);
98 si->dirty_sits = SIT_I(sbi)->dirty_sentries; 109 si->dirty_sits = SIT_I(sbi)->dirty_sentries;
99 si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; 110 si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
100 si->avail_nids = NM_I(sbi)->available_nids; 111 si->avail_nids = NM_I(sbi)->available_nids;
101 si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; 112 si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
102 si->bg_gc = sbi->bg_gc; 113 si->bg_gc = sbi->bg_gc;
103 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) 114 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
104 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) 115 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -231,14 +242,14 @@ get_cache:
231 } 242 }
232 243
233 /* free nids */ 244 /* free nids */
234 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + 245 si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
235 NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) * 246 NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
236 sizeof(struct free_nid); 247 sizeof(struct free_nid);
237 si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); 248 si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
238 si->cache_mem += NM_I(sbi)->dirty_nat_cnt * 249 si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
239 sizeof(struct nat_entry_set); 250 sizeof(struct nat_entry_set);
240 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 251 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
241 for (i = 0; i <= ORPHAN_INO; i++) 252 for (i = 0; i < MAX_INO_ENTRY; i++)
242 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 253 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
243 si->cache_mem += atomic_read(&sbi->total_ext_tree) * 254 si->cache_mem += atomic_read(&sbi->total_ext_tree) *
244 sizeof(struct extent_tree); 255 sizeof(struct extent_tree);
@@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v)
262 list_for_each_entry(si, &f2fs_stat_list, stat_list) { 273 list_for_each_entry(si, &f2fs_stat_list, stat_list) {
263 update_general_status(si->sbi); 274 update_general_status(si->sbi);
264 275
265 seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", 276 seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
266 si->sbi->sb->s_bdev, i++, 277 si->sbi->sb->s_bdev, i++,
267 f2fs_readonly(si->sbi->sb) ? "RO": "RW"); 278 f2fs_readonly(si->sbi->sb) ? "RO": "RW",
279 f2fs_cp_error(si->sbi) ? "Error": "Good");
268 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", 280 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
269 si->sit_area_segs, si->nat_area_segs); 281 si->sit_area_segs, si->nat_area_segs);
270 seq_printf(s, "[SSA: %d] [MAIN: %d", 282 seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v)
349 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 361 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
350 si->ext_tree, si->zombie_tree, si->ext_node); 362 si->ext_tree, si->zombie_tree, si->ext_node);
351 seq_puts(s, "\nBalancing F2FS Async:\n"); 363 seq_puts(s, "\nBalancing F2FS Async:\n");
352 seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " 364 seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
353 "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", 365 "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
354 si->nr_wb_cp_data, si->nr_wb_data, 366 si->nr_wb_cp_data, si->nr_wb_data,
355 si->nr_flushing, si->nr_flushed, 367 si->nr_flushing, si->nr_flushed,
368 si->flush_list_empty,
356 si->nr_discarding, si->nr_discarded, 369 si->nr_discarding, si->nr_discarded,
357 si->nr_discard_cmd, si->undiscard_blks); 370 si->nr_discard_cmd, si->undiscard_blks);
358 seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " 371 seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
@@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
365 si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); 378 si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
366 seq_printf(s, " - datas: %4d in files:%4d\n", 379 seq_printf(s, " - datas: %4d in files:%4d\n",
367 si->ndirty_data, si->ndirty_files); 380 si->ndirty_data, si->ndirty_files);
381 seq_printf(s, " - quota datas: %4d in quota files:%4d\n",
382 si->ndirty_qdata, si->nquota_files);
368 seq_printf(s, " - meta: %4d in %4d\n", 383 seq_printf(s, " - meta: %4d in %4d\n",
369 si->ndirty_meta, si->meta_pages); 384 si->ndirty_meta, si->meta_pages);
370 seq_printf(s, " - imeta: %4d\n", 385 seq_printf(s, " - imeta: %4d\n",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c0c933ad43c8..2d98d877c09d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -10,10 +10,12 @@
10 */ 10 */
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/sched/signal.h>
13#include "f2fs.h" 14#include "f2fs.h"
14#include "node.h" 15#include "node.h"
15#include "acl.h" 16#include "acl.h"
16#include "xattr.h" 17#include "xattr.h"
18#include <trace/events/f2fs.h>
17 19
18static unsigned long dir_blocks(struct inode *inode) 20static unsigned long dir_blocks(struct inode *inode)
19{ 21{
@@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
847 struct f2fs_dentry_block *dentry_blk = NULL; 849 struct f2fs_dentry_block *dentry_blk = NULL;
848 struct page *dentry_page = NULL; 850 struct page *dentry_page = NULL;
849 struct file_ra_state *ra = &file->f_ra; 851 struct file_ra_state *ra = &file->f_ra;
852 loff_t start_pos = ctx->pos;
850 unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); 853 unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
851 struct f2fs_dentry_ptr d; 854 struct f2fs_dentry_ptr d;
852 struct fscrypt_str fstr = FSTR_INIT(NULL, 0); 855 struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
@@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
855 if (f2fs_encrypted_inode(inode)) { 858 if (f2fs_encrypted_inode(inode)) {
856 err = fscrypt_get_encryption_info(inode); 859 err = fscrypt_get_encryption_info(inode);
857 if (err && err != -ENOKEY) 860 if (err && err != -ENOKEY)
858 return err; 861 goto out;
859 862
860 err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); 863 err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
861 if (err < 0) 864 if (err < 0)
862 return err; 865 goto out;
863 } 866 }
864 867
865 if (f2fs_has_inline_dentry(inode)) { 868 if (f2fs_has_inline_dentry(inode)) {
866 err = f2fs_read_inline_dir(file, ctx, &fstr); 869 err = f2fs_read_inline_dir(file, ctx, &fstr);
867 goto out; 870 goto out_free;
868 } 871 }
869 872
870 /* readahead for multi pages of dir */ 873 for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
871 if (npages - n > 1 && !ra_has_index(ra, n)) 874
872 page_cache_sync_readahead(inode->i_mapping, ra, file, n, 875 /* allow readdir() to be interrupted */
876 if (fatal_signal_pending(current)) {
877 err = -ERESTARTSYS;
878 goto out_free;
879 }
880 cond_resched();
881
882 /* readahead for multi pages of dir */
883 if (npages - n > 1 && !ra_has_index(ra, n))
884 page_cache_sync_readahead(inode->i_mapping, ra, file, n,
873 min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); 885 min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
874 886
875 for (; n < npages; n++) {
876 dentry_page = get_lock_data_page(inode, n, false); 887 dentry_page = get_lock_data_page(inode, n, false);
877 if (IS_ERR(dentry_page)) { 888 if (IS_ERR(dentry_page)) {
878 err = PTR_ERR(dentry_page); 889 err = PTR_ERR(dentry_page);
@@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
880 err = 0; 891 err = 0;
881 continue; 892 continue;
882 } else { 893 } else {
883 goto out; 894 goto out_free;
884 } 895 }
885 } 896 }
886 897
@@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
896 break; 907 break;
897 } 908 }
898 909
899 ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
900 kunmap(dentry_page); 910 kunmap(dentry_page);
901 f2fs_put_page(dentry_page, 1); 911 f2fs_put_page(dentry_page, 1);
902 } 912 }
903out: 913out_free:
904 fscrypt_fname_free_buffer(&fstr); 914 fscrypt_fname_free_buffer(&fstr);
915out:
916 trace_f2fs_readdir(inode, start_pos, ctx->pos, err);
905 return err < 0 ? err : 0; 917 return err < 0 ? err : 0;
906} 918}
907 919
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 115204fdefcc..f4e094e816c6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -44,6 +44,8 @@
44enum { 44enum {
45 FAULT_KMALLOC, 45 FAULT_KMALLOC,
46 FAULT_PAGE_ALLOC, 46 FAULT_PAGE_ALLOC,
47 FAULT_PAGE_GET,
48 FAULT_ALLOC_BIO,
47 FAULT_ALLOC_NID, 49 FAULT_ALLOC_NID,
48 FAULT_ORPHAN, 50 FAULT_ORPHAN,
49 FAULT_BLOCK, 51 FAULT_BLOCK,
@@ -91,6 +93,7 @@ extern char *fault_name[FAULT_MAX];
91#define F2FS_MOUNT_GRPQUOTA 0x00100000 93#define F2FS_MOUNT_GRPQUOTA 0x00100000
92#define F2FS_MOUNT_PRJQUOTA 0x00200000 94#define F2FS_MOUNT_PRJQUOTA 0x00200000
93#define F2FS_MOUNT_QUOTA 0x00400000 95#define F2FS_MOUNT_QUOTA 0x00400000
96#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
94 97
95#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) 98#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
96#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) 99#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -116,6 +119,8 @@ struct f2fs_mount_info {
116#define F2FS_FEATURE_EXTRA_ATTR 0x0008 119#define F2FS_FEATURE_EXTRA_ATTR 0x0008
117#define F2FS_FEATURE_PRJQUOTA 0x0010 120#define F2FS_FEATURE_PRJQUOTA 0x0010
118#define F2FS_FEATURE_INODE_CHKSUM 0x0020 121#define F2FS_FEATURE_INODE_CHKSUM 0x0020
122#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
123#define F2FS_FEATURE_QUOTA_INO 0x0080
119 124
120#define F2FS_HAS_FEATURE(sb, mask) \ 125#define F2FS_HAS_FEATURE(sb, mask) \
121 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) 126 ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -145,7 +150,7 @@ enum {
145#define BATCHED_TRIM_BLOCKS(sbi) \ 150#define BATCHED_TRIM_BLOCKS(sbi) \
146 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) 151 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
147#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) 152#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
148#define DISCARD_ISSUE_RATE 8 153#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
149#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ 154#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
150#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ 155#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
151#define DEF_CP_INTERVAL 60 /* 60 secs */ 156#define DEF_CP_INTERVAL 60 /* 60 secs */
@@ -156,7 +161,6 @@ struct cp_control {
156 __u64 trim_start; 161 __u64 trim_start;
157 __u64 trim_end; 162 __u64 trim_end;
158 __u64 trim_minlen; 163 __u64 trim_minlen;
159 __u64 trimmed;
160}; 164};
161 165
162/* 166/*
@@ -175,12 +179,14 @@ enum {
175 ORPHAN_INO, /* for orphan ino list */ 179 ORPHAN_INO, /* for orphan ino list */
176 APPEND_INO, /* for append ino list */ 180 APPEND_INO, /* for append ino list */
177 UPDATE_INO, /* for update ino list */ 181 UPDATE_INO, /* for update ino list */
182 FLUSH_INO, /* for multiple device flushing */
178 MAX_INO_ENTRY, /* max. list */ 183 MAX_INO_ENTRY, /* max. list */
179}; 184};
180 185
181struct ino_entry { 186struct ino_entry {
182 struct list_head list; /* list head */ 187 struct list_head list; /* list head */
183 nid_t ino; /* inode number */ 188 nid_t ino; /* inode number */
189 unsigned int dirty_device; /* dirty device bitmap */
184}; 190};
185 191
186/* for the list of inodes to be GCed */ 192/* for the list of inodes to be GCed */
@@ -204,10 +210,6 @@ struct discard_entry {
204#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ 210#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
205 (MAX_PLIST_NUM - 1) : (blk_num - 1)) 211 (MAX_PLIST_NUM - 1) : (blk_num - 1))
206 212
207#define P_ACTIVE 0x01
208#define P_TRIM 0x02
209#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM))
210
211enum { 213enum {
212 D_PREP, 214 D_PREP,
213 D_SUBMIT, 215 D_SUBMIT,
@@ -239,12 +241,32 @@ struct discard_cmd {
239 int error; /* bio error */ 241 int error; /* bio error */
240}; 242};
241 243
244enum {
245 DPOLICY_BG,
246 DPOLICY_FORCE,
247 DPOLICY_FSTRIM,
248 DPOLICY_UMOUNT,
249 MAX_DPOLICY,
250};
251
252struct discard_policy {
253 int type; /* type of discard */
254 unsigned int min_interval; /* used for candidates exist */
255 unsigned int max_interval; /* used for candidates not exist */
256 unsigned int max_requests; /* # of discards issued per round */
257 unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
258 bool io_aware; /* issue discard in idle time */
259 bool sync; /* submit discard with REQ_SYNC flag */
260 unsigned int granularity; /* discard granularity */
261};
262
242struct discard_cmd_control { 263struct discard_cmd_control {
243 struct task_struct *f2fs_issue_discard; /* discard thread */ 264 struct task_struct *f2fs_issue_discard; /* discard thread */
244 struct list_head entry_list; /* 4KB discard entry list */ 265 struct list_head entry_list; /* 4KB discard entry list */
245 struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ 266 struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
246 unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ 267 unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
247 struct list_head wait_list; /* store on-flushing entries */ 268 struct list_head wait_list; /* store on-flushing entries */
269 struct list_head fstrim_list; /* in-flight discard from fstrim */
248 wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ 270 wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
249 unsigned int discard_wake; /* to wake up discard thread */ 271 unsigned int discard_wake; /* to wake up discard thread */
250 struct mutex cmd_lock; 272 struct mutex cmd_lock;
@@ -377,11 +399,14 @@ struct f2fs_flush_device {
377 399
378/* for inline stuff */ 400/* for inline stuff */
379#define DEF_INLINE_RESERVED_SIZE 1 401#define DEF_INLINE_RESERVED_SIZE 1
402#define DEF_MIN_INLINE_SIZE 1
380static inline int get_extra_isize(struct inode *inode); 403static inline int get_extra_isize(struct inode *inode);
381#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ 404static inline int get_inline_xattr_addrs(struct inode *inode);
382 (CUR_ADDRS_PER_INODE(inode) - \ 405#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode)
383 DEF_INLINE_RESERVED_SIZE - \ 406#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
384 F2FS_INLINE_XATTR_ADDRS)) 407 (CUR_ADDRS_PER_INODE(inode) - \
408 F2FS_INLINE_XATTR_ADDRS(inode) - \
409 DEF_INLINE_RESERVED_SIZE))
385 410
386/* for inline dir */ 411/* for inline dir */
387#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ 412#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
@@ -581,6 +606,7 @@ struct f2fs_inode_info {
581#endif 606#endif
582 struct list_head dirty_list; /* dirty list for dirs and files */ 607 struct list_head dirty_list; /* dirty list for dirs and files */
583 struct list_head gdirty_list; /* linked in global dirty list */ 608 struct list_head gdirty_list; /* linked in global dirty list */
609 struct list_head inmem_ilist; /* list for inmem inodes */
584 struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 610 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
585 struct task_struct *inmem_task; /* store inmemory task */ 611 struct task_struct *inmem_task; /* store inmemory task */
586 struct mutex inmem_lock; /* lock for inmemory pages */ 612 struct mutex inmem_lock; /* lock for inmemory pages */
@@ -591,6 +617,7 @@ struct f2fs_inode_info {
591 617
592 int i_extra_isize; /* size of extra space located in i_addr */ 618 int i_extra_isize; /* size of extra space located in i_addr */
593 kprojid_t i_projid; /* id for project quota */ 619 kprojid_t i_projid; /* id for project quota */
620 int i_inline_xattr_size; /* inline xattr size */
594}; 621};
595 622
596static inline void get_extent_info(struct extent_info *ext, 623static inline void get_extent_info(struct extent_info *ext,
@@ -664,10 +691,13 @@ static inline void __try_update_largest_extent(struct inode *inode,
664 } 691 }
665} 692}
666 693
667enum nid_list { 694/*
668 FREE_NID_LIST, 695 * For free nid management
669 ALLOC_NID_LIST, 696 */
670 MAX_NID_LIST, 697enum nid_state {
698 FREE_NID, /* newly added to free nid list */
699 PREALLOC_NID, /* it is preallocated */
700 MAX_NID_STATE,
671}; 701};
672 702
673struct f2fs_nm_info { 703struct f2fs_nm_info {
@@ -690,8 +720,8 @@ struct f2fs_nm_info {
690 720
691 /* free node ids management */ 721 /* free node ids management */
692 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 722 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
693 struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ 723 struct list_head free_nid_list; /* list for free nids excluding preallocated nids */
694 unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ 724 unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
695 spinlock_t nid_list_lock; /* protect nid lists ops */ 725 spinlock_t nid_list_lock; /* protect nid lists ops */
696 struct mutex build_lock; /* lock for build free nids */ 726 struct mutex build_lock; /* lock for build free nids */
697 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; 727 unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
@@ -769,6 +799,7 @@ enum {
769struct flush_cmd { 799struct flush_cmd {
770 struct completion wait; 800 struct completion wait;
771 struct llist_node llnode; 801 struct llist_node llnode;
802 nid_t ino;
772 int ret; 803 int ret;
773}; 804};
774 805
@@ -787,6 +818,8 @@ struct f2fs_sm_info {
787 struct dirty_seglist_info *dirty_info; /* dirty segment information */ 818 struct dirty_seglist_info *dirty_info; /* dirty segment information */
788 struct curseg_info *curseg_array; /* active segment information */ 819 struct curseg_info *curseg_array; /* active segment information */
789 820
821 struct rw_semaphore curseg_lock; /* for preventing curseg change */
822
790 block_t seg0_blkaddr; /* block address of 0'th segment */ 823 block_t seg0_blkaddr; /* block address of 0'th segment */
791 block_t main_blkaddr; /* start block address of main area */ 824 block_t main_blkaddr; /* start block address of main area */
792 block_t ssa_blkaddr; /* start block address of SSA area */ 825 block_t ssa_blkaddr; /* start block address of SSA area */
@@ -808,6 +841,7 @@ struct f2fs_sm_info {
808 unsigned int min_ipu_util; /* in-place-update threshold */ 841 unsigned int min_ipu_util; /* in-place-update threshold */
809 unsigned int min_fsync_blocks; /* threshold for fsync */ 842 unsigned int min_fsync_blocks; /* threshold for fsync */
810 unsigned int min_hot_blocks; /* threshold for hot block allocation */ 843 unsigned int min_hot_blocks; /* threshold for hot block allocation */
844 unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
811 845
812 /* for flush command control */ 846 /* for flush command control */
813 struct flush_cmd_control *fcc_info; 847 struct flush_cmd_control *fcc_info;
@@ -829,6 +863,7 @@ struct f2fs_sm_info {
829enum count_type { 863enum count_type {
830 F2FS_DIRTY_DENTS, 864 F2FS_DIRTY_DENTS,
831 F2FS_DIRTY_DATA, 865 F2FS_DIRTY_DATA,
866 F2FS_DIRTY_QDATA,
832 F2FS_DIRTY_NODES, 867 F2FS_DIRTY_NODES,
833 F2FS_DIRTY_META, 868 F2FS_DIRTY_META,
834 F2FS_INMEM_PAGES, 869 F2FS_INMEM_PAGES,
@@ -877,6 +912,18 @@ enum need_lock_type {
877 LOCK_RETRY, 912 LOCK_RETRY,
878}; 913};
879 914
915enum cp_reason_type {
916 CP_NO_NEEDED,
917 CP_NON_REGULAR,
918 CP_HARDLINK,
919 CP_SB_NEED_CP,
920 CP_WRONG_PINO,
921 CP_NO_SPC_ROLL,
922 CP_NODE_NEED_CP,
923 CP_FASTBOOT_MODE,
924 CP_SPEC_LOG_NUM,
925};
926
880enum iostat_type { 927enum iostat_type {
881 APP_DIRECT_IO, /* app direct IOs */ 928 APP_DIRECT_IO, /* app direct IOs */
882 APP_BUFFERED_IO, /* app buffered IOs */ 929 APP_BUFFERED_IO, /* app buffered IOs */
@@ -896,6 +943,7 @@ enum iostat_type {
896 943
897struct f2fs_io_info { 944struct f2fs_io_info {
898 struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ 945 struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
946 nid_t ino; /* inode number */
899 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ 947 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
900 enum temp_type temp; /* contains HOT/WARM/COLD */ 948 enum temp_type temp; /* contains HOT/WARM/COLD */
901 int op; /* contains REQ_OP_ */ 949 int op; /* contains REQ_OP_ */
@@ -940,6 +988,7 @@ enum inode_type {
940 DIR_INODE, /* for dirty dir inode */ 988 DIR_INODE, /* for dirty dir inode */
941 FILE_INODE, /* for dirty regular/symlink inode */ 989 FILE_INODE, /* for dirty regular/symlink inode */
942 DIRTY_META, /* for all dirtied inode metadata */ 990 DIRTY_META, /* for all dirtied inode metadata */
991 ATOMIC_FILE, /* for all atomic files */
943 NR_INODE_TYPE, 992 NR_INODE_TYPE,
944}; 993};
945 994
@@ -1042,12 +1091,15 @@ struct f2fs_sb_info {
1042 loff_t max_file_blocks; /* max block index of file */ 1091 loff_t max_file_blocks; /* max block index of file */
1043 int active_logs; /* # of active logs */ 1092 int active_logs; /* # of active logs */
1044 int dir_level; /* directory level */ 1093 int dir_level; /* directory level */
1094 int inline_xattr_size; /* inline xattr size */
1095 unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
1045 1096
1046 block_t user_block_count; /* # of user blocks */ 1097 block_t user_block_count; /* # of user blocks */
1047 block_t total_valid_block_count; /* # of valid blocks */ 1098 block_t total_valid_block_count; /* # of valid blocks */
1048 block_t discard_blks; /* discard command candidats */ 1099 block_t discard_blks; /* discard command candidats */
1049 block_t last_valid_block_count; /* for recovery */ 1100 block_t last_valid_block_count; /* for recovery */
1050 block_t reserved_blocks; /* configurable reserved blocks */ 1101 block_t reserved_blocks; /* configurable reserved blocks */
1102 block_t current_reserved_blocks; /* current reserved blocks */
1051 1103
1052 u32 s_next_generation; /* for NFS support */ 1104 u32 s_next_generation; /* for NFS support */
1053 1105
@@ -1113,6 +1165,8 @@ struct f2fs_sb_info {
1113 struct list_head s_list; 1165 struct list_head s_list;
1114 int s_ndevs; /* number of devices */ 1166 int s_ndevs; /* number of devices */
1115 struct f2fs_dev_info *devs; /* for device list */ 1167 struct f2fs_dev_info *devs; /* for device list */
1168 unsigned int dirty_device; /* for checkpoint data flush */
1169 spinlock_t dev_lock; /* protect dirty_device */
1116 struct mutex umount_mutex; 1170 struct mutex umount_mutex;
1117 unsigned int shrinker_run_no; 1171 unsigned int shrinker_run_no;
1118 1172
@@ -1176,8 +1230,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
1176 1230
1177static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) 1231static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
1178{ 1232{
1179 struct timespec ts = {sbi->interval_time[type], 0}; 1233 unsigned long interval = sbi->interval_time[type] * HZ;
1180 unsigned long interval = timespec_to_jiffies(&ts);
1181 1234
1182 return time_after(jiffies, sbi->last_time[type] + interval); 1235 return time_after(jiffies, sbi->last_time[type] + interval);
1183} 1236}
@@ -1344,6 +1397,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
1344 return le64_to_cpu(cp->checkpoint_ver); 1397 return le64_to_cpu(cp->checkpoint_ver);
1345} 1398}
1346 1399
1400static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type)
1401{
1402 if (type < F2FS_MAX_QUOTAS)
1403 return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]);
1404 return 0;
1405}
1406
1347static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) 1407static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
1348{ 1408{
1349 size_t crc_offset = le32_to_cpu(cp->checksum_offset); 1409 size_t crc_offset = le32_to_cpu(cp->checksum_offset);
@@ -1522,7 +1582,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
1522 1582
1523 spin_lock(&sbi->stat_lock); 1583 spin_lock(&sbi->stat_lock);
1524 sbi->total_valid_block_count += (block_t)(*count); 1584 sbi->total_valid_block_count += (block_t)(*count);
1525 avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; 1585 avail_user_block_count = sbi->user_block_count -
1586 sbi->current_reserved_blocks;
1526 if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { 1587 if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
1527 diff = sbi->total_valid_block_count - avail_user_block_count; 1588 diff = sbi->total_valid_block_count - avail_user_block_count;
1528 *count -= diff; 1589 *count -= diff;
@@ -1556,6 +1617,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
1556 f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); 1617 f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
1557 f2fs_bug_on(sbi, inode->i_blocks < sectors); 1618 f2fs_bug_on(sbi, inode->i_blocks < sectors);
1558 sbi->total_valid_block_count -= (block_t)count; 1619 sbi->total_valid_block_count -= (block_t)count;
1620 if (sbi->reserved_blocks &&
1621 sbi->current_reserved_blocks < sbi->reserved_blocks)
1622 sbi->current_reserved_blocks = min(sbi->reserved_blocks,
1623 sbi->current_reserved_blocks + count);
1559 spin_unlock(&sbi->stat_lock); 1624 spin_unlock(&sbi->stat_lock);
1560 f2fs_i_blocks_write(inode, count, false, true); 1625 f2fs_i_blocks_write(inode, count, false, true);
1561} 1626}
@@ -1576,6 +1641,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode)
1576 atomic_inc(&F2FS_I(inode)->dirty_pages); 1641 atomic_inc(&F2FS_I(inode)->dirty_pages);
1577 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1642 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1578 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1643 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1644 if (IS_NOQUOTA(inode))
1645 inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
1579} 1646}
1580 1647
1581static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 1648static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1592,6 +1659,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
1592 atomic_dec(&F2FS_I(inode)->dirty_pages); 1659 atomic_dec(&F2FS_I(inode)->dirty_pages);
1593 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1660 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1594 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1661 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1662 if (IS_NOQUOTA(inode))
1663 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
1595} 1664}
1596 1665
1597static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) 1666static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1699,10 +1768,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
1699 return ret; 1768 return ret;
1700 } 1769 }
1701 1770
1771#ifdef CONFIG_F2FS_FAULT_INJECTION
1772 if (time_to_inject(sbi, FAULT_BLOCK)) {
1773 f2fs_show_injection_info(FAULT_BLOCK);
1774 goto enospc;
1775 }
1776#endif
1777
1702 spin_lock(&sbi->stat_lock); 1778 spin_lock(&sbi->stat_lock);
1703 1779
1704 valid_block_count = sbi->total_valid_block_count + 1; 1780 valid_block_count = sbi->total_valid_block_count + 1;
1705 if (unlikely(valid_block_count + sbi->reserved_blocks > 1781 if (unlikely(valid_block_count + sbi->current_reserved_blocks >
1706 sbi->user_block_count)) { 1782 sbi->user_block_count)) {
1707 spin_unlock(&sbi->stat_lock); 1783 spin_unlock(&sbi->stat_lock);
1708 goto enospc; 1784 goto enospc;
@@ -1745,6 +1821,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
1745 1821
1746 sbi->total_valid_node_count--; 1822 sbi->total_valid_node_count--;
1747 sbi->total_valid_block_count--; 1823 sbi->total_valid_block_count--;
1824 if (sbi->reserved_blocks &&
1825 sbi->current_reserved_blocks < sbi->reserved_blocks)
1826 sbi->current_reserved_blocks++;
1748 1827
1749 spin_unlock(&sbi->stat_lock); 1828 spin_unlock(&sbi->stat_lock);
1750 1829
@@ -1791,6 +1870,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
1791 return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 1870 return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
1792} 1871}
1793 1872
1873static inline struct page *f2fs_pagecache_get_page(
1874 struct address_space *mapping, pgoff_t index,
1875 int fgp_flags, gfp_t gfp_mask)
1876{
1877#ifdef CONFIG_F2FS_FAULT_INJECTION
1878 if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
1879 f2fs_show_injection_info(FAULT_PAGE_GET);
1880 return NULL;
1881 }
1882#endif
1883 return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
1884}
1885
1794static inline void f2fs_copy_page(struct page *src, struct page *dst) 1886static inline void f2fs_copy_page(struct page *src, struct page *dst)
1795{ 1887{
1796 char *src_kaddr = kmap(src); 1888 char *src_kaddr = kmap(src);
@@ -1840,15 +1932,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
1840 return entry; 1932 return entry;
1841} 1933}
1842 1934
1843static inline struct bio *f2fs_bio_alloc(int npages) 1935static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
1936 int npages, bool no_fail)
1844{ 1937{
1845 struct bio *bio; 1938 struct bio *bio;
1846 1939
1847 /* No failure on bio allocation */ 1940 if (no_fail) {
1848 bio = bio_alloc(GFP_NOIO, npages); 1941 /* No failure on bio allocation */
1849 if (!bio) 1942 bio = bio_alloc(GFP_NOIO, npages);
1850 bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); 1943 if (!bio)
1851 return bio; 1944 bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
1945 return bio;
1946 }
1947#ifdef CONFIG_F2FS_FAULT_INJECTION
1948 if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
1949 f2fs_show_injection_info(FAULT_ALLOC_BIO);
1950 return NULL;
1951 }
1952#endif
1953 return bio_alloc(GFP_KERNEL, npages);
1852} 1954}
1853 1955
1854static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, 1956static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
@@ -2158,25 +2260,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
2158 2260
2159static inline unsigned int addrs_per_inode(struct inode *inode) 2261static inline unsigned int addrs_per_inode(struct inode *inode)
2160{ 2262{
2161 if (f2fs_has_inline_xattr(inode)) 2263 return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode);
2162 return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS;
2163 return CUR_ADDRS_PER_INODE(inode);
2164} 2264}
2165 2265
2166static inline void *inline_xattr_addr(struct page *page) 2266static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
2167{ 2267{
2168 struct f2fs_inode *ri = F2FS_INODE(page); 2268 struct f2fs_inode *ri = F2FS_INODE(page);
2169 2269
2170 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 2270 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
2171 F2FS_INLINE_XATTR_ADDRS]); 2271 F2FS_INLINE_XATTR_ADDRS(inode)]);
2172} 2272}
2173 2273
2174static inline int inline_xattr_size(struct inode *inode) 2274static inline int inline_xattr_size(struct inode *inode)
2175{ 2275{
2176 if (f2fs_has_inline_xattr(inode)) 2276 return get_inline_xattr_addrs(inode) * sizeof(__le32);
2177 return F2FS_INLINE_XATTR_ADDRS << 2;
2178 else
2179 return 0;
2180} 2277}
2181 2278
2182static inline int f2fs_has_inline_data(struct inode *inode) 2279static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2257,9 +2354,10 @@ static inline void clear_file(struct inode *inode, int type)
2257 2354
2258static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) 2355static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
2259{ 2356{
2357 bool ret;
2358
2260 if (dsync) { 2359 if (dsync) {
2261 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2360 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2262 bool ret;
2263 2361
2264 spin_lock(&sbi->inode_lock[DIRTY_META]); 2362 spin_lock(&sbi->inode_lock[DIRTY_META]);
2265 ret = list_empty(&F2FS_I(inode)->gdirty_list); 2363 ret = list_empty(&F2FS_I(inode)->gdirty_list);
@@ -2270,7 +2368,12 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
2270 file_keep_isize(inode) || 2368 file_keep_isize(inode) ||
2271 i_size_read(inode) & PAGE_MASK) 2369 i_size_read(inode) & PAGE_MASK)
2272 return false; 2370 return false;
2273 return F2FS_I(inode)->last_disk_size == i_size_read(inode); 2371
2372 down_read(&F2FS_I(inode)->i_sem);
2373 ret = F2FS_I(inode)->last_disk_size == i_size_read(inode);
2374 up_read(&F2FS_I(inode)->i_sem);
2375
2376 return ret;
2274} 2377}
2275 2378
2276static inline int f2fs_readonly(struct super_block *sb) 2379static inline int f2fs_readonly(struct super_block *sb)
@@ -2320,6 +2423,12 @@ static inline int get_extra_isize(struct inode *inode)
2320 return F2FS_I(inode)->i_extra_isize / sizeof(__le32); 2423 return F2FS_I(inode)->i_extra_isize / sizeof(__le32);
2321} 2424}
2322 2425
2426static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb);
2427static inline int get_inline_xattr_addrs(struct inode *inode)
2428{
2429 return F2FS_I(inode)->i_inline_xattr_size;
2430}
2431
2323#define get_inode_mode(i) \ 2432#define get_inode_mode(i) \
2324 ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ 2433 ((is_inode_flag_set(i, FI_ACL_MODE)) ? \
2325 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 2434 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -2448,7 +2557,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
2448 */ 2557 */
2449int f2fs_inode_dirtied(struct inode *inode, bool sync); 2558int f2fs_inode_dirtied(struct inode *inode, bool sync);
2450void f2fs_inode_synced(struct inode *inode); 2559void f2fs_inode_synced(struct inode *inode);
2451void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); 2560int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
2452void f2fs_quota_off_umount(struct super_block *sb); 2561void f2fs_quota_off_umount(struct super_block *sb);
2453int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); 2562int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
2454int f2fs_sync_fs(struct super_block *sb, int sync); 2563int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -2476,7 +2585,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
2476pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); 2585pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
2477int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); 2586int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
2478int truncate_inode_blocks(struct inode *inode, pgoff_t from); 2587int truncate_inode_blocks(struct inode *inode, pgoff_t from);
2479int truncate_xattr_node(struct inode *inode, struct page *page); 2588int truncate_xattr_node(struct inode *inode);
2480int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); 2589int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
2481int remove_inode_page(struct inode *inode); 2590int remove_inode_page(struct inode *inode);
2482struct page *new_inode_page(struct inode *inode); 2591struct page *new_inode_page(struct inode *inode);
@@ -2511,19 +2620,22 @@ void destroy_node_manager_caches(void);
2511 */ 2620 */
2512bool need_SSR(struct f2fs_sb_info *sbi); 2621bool need_SSR(struct f2fs_sb_info *sbi);
2513void register_inmem_page(struct inode *inode, struct page *page); 2622void register_inmem_page(struct inode *inode, struct page *page);
2623void drop_inmem_pages_all(struct f2fs_sb_info *sbi);
2514void drop_inmem_pages(struct inode *inode); 2624void drop_inmem_pages(struct inode *inode);
2515void drop_inmem_page(struct inode *inode, struct page *page); 2625void drop_inmem_page(struct inode *inode, struct page *page);
2516int commit_inmem_pages(struct inode *inode); 2626int commit_inmem_pages(struct inode *inode);
2517void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); 2627void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
2518void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); 2628void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
2519int f2fs_issue_flush(struct f2fs_sb_info *sbi); 2629int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
2520int create_flush_cmd_control(struct f2fs_sb_info *sbi); 2630int create_flush_cmd_control(struct f2fs_sb_info *sbi);
2631int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
2521void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); 2632void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
2522void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); 2633void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
2523bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); 2634bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
2524void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); 2635void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
2636 unsigned int granularity);
2525void stop_discard_thread(struct f2fs_sb_info *sbi); 2637void stop_discard_thread(struct f2fs_sb_info *sbi);
2526void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); 2638bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
2527void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); 2639void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
2528void release_discard_addrs(struct f2fs_sb_info *sbi); 2640void release_discard_addrs(struct f2fs_sb_info *sbi);
2529int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); 2641int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
@@ -2578,6 +2690,10 @@ void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2578void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); 2690void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
2579void release_ino_entry(struct f2fs_sb_info *sbi, bool all); 2691void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
2580bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); 2692bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
2693void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
2694 unsigned int devidx, int type);
2695bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
2696 unsigned int devidx, int type);
2581int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); 2697int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
2582int acquire_orphan_inode(struct f2fs_sb_info *sbi); 2698int acquire_orphan_inode(struct f2fs_sb_info *sbi);
2583void release_orphan_inode(struct f2fs_sb_info *sbi); 2699void release_orphan_inode(struct f2fs_sb_info *sbi);
@@ -2665,14 +2781,16 @@ struct f2fs_stat_info {
2665 unsigned long long hit_largest, hit_cached, hit_rbtree; 2781 unsigned long long hit_largest, hit_cached, hit_rbtree;
2666 unsigned long long hit_total, total_ext; 2782 unsigned long long hit_total, total_ext;
2667 int ext_tree, zombie_tree, ext_node; 2783 int ext_tree, zombie_tree, ext_node;
2668 int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; 2784 int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
2785 int ndirty_data, ndirty_qdata;
2669 int inmem_pages; 2786 int inmem_pages;
2670 unsigned int ndirty_dirs, ndirty_files, ndirty_all; 2787 unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
2671 int nats, dirty_nats, sits, dirty_sits; 2788 int nats, dirty_nats, sits, dirty_sits;
2672 int free_nids, avail_nids, alloc_nids; 2789 int free_nids, avail_nids, alloc_nids;
2673 int total_count, utilization; 2790 int total_count, utilization;
2674 int bg_gc, nr_wb_cp_data, nr_wb_data; 2791 int bg_gc, nr_wb_cp_data, nr_wb_data;
2675 int nr_flushing, nr_flushed, nr_discarding, nr_discarded; 2792 int nr_flushing, nr_flushed, flush_list_empty;
2793 int nr_discarding, nr_discarded;
2676 int nr_discard_cmd; 2794 int nr_discard_cmd;
2677 unsigned int undiscard_blks; 2795 unsigned int undiscard_blks;
2678 int inline_xattr, inline_inode, inline_dir, append, update, orphans; 2796 int inline_xattr, inline_inode, inline_dir, append, update, orphans;
@@ -2981,6 +3099,16 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb)
2981 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); 3099 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM);
2982} 3100}
2983 3101
3102static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb)
3103{
3104 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
3105}
3106
3107static inline int f2fs_sb_has_quota_ino(struct super_block *sb)
3108{
3109 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO);
3110}
3111
2984#ifdef CONFIG_BLK_DEV_ZONED 3112#ifdef CONFIG_BLK_DEV_ZONED
2985static inline int get_blkz_type(struct f2fs_sb_info *sbi, 3113static inline int get_blkz_type(struct f2fs_sb_info *sbi,
2986 struct block_device *bdev, block_t blkaddr) 3114 struct block_device *bdev, block_t blkaddr)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f78b76ec4707..7874bbd7311d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -53,6 +53,11 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
53 struct dnode_of_data dn; 53 struct dnode_of_data dn;
54 int err; 54 int err;
55 55
56 if (unlikely(f2fs_cp_error(sbi))) {
57 err = -EIO;
58 goto err;
59 }
60
56 sb_start_pagefault(inode->i_sb); 61 sb_start_pagefault(inode->i_sb);
57 62
58 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 63 f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -114,6 +119,7 @@ out_sem:
114out: 119out:
115 sb_end_pagefault(inode->i_sb); 120 sb_end_pagefault(inode->i_sb);
116 f2fs_update_time(sbi, REQ_TIME); 121 f2fs_update_time(sbi, REQ_TIME);
122err:
117 return block_page_mkwrite_return(err); 123 return block_page_mkwrite_return(err);
118} 124}
119 125
@@ -138,27 +144,29 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
138 return 1; 144 return 1;
139} 145}
140 146
141static inline bool need_do_checkpoint(struct inode *inode) 147static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
142{ 148{
143 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
144 bool need_cp = false; 150 enum cp_reason_type cp_reason = CP_NO_NEEDED;
145 151
146 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 152 if (!S_ISREG(inode->i_mode))
147 need_cp = true; 153 cp_reason = CP_NON_REGULAR;
154 else if (inode->i_nlink != 1)
155 cp_reason = CP_HARDLINK;
148 else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) 156 else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
149 need_cp = true; 157 cp_reason = CP_SB_NEED_CP;
150 else if (file_wrong_pino(inode)) 158 else if (file_wrong_pino(inode))
151 need_cp = true; 159 cp_reason = CP_WRONG_PINO;
152 else if (!space_for_roll_forward(sbi)) 160 else if (!space_for_roll_forward(sbi))
153 need_cp = true; 161 cp_reason = CP_NO_SPC_ROLL;
154 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) 162 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
155 need_cp = true; 163 cp_reason = CP_NODE_NEED_CP;
156 else if (test_opt(sbi, FASTBOOT)) 164 else if (test_opt(sbi, FASTBOOT))
157 need_cp = true; 165 cp_reason = CP_FASTBOOT_MODE;
158 else if (sbi->active_logs == 2) 166 else if (sbi->active_logs == 2)
159 need_cp = true; 167 cp_reason = CP_SPEC_LOG_NUM;
160 168
161 return need_cp; 169 return cp_reason;
162} 170}
163 171
164static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) 172static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
@@ -193,7 +201,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
193 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 201 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
194 nid_t ino = inode->i_ino; 202 nid_t ino = inode->i_ino;
195 int ret = 0; 203 int ret = 0;
196 bool need_cp = false; 204 enum cp_reason_type cp_reason = 0;
197 struct writeback_control wbc = { 205 struct writeback_control wbc = {
198 .sync_mode = WB_SYNC_ALL, 206 .sync_mode = WB_SYNC_ALL,
199 .nr_to_write = LONG_MAX, 207 .nr_to_write = LONG_MAX,
@@ -212,7 +220,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
212 clear_inode_flag(inode, FI_NEED_IPU); 220 clear_inode_flag(inode, FI_NEED_IPU);
213 221
214 if (ret) { 222 if (ret) {
215 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 223 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
216 return ret; 224 return ret;
217 } 225 }
218 226
@@ -243,10 +251,10 @@ go_write:
243 * sudden-power-off. 251 * sudden-power-off.
244 */ 252 */
245 down_read(&F2FS_I(inode)->i_sem); 253 down_read(&F2FS_I(inode)->i_sem);
246 need_cp = need_do_checkpoint(inode); 254 cp_reason = need_do_checkpoint(inode);
247 up_read(&F2FS_I(inode)->i_sem); 255 up_read(&F2FS_I(inode)->i_sem);
248 256
249 if (need_cp) { 257 if (cp_reason) {
250 /* all the dirty node pages should be flushed for POR */ 258 /* all the dirty node pages should be flushed for POR */
251 ret = f2fs_sync_fs(inode->i_sb, 1); 259 ret = f2fs_sync_fs(inode->i_sb, 1);
252 260
@@ -294,19 +302,24 @@ sync_nodes:
294 remove_ino_entry(sbi, ino, APPEND_INO); 302 remove_ino_entry(sbi, ino, APPEND_INO);
295 clear_inode_flag(inode, FI_APPEND_WRITE); 303 clear_inode_flag(inode, FI_APPEND_WRITE);
296flush_out: 304flush_out:
297 remove_ino_entry(sbi, ino, UPDATE_INO);
298 clear_inode_flag(inode, FI_UPDATE_WRITE);
299 if (!atomic) 305 if (!atomic)
300 ret = f2fs_issue_flush(sbi); 306 ret = f2fs_issue_flush(sbi, inode->i_ino);
307 if (!ret) {
308 remove_ino_entry(sbi, ino, UPDATE_INO);
309 clear_inode_flag(inode, FI_UPDATE_WRITE);
310 remove_ino_entry(sbi, ino, FLUSH_INO);
311 }
301 f2fs_update_time(sbi, REQ_TIME); 312 f2fs_update_time(sbi, REQ_TIME);
302out: 313out:
303 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 314 trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
304 f2fs_trace_ios(NULL, 1); 315 f2fs_trace_ios(NULL, 1);
305 return ret; 316 return ret;
306} 317}
307 318
308int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 319int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
309{ 320{
321 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
322 return -EIO;
310 return f2fs_do_sync_file(file, start, end, datasync, false); 323 return f2fs_do_sync_file(file, start, end, datasync, false);
311} 324}
312 325
@@ -444,6 +457,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
444 struct inode *inode = file_inode(file); 457 struct inode *inode = file_inode(file);
445 int err; 458 int err;
446 459
460 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
461 return -EIO;
462
447 /* we don't need to use inline_data strictly */ 463 /* we don't need to use inline_data strictly */
448 err = f2fs_convert_inline_inode(inode); 464 err = f2fs_convert_inline_inode(inode);
449 if (err) 465 if (err)
@@ -630,6 +646,9 @@ int f2fs_truncate(struct inode *inode)
630{ 646{
631 int err; 647 int err;
632 648
649 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
650 return -EIO;
651
633 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 652 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
634 S_ISLNK(inode->i_mode))) 653 S_ISLNK(inode->i_mode)))
635 return 0; 654 return 0;
@@ -684,6 +703,12 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
684 STATX_ATTR_NODUMP); 703 STATX_ATTR_NODUMP);
685 704
686 generic_fillattr(inode, stat); 705 generic_fillattr(inode, stat);
706
707 /* we need to show initial sectors used for inline_data/dentries */
708 if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
709 f2fs_has_inline_dentry(inode))
710 stat->blocks += (stat->size + 511) >> 9;
711
687 return 0; 712 return 0;
688} 713}
689 714
@@ -723,6 +748,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
723 int err; 748 int err;
724 bool size_changed = false; 749 bool size_changed = false;
725 750
751 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
752 return -EIO;
753
726 err = setattr_prepare(dentry, attr); 754 err = setattr_prepare(dentry, attr);
727 if (err) 755 if (err)
728 return err; 756 return err;
@@ -775,6 +803,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
775 inode->i_mtime = inode->i_ctime = current_time(inode); 803 inode->i_mtime = inode->i_ctime = current_time(inode);
776 } 804 }
777 805
806 down_write(&F2FS_I(inode)->i_sem);
807 F2FS_I(inode)->last_disk_size = i_size_read(inode);
808 up_write(&F2FS_I(inode)->i_sem);
809
778 size_changed = true; 810 size_changed = true;
779 } 811 }
780 812
@@ -845,7 +877,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
845 err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); 877 err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
846 if (err) { 878 if (err) {
847 if (err == -ENOENT) { 879 if (err == -ENOENT) {
848 pg_start++; 880 pg_start = get_next_page_offset(&dn, pg_start);
849 continue; 881 continue;
850 } 882 }
851 return err; 883 return err;
@@ -1160,11 +1192,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1160 if (ret) 1192 if (ret)
1161 goto out; 1193 goto out;
1162 1194
1195 /* avoid gc operation during block exchange */
1196 down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
1197
1163 truncate_pagecache(inode, offset); 1198 truncate_pagecache(inode, offset);
1164 1199
1165 ret = f2fs_do_collapse(inode, pg_start, pg_end); 1200 ret = f2fs_do_collapse(inode, pg_start, pg_end);
1166 if (ret) 1201 if (ret)
1167 goto out; 1202 goto out_unlock;
1168 1203
1169 /* write out all moved pages, if possible */ 1204 /* write out all moved pages, if possible */
1170 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); 1205 filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1176,7 +1211,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1176 ret = truncate_blocks(inode, new_size, true); 1211 ret = truncate_blocks(inode, new_size, true);
1177 if (!ret) 1212 if (!ret)
1178 f2fs_i_size_write(inode, new_size); 1213 f2fs_i_size_write(inode, new_size);
1179 1214out_unlock:
1215 up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
1180out: 1216out:
1181 up_write(&F2FS_I(inode)->i_mmap_sem); 1217 up_write(&F2FS_I(inode)->i_mmap_sem);
1182 return ret; 1218 return ret;
@@ -1359,6 +1395,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1359 if (ret) 1395 if (ret)
1360 goto out; 1396 goto out;
1361 1397
1398 /* avoid gc operation during block exchange */
1399 down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
1400
1362 truncate_pagecache(inode, offset); 1401 truncate_pagecache(inode, offset);
1363 1402
1364 pg_start = offset >> PAGE_SHIFT; 1403 pg_start = offset >> PAGE_SHIFT;
@@ -1386,6 +1425,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1386 1425
1387 if (!ret) 1426 if (!ret)
1388 f2fs_i_size_write(inode, new_size); 1427 f2fs_i_size_write(inode, new_size);
1428
1429 up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
1389out: 1430out:
1390 up_write(&F2FS_I(inode)->i_mmap_sem); 1431 up_write(&F2FS_I(inode)->i_mmap_sem);
1391 return ret; 1432 return ret;
@@ -1435,8 +1476,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1435 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; 1476 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1436 } 1477 }
1437 1478
1438 if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) 1479 if (new_size > i_size_read(inode)) {
1439 f2fs_i_size_write(inode, new_size); 1480 if (mode & FALLOC_FL_KEEP_SIZE)
1481 file_set_keep_isize(inode);
1482 else
1483 f2fs_i_size_write(inode, new_size);
1484 }
1440 1485
1441 return err; 1486 return err;
1442} 1487}
@@ -1447,6 +1492,9 @@ static long f2fs_fallocate(struct file *file, int mode,
1447 struct inode *inode = file_inode(file); 1492 struct inode *inode = file_inode(file);
1448 long ret = 0; 1493 long ret = 0;
1449 1494
1495 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1496 return -EIO;
1497
1450 /* f2fs only support ->fallocate for regular file */ 1498 /* f2fs only support ->fallocate for regular file */
1451 if (!S_ISREG(inode->i_mode)) 1499 if (!S_ISREG(inode->i_mode))
1452 return -EINVAL; 1500 return -EINVAL;
@@ -1480,8 +1528,6 @@ static long f2fs_fallocate(struct file *file, int mode,
1480 if (!ret) { 1528 if (!ret) {
1481 inode->i_mtime = inode->i_ctime = current_time(inode); 1529 inode->i_mtime = inode->i_ctime = current_time(inode);
1482 f2fs_mark_inode_dirty_sync(inode, false); 1530 f2fs_mark_inode_dirty_sync(inode, false);
1483 if (mode & FALLOC_FL_KEEP_SIZE)
1484 file_set_keep_isize(inode);
1485 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1531 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1486 } 1532 }
1487 1533
@@ -1883,6 +1929,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
1883{ 1929{
1884 struct inode *inode = file_inode(filp); 1930 struct inode *inode = file_inode(filp);
1885 1931
1932 if (!f2fs_sb_has_crypto(inode->i_sb))
1933 return -EOPNOTSUPP;
1934
1886 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1935 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1887 1936
1888 return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); 1937 return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
@@ -1890,6 +1939,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
1890 1939
1891static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 1940static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
1892{ 1941{
1942 if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb))
1943 return -EOPNOTSUPP;
1893 return fscrypt_ioctl_get_policy(filp, (void __user *)arg); 1944 return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
1894} 1945}
1895 1946
@@ -2245,9 +2296,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2245 } 2296 }
2246 2297
2247 inode_lock(src); 2298 inode_lock(src);
2299 down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
2248 if (src != dst) { 2300 if (src != dst) {
2249 if (!inode_trylock(dst)) { 2301 ret = -EBUSY;
2250 ret = -EBUSY; 2302 if (!inode_trylock(dst))
2303 goto out;
2304 if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
2305 inode_unlock(dst);
2251 goto out; 2306 goto out;
2252 } 2307 }
2253 } 2308 }
@@ -2307,9 +2362,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2307 } 2362 }
2308 f2fs_unlock_op(sbi); 2363 f2fs_unlock_op(sbi);
2309out_unlock: 2364out_unlock:
2310 if (src != dst) 2365 if (src != dst) {
2366 up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
2311 inode_unlock(dst); 2367 inode_unlock(dst);
2368 }
2312out: 2369out:
2370 up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
2313 inode_unlock(src); 2371 inode_unlock(src);
2314 return ret; 2372 return ret;
2315} 2373}
@@ -2625,6 +2683,9 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
2625 2683
2626long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 2684long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2627{ 2685{
2686 if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
2687 return -EIO;
2688
2628 switch (cmd) { 2689 switch (cmd) {
2629 case F2FS_IOC_GETFLAGS: 2690 case F2FS_IOC_GETFLAGS:
2630 return f2fs_ioc_getflags(filp, arg); 2691 return f2fs_ioc_getflags(filp, arg);
@@ -2682,6 +2743,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2682 struct blk_plug plug; 2743 struct blk_plug plug;
2683 ssize_t ret; 2744 ssize_t ret;
2684 2745
2746 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2747 return -EIO;
2748
2685 inode_lock(inode); 2749 inode_lock(inode);
2686 ret = generic_write_checks(iocb, from); 2750 ret = generic_write_checks(iocb, from);
2687 if (ret > 0) { 2751 if (ret > 0) {
@@ -2692,6 +2756,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2692 2756
2693 err = f2fs_preallocate_blocks(iocb, from); 2757 err = f2fs_preallocate_blocks(iocb, from);
2694 if (err) { 2758 if (err) {
2759 clear_inode_flag(inode, FI_NO_PREALLOC);
2695 inode_unlock(inode); 2760 inode_unlock(inode);
2696 return err; 2761 return err;
2697 } 2762 }
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bfe6a8ccc3a0..5d5bba462f26 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -267,16 +267,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
267 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); 267 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
268} 268}
269 269
270static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
271 unsigned int segno)
272{
273 unsigned int valid_blocks =
274 get_valid_blocks(sbi, segno, true);
275
276 return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
277 valid_blocks * 2 : valid_blocks;
278}
279
280static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, 270static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
281 unsigned int segno, struct victim_sel_policy *p) 271 unsigned int segno, struct victim_sel_policy *p)
282{ 272{
@@ -285,7 +275,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
285 275
286 /* alloc_mode == LFS */ 276 /* alloc_mode == LFS */
287 if (p->gc_mode == GC_GREEDY) 277 if (p->gc_mode == GC_GREEDY)
288 return get_greedy_cost(sbi, segno); 278 return get_valid_blocks(sbi, segno, true);
289 else 279 else
290 return get_cb_cost(sbi, segno); 280 return get_cb_cost(sbi, segno);
291} 281}
@@ -466,10 +456,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
466 struct seg_entry *sentry; 456 struct seg_entry *sentry;
467 int ret; 457 int ret;
468 458
469 mutex_lock(&sit_i->sentry_lock); 459 down_read(&sit_i->sentry_lock);
470 sentry = get_seg_entry(sbi, segno); 460 sentry = get_seg_entry(sbi, segno);
471 ret = f2fs_test_bit(offset, sentry->cur_valid_map); 461 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
472 mutex_unlock(&sit_i->sentry_lock); 462 up_read(&sit_i->sentry_lock);
473 return ret; 463 return ret;
474} 464}
475 465
@@ -608,6 +598,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
608{ 598{
609 struct f2fs_io_info fio = { 599 struct f2fs_io_info fio = {
610 .sbi = F2FS_I_SB(inode), 600 .sbi = F2FS_I_SB(inode),
601 .ino = inode->i_ino,
611 .type = DATA, 602 .type = DATA,
612 .temp = COLD, 603 .temp = COLD,
613 .op = REQ_OP_READ, 604 .op = REQ_OP_READ,
@@ -659,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
659 allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, 650 allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
660 &sum, CURSEG_COLD_DATA, NULL, false); 651 &sum, CURSEG_COLD_DATA, NULL, false);
661 652
662 fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, 653 fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
663 FGP_LOCK | FGP_CREAT, GFP_NOFS); 654 newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
664 if (!fio.encrypted_page) { 655 if (!fio.encrypted_page) {
665 err = -ENOMEM; 656 err = -ENOMEM;
666 goto recover_block; 657 goto recover_block;
@@ -738,6 +729,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
738 } else { 729 } else {
739 struct f2fs_io_info fio = { 730 struct f2fs_io_info fio = {
740 .sbi = F2FS_I_SB(inode), 731 .sbi = F2FS_I_SB(inode),
732 .ino = inode->i_ino,
741 .type = DATA, 733 .type = DATA,
742 .temp = COLD, 734 .temp = COLD,
743 .op = REQ_OP_WRITE, 735 .op = REQ_OP_WRITE,
@@ -840,10 +832,17 @@ next_step:
840 continue; 832 continue;
841 } 833 }
842 834
835 if (!down_write_trylock(
836 &F2FS_I(inode)->dio_rwsem[WRITE])) {
837 iput(inode);
838 continue;
839 }
840
843 start_bidx = start_bidx_of_node(nofs, inode); 841 start_bidx = start_bidx_of_node(nofs, inode);
844 data_page = get_read_data_page(inode, 842 data_page = get_read_data_page(inode,
845 start_bidx + ofs_in_node, REQ_RAHEAD, 843 start_bidx + ofs_in_node, REQ_RAHEAD,
846 true); 844 true);
845 up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
847 if (IS_ERR(data_page)) { 846 if (IS_ERR(data_page)) {
848 iput(inode); 847 iput(inode);
849 continue; 848 continue;
@@ -901,10 +900,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
901 struct sit_info *sit_i = SIT_I(sbi); 900 struct sit_info *sit_i = SIT_I(sbi);
902 int ret; 901 int ret;
903 902
904 mutex_lock(&sit_i->sentry_lock); 903 down_write(&sit_i->sentry_lock);
905 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, 904 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
906 NO_CHECK_TYPE, LFS); 905 NO_CHECK_TYPE, LFS);
907 mutex_unlock(&sit_i->sentry_lock); 906 up_write(&sit_i->sentry_lock);
908 return ret; 907 return ret;
909} 908}
910 909
@@ -952,8 +951,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
952 /* 951 /*
953 * this is to avoid deadlock: 952 * this is to avoid deadlock:
954 * - lock_page(sum_page) - f2fs_replace_block 953 * - lock_page(sum_page) - f2fs_replace_block
955 * - check_valid_map() - mutex_lock(sentry_lock) 954 * - check_valid_map() - down_write(sentry_lock)
956 * - mutex_lock(sentry_lock) - change_curseg() 955 * - down_read(sentry_lock) - change_curseg()
957 * - lock_page(sum_page) 956 * - lock_page(sum_page)
958 */ 957 */
959 if (type == SUM_TYPE_NODE) 958 if (type == SUM_TYPE_NODE)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 8322e4e7bb3f..90e38d8ea688 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -112,6 +112,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
112{ 112{
113 struct f2fs_io_info fio = { 113 struct f2fs_io_info fio = {
114 .sbi = F2FS_I_SB(dn->inode), 114 .sbi = F2FS_I_SB(dn->inode),
115 .ino = dn->inode->i_ino,
115 .type = DATA, 116 .type = DATA,
116 .op = REQ_OP_WRITE, 117 .op = REQ_OP_WRITE,
117 .op_flags = REQ_SYNC | REQ_PRIO, 118 .op_flags = REQ_SYNC | REQ_PRIO,
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 53fb08810ee9..b4c4f2b25304 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -235,6 +235,23 @@ static int do_read_inode(struct inode *inode)
235 fi->i_extra_isize = f2fs_has_extra_attr(inode) ? 235 fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
236 le16_to_cpu(ri->i_extra_isize) : 0; 236 le16_to_cpu(ri->i_extra_isize) : 0;
237 237
238 if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
239 f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
240 fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
241 } else if (f2fs_has_inline_xattr(inode) ||
242 f2fs_has_inline_dentry(inode)) {
243 fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
244 } else {
245
246 /*
247 * Previous inline data or directory always reserved 200 bytes
248 * in inode layout, even if inline_xattr is disabled. In order
249 * to keep inline_dentry's structure for backward compatibility,
250 * we get the space back only from inline_data.
251 */
252 fi->i_inline_xattr_size = 0;
253 }
254
238 /* check data exist */ 255 /* check data exist */
239 if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) 256 if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
240 __recover_inline_status(inode, node_page); 257 __recover_inline_status(inode, node_page);
@@ -387,6 +404,10 @@ int update_inode(struct inode *inode, struct page *node_page)
387 if (f2fs_has_extra_attr(inode)) { 404 if (f2fs_has_extra_attr(inode)) {
388 ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); 405 ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
389 406
407 if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
408 ri->i_inline_xattr_size =
409 cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
410
390 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && 411 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
391 F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, 412 F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
392 i_projid)) { 413 i_projid)) {
@@ -483,6 +504,7 @@ void f2fs_evict_inode(struct inode *inode)
483 504
484 remove_ino_entry(sbi, inode->i_ino, APPEND_INO); 505 remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
485 remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); 506 remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
507 remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
486 508
487 sb_start_intwrite(inode->i_sb); 509 sb_start_intwrite(inode->i_sb);
488 set_inode_flag(inode, FI_NO_ALLOC); 510 set_inode_flag(inode, FI_NO_ALLOC);
@@ -522,8 +544,10 @@ no_delete:
522 stat_dec_inline_dir(inode); 544 stat_dec_inline_dir(inode);
523 stat_dec_inline_inode(inode); 545 stat_dec_inline_inode(inode);
524 546
525 if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) 547 if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
526 f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); 548 f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
549 else
550 f2fs_inode_synced(inode);
527 551
528 /* ino == 0, if f2fs_new_inode() was failed t*/ 552 /* ino == 0, if f2fs_new_inode() was failed t*/
529 if (inode->i_ino) 553 if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a4dab98c4b7b..28bdf8828e73 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
29 nid_t ino; 29 nid_t ino;
30 struct inode *inode; 30 struct inode *inode;
31 bool nid_free = false; 31 bool nid_free = false;
32 int xattr_size = 0;
32 int err; 33 int err;
33 34
34 inode = new_inode(dir->i_sb); 35 inode = new_inode(dir->i_sb);
@@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
86 87
87 if (test_opt(sbi, INLINE_XATTR)) 88 if (test_opt(sbi, INLINE_XATTR))
88 set_inode_flag(inode, FI_INLINE_XATTR); 89 set_inode_flag(inode, FI_INLINE_XATTR);
90
89 if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) 91 if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
90 set_inode_flag(inode, FI_INLINE_DATA); 92 set_inode_flag(inode, FI_INLINE_DATA);
91 if (f2fs_may_inline_dentry(inode)) 93 if (f2fs_may_inline_dentry(inode))
92 set_inode_flag(inode, FI_INLINE_DENTRY); 94 set_inode_flag(inode, FI_INLINE_DENTRY);
93 95
96 if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
97 f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
98 if (f2fs_has_inline_xattr(inode))
99 xattr_size = sbi->inline_xattr_size;
100 /* Otherwise, will be 0 */
101 } else if (f2fs_has_inline_xattr(inode) ||
102 f2fs_has_inline_dentry(inode)) {
103 xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
104 }
105 F2FS_I(inode)->i_inline_xattr_size = xattr_size;
106
94 f2fs_init_extent_tree(inode, NULL); 107 f2fs_init_extent_tree(inode, NULL);
95 108
96 stat_inc_inline_xattr(inode); 109 stat_inc_inline_xattr(inode);
@@ -177,6 +190,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
177 nid_t ino = 0; 190 nid_t ino = 0;
178 int err; 191 int err;
179 192
193 if (unlikely(f2fs_cp_error(sbi)))
194 return -EIO;
195
180 err = dquot_initialize(dir); 196 err = dquot_initialize(dir);
181 if (err) 197 if (err)
182 return err; 198 return err;
@@ -221,6 +237,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
221 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 237 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
222 int err; 238 int err;
223 239
240 if (unlikely(f2fs_cp_error(sbi)))
241 return -EIO;
242
224 if (f2fs_encrypted_inode(dir) && 243 if (f2fs_encrypted_inode(dir) &&
225 !fscrypt_has_permitted_context(dir, inode)) 244 !fscrypt_has_permitted_context(dir, inode))
226 return -EPERM; 245 return -EPERM;
@@ -331,12 +350,15 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
331 struct inode *inode = NULL; 350 struct inode *inode = NULL;
332 struct f2fs_dir_entry *de; 351 struct f2fs_dir_entry *de;
333 struct page *page; 352 struct page *page;
334 nid_t ino; 353 struct dentry *new;
354 nid_t ino = -1;
335 int err = 0; 355 int err = 0;
336 unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); 356 unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
337 357
358 trace_f2fs_lookup_start(dir, dentry, flags);
359
338 if (f2fs_encrypted_inode(dir)) { 360 if (f2fs_encrypted_inode(dir)) {
339 int res = fscrypt_get_encryption_info(dir); 361 err = fscrypt_get_encryption_info(dir);
340 362
341 /* 363 /*
342 * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is 364 * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
@@ -346,18 +368,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
346 if (fscrypt_has_encryption_key(dir)) 368 if (fscrypt_has_encryption_key(dir))
347 fscrypt_set_encrypted_dentry(dentry); 369 fscrypt_set_encrypted_dentry(dentry);
348 fscrypt_set_d_op(dentry); 370 fscrypt_set_d_op(dentry);
349 if (res && res != -ENOKEY) 371 if (err && err != -ENOKEY)
350 return ERR_PTR(res); 372 goto out;
351 } 373 }
352 374
353 if (dentry->d_name.len > F2FS_NAME_LEN) 375 if (dentry->d_name.len > F2FS_NAME_LEN) {
354 return ERR_PTR(-ENAMETOOLONG); 376 err = -ENAMETOOLONG;
377 goto out;
378 }
355 379
356 de = f2fs_find_entry(dir, &dentry->d_name, &page); 380 de = f2fs_find_entry(dir, &dentry->d_name, &page);
357 if (!de) { 381 if (!de) {
358 if (IS_ERR(page)) 382 if (IS_ERR(page)) {
359 return (struct dentry *)page; 383 err = PTR_ERR(page);
360 return d_splice_alias(inode, dentry); 384 goto out;
385 }
386 goto out_splice;
361 } 387 }
362 388
363 ino = le32_to_cpu(de->ino); 389 ino = le32_to_cpu(de->ino);
@@ -365,19 +391,21 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
365 f2fs_put_page(page, 0); 391 f2fs_put_page(page, 0);
366 392
367 inode = f2fs_iget(dir->i_sb, ino); 393 inode = f2fs_iget(dir->i_sb, ino);
368 if (IS_ERR(inode)) 394 if (IS_ERR(inode)) {
369 return ERR_CAST(inode); 395 err = PTR_ERR(inode);
396 goto out;
397 }
370 398
371 if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { 399 if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) {
372 err = __recover_dot_dentries(dir, root_ino); 400 err = __recover_dot_dentries(dir, root_ino);
373 if (err) 401 if (err)
374 goto err_out; 402 goto out_iput;
375 } 403 }
376 404
377 if (f2fs_has_inline_dots(inode)) { 405 if (f2fs_has_inline_dots(inode)) {
378 err = __recover_dot_dentries(inode, dir->i_ino); 406 err = __recover_dot_dentries(inode, dir->i_ino);
379 if (err) 407 if (err)
380 goto err_out; 408 goto out_iput;
381 } 409 }
382 if (f2fs_encrypted_inode(dir) && 410 if (f2fs_encrypted_inode(dir) &&
383 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && 411 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
@@ -386,12 +414,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
386 "Inconsistent encryption contexts: %lu/%lu", 414 "Inconsistent encryption contexts: %lu/%lu",
387 dir->i_ino, inode->i_ino); 415 dir->i_ino, inode->i_ino);
388 err = -EPERM; 416 err = -EPERM;
389 goto err_out; 417 goto out_iput;
390 } 418 }
391 return d_splice_alias(inode, dentry); 419out_splice:
392 420 new = d_splice_alias(inode, dentry);
393err_out: 421 if (IS_ERR(new))
422 err = PTR_ERR(new);
423 trace_f2fs_lookup_end(dir, dentry, ino, err);
424 return new;
425out_iput:
394 iput(inode); 426 iput(inode);
427out:
428 trace_f2fs_lookup_end(dir, dentry, ino, err);
395 return ERR_PTR(err); 429 return ERR_PTR(err);
396} 430}
397 431
@@ -405,9 +439,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
405 439
406 trace_f2fs_unlink_enter(dir, dentry); 440 trace_f2fs_unlink_enter(dir, dentry);
407 441
442 if (unlikely(f2fs_cp_error(sbi)))
443 return -EIO;
444
408 err = dquot_initialize(dir); 445 err = dquot_initialize(dir);
409 if (err) 446 if (err)
410 return err; 447 return err;
448 err = dquot_initialize(inode);
449 if (err)
450 return err;
411 451
412 de = f2fs_find_entry(dir, &dentry->d_name, &page); 452 de = f2fs_find_entry(dir, &dentry->d_name, &page);
413 if (!de) { 453 if (!de) {
@@ -460,6 +500,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
460 struct fscrypt_symlink_data *sd = NULL; 500 struct fscrypt_symlink_data *sd = NULL;
461 int err; 501 int err;
462 502
503 if (unlikely(f2fs_cp_error(sbi)))
504 return -EIO;
505
463 if (f2fs_encrypted_inode(dir)) { 506 if (f2fs_encrypted_inode(dir)) {
464 err = fscrypt_get_encryption_info(dir); 507 err = fscrypt_get_encryption_info(dir);
465 if (err) 508 if (err)
@@ -566,6 +609,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
566 struct inode *inode; 609 struct inode *inode;
567 int err; 610 int err;
568 611
612 if (unlikely(f2fs_cp_error(sbi)))
613 return -EIO;
614
569 err = dquot_initialize(dir); 615 err = dquot_initialize(dir);
570 if (err) 616 if (err)
571 return err; 617 return err;
@@ -618,6 +664,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
618 struct inode *inode; 664 struct inode *inode;
619 int err = 0; 665 int err = 0;
620 666
667 if (unlikely(f2fs_cp_error(sbi)))
668 return -EIO;
669
621 err = dquot_initialize(dir); 670 err = dquot_initialize(dir);
622 if (err) 671 if (err)
623 return err; 672 return err;
@@ -712,6 +761,9 @@ out:
712 761
713static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 762static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
714{ 763{
764 if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
765 return -EIO;
766
715 if (f2fs_encrypted_inode(dir)) { 767 if (f2fs_encrypted_inode(dir)) {
716 int err = fscrypt_get_encryption_info(dir); 768 int err = fscrypt_get_encryption_info(dir);
717 if (err) 769 if (err)
@@ -723,6 +775,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
723 775
724static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) 776static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
725{ 777{
778 if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
779 return -EIO;
780
726 return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); 781 return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
727} 782}
728 783
@@ -742,6 +797,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
742 bool is_old_inline = f2fs_has_inline_dentry(old_dir); 797 bool is_old_inline = f2fs_has_inline_dentry(old_dir);
743 int err = -ENOENT; 798 int err = -ENOENT;
744 799
800 if (unlikely(f2fs_cp_error(sbi)))
801 return -EIO;
802
745 if ((f2fs_encrypted_inode(old_dir) && 803 if ((f2fs_encrypted_inode(old_dir) &&
746 !fscrypt_has_encryption_key(old_dir)) || 804 !fscrypt_has_encryption_key(old_dir)) ||
747 (f2fs_encrypted_inode(new_dir) && 805 (f2fs_encrypted_inode(new_dir) &&
@@ -767,6 +825,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
767 if (err) 825 if (err)
768 goto out; 826 goto out;
769 827
828 if (new_inode) {
829 err = dquot_initialize(new_inode);
830 if (err)
831 goto out;
832 }
833
770 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 834 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
771 if (!old_entry) { 835 if (!old_entry) {
772 if (IS_ERR(old_page)) 836 if (IS_ERR(old_page))
@@ -935,6 +999,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
935 int old_nlink = 0, new_nlink = 0; 999 int old_nlink = 0, new_nlink = 0;
936 int err = -ENOENT; 1000 int err = -ENOENT;
937 1001
1002 if (unlikely(f2fs_cp_error(sbi)))
1003 return -EIO;
1004
938 if ((f2fs_encrypted_inode(old_dir) && 1005 if ((f2fs_encrypted_inode(old_dir) &&
939 !fscrypt_has_encryption_key(old_dir)) || 1006 !fscrypt_has_encryption_key(old_dir)) ||
940 (f2fs_encrypted_inode(new_dir) && 1007 (f2fs_encrypted_inode(new_dir) &&
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b33dac9592ca..d3322752426f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -46,7 +46,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
46 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively 46 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
47 */ 47 */
48 if (type == FREE_NIDS) { 48 if (type == FREE_NIDS) {
49 mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * 49 mem_size = (nm_i->nid_cnt[FREE_NID] *
50 sizeof(struct free_nid)) >> PAGE_SHIFT; 50 sizeof(struct free_nid)) >> PAGE_SHIFT;
51 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 51 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
52 } else if (type == NAT_ENTRIES) { 52 } else if (type == NAT_ENTRIES) {
@@ -63,7 +63,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
63 } else if (type == INO_ENTRIES) { 63 } else if (type == INO_ENTRIES) {
64 int i; 64 int i;
65 65
66 for (i = 0; i <= UPDATE_INO; i++) 66 for (i = 0; i < MAX_INO_ENTRY; i++)
67 mem_size += sbi->im[i].ino_num * 67 mem_size += sbi->im[i].ino_num *
68 sizeof(struct ino_entry); 68 sizeof(struct ino_entry);
69 mem_size >>= PAGE_SHIFT; 69 mem_size >>= PAGE_SHIFT;
@@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
74 atomic_read(&sbi->total_ext_node) * 74 atomic_read(&sbi->total_ext_node) *
75 sizeof(struct extent_node)) >> PAGE_SHIFT; 75 sizeof(struct extent_node)) >> PAGE_SHIFT;
76 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 76 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
77 } else if (type == INMEM_PAGES) {
78 /* it allows 20% / total_ram for inmemory pages */
79 mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
80 res = mem_size < (val.totalram / 5);
77 } else { 81 } else {
78 if (!sbi->sb->s_bdi->wb.dirty_exceeded) 82 if (!sbi->sb->s_bdi->wb.dirty_exceeded)
79 return true; 83 return true;
@@ -134,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
134 return dst_page; 138 return dst_page;
135} 139}
136 140
141static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
142{
143 struct nat_entry *new;
144
145 if (no_fail)
146 new = f2fs_kmem_cache_alloc(nat_entry_slab,
147 GFP_NOFS | __GFP_ZERO);
148 else
149 new = kmem_cache_alloc(nat_entry_slab,
150 GFP_NOFS | __GFP_ZERO);
151 if (new) {
152 nat_set_nid(new, nid);
153 nat_reset_flag(new);
154 }
155 return new;
156}
157
158static void __free_nat_entry(struct nat_entry *e)
159{
160 kmem_cache_free(nat_entry_slab, e);
161}
162
163/* must be locked by nat_tree_lock */
164static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
165 struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
166{
167 if (no_fail)
168 f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
169 else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
170 return NULL;
171
172 if (raw_ne)
173 node_info_from_raw_nat(&ne->ni, raw_ne);
174 list_add_tail(&ne->list, &nm_i->nat_entries);
175 nm_i->nat_cnt++;
176 return ne;
177}
178
137static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 179static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
138{ 180{
139 return radix_tree_lookup(&nm_i->nat_root, n); 181 return radix_tree_lookup(&nm_i->nat_root, n);
@@ -150,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
150 list_del(&e->list); 192 list_del(&e->list);
151 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); 193 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
152 nm_i->nat_cnt--; 194 nm_i->nat_cnt--;
153 kmem_cache_free(nat_entry_slab, e); 195 __free_nat_entry(e);
154} 196}
155 197
156static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, 198static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -246,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
246 return need_update; 288 return need_update;
247} 289}
248 290
249static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, 291/* must be locked by nat_tree_lock */
250 bool no_fail)
251{
252 struct nat_entry *new;
253
254 if (no_fail) {
255 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
256 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
257 } else {
258 new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
259 if (!new)
260 return NULL;
261 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
262 kmem_cache_free(nat_entry_slab, new);
263 return NULL;
264 }
265 }
266
267 memset(new, 0, sizeof(struct nat_entry));
268 nat_set_nid(new, nid);
269 nat_reset_flag(new);
270 list_add_tail(&new->list, &nm_i->nat_entries);
271 nm_i->nat_cnt++;
272 return new;
273}
274
275static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, 292static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
276 struct f2fs_nat_entry *ne) 293 struct f2fs_nat_entry *ne)
277{ 294{
278 struct f2fs_nm_info *nm_i = NM_I(sbi); 295 struct f2fs_nm_info *nm_i = NM_I(sbi);
279 struct nat_entry *e; 296 struct nat_entry *new, *e;
280 297
298 new = __alloc_nat_entry(nid, false);
299 if (!new)
300 return;
301
302 down_write(&nm_i->nat_tree_lock);
281 e = __lookup_nat_cache(nm_i, nid); 303 e = __lookup_nat_cache(nm_i, nid);
282 if (!e) { 304 if (!e)
283 e = grab_nat_entry(nm_i, nid, false); 305 e = __init_nat_entry(nm_i, new, ne, false);
284 if (e) 306 else
285 node_info_from_raw_nat(&e->ni, ne);
286 } else {
287 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || 307 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
288 nat_get_blkaddr(e) != 308 nat_get_blkaddr(e) !=
289 le32_to_cpu(ne->block_addr) || 309 le32_to_cpu(ne->block_addr) ||
290 nat_get_version(e) != ne->version); 310 nat_get_version(e) != ne->version);
291 } 311 up_write(&nm_i->nat_tree_lock);
312 if (e != new)
313 __free_nat_entry(new);
292} 314}
293 315
294static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 316static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -296,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
296{ 318{
297 struct f2fs_nm_info *nm_i = NM_I(sbi); 319 struct f2fs_nm_info *nm_i = NM_I(sbi);
298 struct nat_entry *e; 320 struct nat_entry *e;
321 struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
299 322
300 down_write(&nm_i->nat_tree_lock); 323 down_write(&nm_i->nat_tree_lock);
301 e = __lookup_nat_cache(nm_i, ni->nid); 324 e = __lookup_nat_cache(nm_i, ni->nid);
302 if (!e) { 325 if (!e) {
303 e = grab_nat_entry(nm_i, ni->nid, true); 326 e = __init_nat_entry(nm_i, new, NULL, true);
304 copy_node_info(&e->ni, ni); 327 copy_node_info(&e->ni, ni);
305 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 328 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
306 } else if (new_blkaddr == NEW_ADDR) { 329 } else if (new_blkaddr == NEW_ADDR) {
@@ -312,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
312 copy_node_info(&e->ni, ni); 335 copy_node_info(&e->ni, ni);
313 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 336 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
314 } 337 }
338 /* let's free early to reduce memory consumption */
339 if (e != new)
340 __free_nat_entry(new);
315 341
316 /* sanity check */ 342 /* sanity check */
317 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); 343 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
@@ -327,10 +353,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
327 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 353 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
328 unsigned char version = nat_get_version(e); 354 unsigned char version = nat_get_version(e);
329 nat_set_version(e, inc_node_version(version)); 355 nat_set_version(e, inc_node_version(version));
330
331 /* in order to reuse the nid */
332 if (nm_i->next_scan_nid > ni->nid)
333 nm_i->next_scan_nid = ni->nid;
334 } 356 }
335 357
336 /* change address */ 358 /* change address */
@@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
424 f2fs_put_page(page, 1); 446 f2fs_put_page(page, 1);
425cache: 447cache:
426 /* cache nat entry */ 448 /* cache nat entry */
427 down_write(&nm_i->nat_tree_lock);
428 cache_nat_entry(sbi, nid, &ne); 449 cache_nat_entry(sbi, nid, &ne);
429 up_write(&nm_i->nat_tree_lock);
430} 450}
431 451
432/* 452/*
@@ -962,7 +982,8 @@ fail:
962 return err > 0 ? 0 : err; 982 return err > 0 ? 0 : err;
963} 983}
964 984
965int truncate_xattr_node(struct inode *inode, struct page *page) 985/* caller must lock inode page */
986int truncate_xattr_node(struct inode *inode)
966{ 987{
967 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 988 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
968 nid_t nid = F2FS_I(inode)->i_xattr_nid; 989 nid_t nid = F2FS_I(inode)->i_xattr_nid;
@@ -978,10 +999,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
978 999
979 f2fs_i_xnid_write(inode, 0); 1000 f2fs_i_xnid_write(inode, 0);
980 1001
981 set_new_dnode(&dn, inode, page, npage, nid); 1002 set_new_dnode(&dn, inode, NULL, npage, nid);
982
983 if (page)
984 dn.inode_page_locked = true;
985 truncate_node(&dn); 1003 truncate_node(&dn);
986 return 0; 1004 return 0;
987} 1005}
@@ -1000,7 +1018,7 @@ int remove_inode_page(struct inode *inode)
1000 if (err) 1018 if (err)
1001 return err; 1019 return err;
1002 1020
1003 err = truncate_xattr_node(inode, dn.inode_page); 1021 err = truncate_xattr_node(inode);
1004 if (err) { 1022 if (err) {
1005 f2fs_put_dnode(&dn); 1023 f2fs_put_dnode(&dn);
1006 return err; 1024 return err;
@@ -1220,7 +1238,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1220 if (!inode) 1238 if (!inode)
1221 return; 1239 return;
1222 1240
1223 page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); 1241 page = f2fs_pagecache_get_page(inode->i_mapping, 0,
1242 FGP_LOCK|FGP_NOWAIT, 0);
1224 if (!page) 1243 if (!page)
1225 goto iput_out; 1244 goto iput_out;
1226 1245
@@ -1244,37 +1263,6 @@ iput_out:
1244 iput(inode); 1263 iput(inode);
1245} 1264}
1246 1265
1247void move_node_page(struct page *node_page, int gc_type)
1248{
1249 if (gc_type == FG_GC) {
1250 struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
1251 struct writeback_control wbc = {
1252 .sync_mode = WB_SYNC_ALL,
1253 .nr_to_write = 1,
1254 .for_reclaim = 0,
1255 };
1256
1257 set_page_dirty(node_page);
1258 f2fs_wait_on_page_writeback(node_page, NODE, true);
1259
1260 f2fs_bug_on(sbi, PageWriteback(node_page));
1261 if (!clear_page_dirty_for_io(node_page))
1262 goto out_page;
1263
1264 if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
1265 unlock_page(node_page);
1266 goto release_page;
1267 } else {
1268 /* set page dirty and write it */
1269 if (!PageWriteback(node_page))
1270 set_page_dirty(node_page);
1271 }
1272out_page:
1273 unlock_page(node_page);
1274release_page:
1275 f2fs_put_page(node_page, 0);
1276}
1277
1278static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) 1266static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
1279{ 1267{
1280 pgoff_t index; 1268 pgoff_t index;
@@ -1340,6 +1328,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
1340 struct node_info ni; 1328 struct node_info ni;
1341 struct f2fs_io_info fio = { 1329 struct f2fs_io_info fio = {
1342 .sbi = sbi, 1330 .sbi = sbi,
1331 .ino = ino_of_node(page),
1343 .type = NODE, 1332 .type = NODE,
1344 .op = REQ_OP_WRITE, 1333 .op = REQ_OP_WRITE,
1345 .op_flags = wbc_to_write_flags(wbc), 1334 .op_flags = wbc_to_write_flags(wbc),
@@ -1412,6 +1401,37 @@ redirty_out:
1412 return AOP_WRITEPAGE_ACTIVATE; 1401 return AOP_WRITEPAGE_ACTIVATE;
1413} 1402}
1414 1403
1404void move_node_page(struct page *node_page, int gc_type)
1405{
1406 if (gc_type == FG_GC) {
1407 struct writeback_control wbc = {
1408 .sync_mode = WB_SYNC_ALL,
1409 .nr_to_write = 1,
1410 .for_reclaim = 0,
1411 };
1412
1413 set_page_dirty(node_page);
1414 f2fs_wait_on_page_writeback(node_page, NODE, true);
1415
1416 f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
1417 if (!clear_page_dirty_for_io(node_page))
1418 goto out_page;
1419
1420 if (__write_node_page(node_page, false, NULL,
1421 &wbc, false, FS_GC_NODE_IO))
1422 unlock_page(node_page);
1423 goto release_page;
1424 } else {
1425 /* set page dirty and write it */
1426 if (!PageWriteback(node_page))
1427 set_page_dirty(node_page);
1428 }
1429out_page:
1430 unlock_page(node_page);
1431release_page:
1432 f2fs_put_page(node_page, 0);
1433}
1434
1415static int f2fs_write_node_page(struct page *page, 1435static int f2fs_write_node_page(struct page *page,
1416 struct writeback_control *wbc) 1436 struct writeback_control *wbc)
1417{ 1437{
@@ -1742,35 +1762,54 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1742 return radix_tree_lookup(&nm_i->free_nid_root, n); 1762 return radix_tree_lookup(&nm_i->free_nid_root, n);
1743} 1763}
1744 1764
1745static int __insert_nid_to_list(struct f2fs_sb_info *sbi, 1765static int __insert_free_nid(struct f2fs_sb_info *sbi,
1746 struct free_nid *i, enum nid_list list, bool new) 1766 struct free_nid *i, enum nid_state state)
1747{ 1767{
1748 struct f2fs_nm_info *nm_i = NM_I(sbi); 1768 struct f2fs_nm_info *nm_i = NM_I(sbi);
1749 1769
1750 if (new) { 1770 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
1751 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); 1771 if (err)
1752 if (err) 1772 return err;
1753 return err;
1754 }
1755 1773
1756 f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : 1774 f2fs_bug_on(sbi, state != i->state);
1757 i->state != NID_ALLOC); 1775 nm_i->nid_cnt[state]++;
1758 nm_i->nid_cnt[list]++; 1776 if (state == FREE_NID)
1759 list_add_tail(&i->list, &nm_i->nid_list[list]); 1777 list_add_tail(&i->list, &nm_i->free_nid_list);
1760 return 0; 1778 return 0;
1761} 1779}
1762 1780
1763static void __remove_nid_from_list(struct f2fs_sb_info *sbi, 1781static void __remove_free_nid(struct f2fs_sb_info *sbi,
1764 struct free_nid *i, enum nid_list list, bool reuse) 1782 struct free_nid *i, enum nid_state state)
1783{
1784 struct f2fs_nm_info *nm_i = NM_I(sbi);
1785
1786 f2fs_bug_on(sbi, state != i->state);
1787 nm_i->nid_cnt[state]--;
1788 if (state == FREE_NID)
1789 list_del(&i->list);
1790 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1791}
1792
1793static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
1794 enum nid_state org_state, enum nid_state dst_state)
1765{ 1795{
1766 struct f2fs_nm_info *nm_i = NM_I(sbi); 1796 struct f2fs_nm_info *nm_i = NM_I(sbi);
1767 1797
1768 f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : 1798 f2fs_bug_on(sbi, org_state != i->state);
1769 i->state != NID_ALLOC); 1799 i->state = dst_state;
1770 nm_i->nid_cnt[list]--; 1800 nm_i->nid_cnt[org_state]--;
1771 list_del(&i->list); 1801 nm_i->nid_cnt[dst_state]++;
1772 if (!reuse) 1802
1773 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1803 switch (dst_state) {
1804 case PREALLOC_NID:
1805 list_del(&i->list);
1806 break;
1807 case FREE_NID:
1808 list_add_tail(&i->list, &nm_i->free_nid_list);
1809 break;
1810 default:
1811 BUG_ON(1);
1812 }
1774} 1813}
1775 1814
1776/* return if the nid is recognized as free */ 1815/* return if the nid is recognized as free */
@@ -1788,7 +1827,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1788 1827
1789 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); 1828 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1790 i->nid = nid; 1829 i->nid = nid;
1791 i->state = NID_NEW; 1830 i->state = FREE_NID;
1792 1831
1793 if (radix_tree_preload(GFP_NOFS)) 1832 if (radix_tree_preload(GFP_NOFS))
1794 goto err; 1833 goto err;
@@ -1801,7 +1840,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1801 * - f2fs_create 1840 * - f2fs_create
1802 * - f2fs_new_inode 1841 * - f2fs_new_inode
1803 * - alloc_nid 1842 * - alloc_nid
1804 * - __insert_nid_to_list(ALLOC_NID_LIST) 1843 * - __insert_nid_to_list(PREALLOC_NID)
1805 * - f2fs_balance_fs_bg 1844 * - f2fs_balance_fs_bg
1806 * - build_free_nids 1845 * - build_free_nids
1807 * - __build_free_nids 1846 * - __build_free_nids
@@ -1814,8 +1853,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1814 * - new_node_page 1853 * - new_node_page
1815 * - set_node_addr 1854 * - set_node_addr
1816 * - alloc_nid_done 1855 * - alloc_nid_done
1817 * - __remove_nid_from_list(ALLOC_NID_LIST) 1856 * - __remove_nid_from_list(PREALLOC_NID)
1818 * - __insert_nid_to_list(FREE_NID_LIST) 1857 * - __insert_nid_to_list(FREE_NID)
1819 */ 1858 */
1820 ne = __lookup_nat_cache(nm_i, nid); 1859 ne = __lookup_nat_cache(nm_i, nid);
1821 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1860 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
@@ -1824,13 +1863,13 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1824 1863
1825 e = __lookup_free_nid_list(nm_i, nid); 1864 e = __lookup_free_nid_list(nm_i, nid);
1826 if (e) { 1865 if (e) {
1827 if (e->state == NID_NEW) 1866 if (e->state == FREE_NID)
1828 ret = true; 1867 ret = true;
1829 goto err_out; 1868 goto err_out;
1830 } 1869 }
1831 } 1870 }
1832 ret = true; 1871 ret = true;
1833 err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); 1872 err = __insert_free_nid(sbi, i, FREE_NID);
1834err_out: 1873err_out:
1835 spin_unlock(&nm_i->nid_list_lock); 1874 spin_unlock(&nm_i->nid_list_lock);
1836 radix_tree_preload_end(); 1875 radix_tree_preload_end();
@@ -1848,8 +1887,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1848 1887
1849 spin_lock(&nm_i->nid_list_lock); 1888 spin_lock(&nm_i->nid_list_lock);
1850 i = __lookup_free_nid_list(nm_i, nid); 1889 i = __lookup_free_nid_list(nm_i, nid);
1851 if (i && i->state == NID_NEW) { 1890 if (i && i->state == FREE_NID) {
1852 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); 1891 __remove_free_nid(sbi, i, FREE_NID);
1853 need_free = true; 1892 need_free = true;
1854 } 1893 }
1855 spin_unlock(&nm_i->nid_list_lock); 1894 spin_unlock(&nm_i->nid_list_lock);
@@ -1868,15 +1907,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
1868 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) 1907 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
1869 return; 1908 return;
1870 1909
1871 if (set) 1910 if (set) {
1911 if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
1912 return;
1872 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); 1913 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1873 else
1874 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1875
1876 if (set)
1877 nm_i->free_nid_count[nat_ofs]++; 1914 nm_i->free_nid_count[nat_ofs]++;
1878 else if (!build) 1915 } else {
1879 nm_i->free_nid_count[nat_ofs]--; 1916 if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
1917 return;
1918 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
1919 if (!build)
1920 nm_i->free_nid_count[nat_ofs]--;
1921 }
1880} 1922}
1881 1923
1882static void scan_nat_page(struct f2fs_sb_info *sbi, 1924static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1911,12 +1953,32 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1911 } 1953 }
1912} 1954}
1913 1955
1914static void scan_free_nid_bits(struct f2fs_sb_info *sbi) 1956static void scan_curseg_cache(struct f2fs_sb_info *sbi)
1915{ 1957{
1916 struct f2fs_nm_info *nm_i = NM_I(sbi);
1917 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1958 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1918 struct f2fs_journal *journal = curseg->journal; 1959 struct f2fs_journal *journal = curseg->journal;
1960 int i;
1961
1962 down_read(&curseg->journal_rwsem);
1963 for (i = 0; i < nats_in_cursum(journal); i++) {
1964 block_t addr;
1965 nid_t nid;
1966
1967 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1968 nid = le32_to_cpu(nid_in_journal(journal, i));
1969 if (addr == NULL_ADDR)
1970 add_free_nid(sbi, nid, true);
1971 else
1972 remove_free_nid(sbi, nid);
1973 }
1974 up_read(&curseg->journal_rwsem);
1975}
1976
1977static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1978{
1979 struct f2fs_nm_info *nm_i = NM_I(sbi);
1919 unsigned int i, idx; 1980 unsigned int i, idx;
1981 nid_t nid;
1920 1982
1921 down_read(&nm_i->nat_tree_lock); 1983 down_read(&nm_i->nat_tree_lock);
1922 1984
@@ -1926,40 +1988,27 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
1926 if (!nm_i->free_nid_count[i]) 1988 if (!nm_i->free_nid_count[i])
1927 continue; 1989 continue;
1928 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { 1990 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
1929 nid_t nid; 1991 idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
1930 1992 NAT_ENTRY_PER_BLOCK, idx);
1931 if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) 1993 if (idx >= NAT_ENTRY_PER_BLOCK)
1932 continue; 1994 break;
1933 1995
1934 nid = i * NAT_ENTRY_PER_BLOCK + idx; 1996 nid = i * NAT_ENTRY_PER_BLOCK + idx;
1935 add_free_nid(sbi, nid, true); 1997 add_free_nid(sbi, nid, true);
1936 1998
1937 if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) 1999 if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
1938 goto out; 2000 goto out;
1939 } 2001 }
1940 } 2002 }
1941out: 2003out:
1942 down_read(&curseg->journal_rwsem); 2004 scan_curseg_cache(sbi);
1943 for (i = 0; i < nats_in_cursum(journal); i++) {
1944 block_t addr;
1945 nid_t nid;
1946 2005
1947 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1948 nid = le32_to_cpu(nid_in_journal(journal, i));
1949 if (addr == NULL_ADDR)
1950 add_free_nid(sbi, nid, true);
1951 else
1952 remove_free_nid(sbi, nid);
1953 }
1954 up_read(&curseg->journal_rwsem);
1955 up_read(&nm_i->nat_tree_lock); 2006 up_read(&nm_i->nat_tree_lock);
1956} 2007}
1957 2008
1958static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) 2009static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1959{ 2010{
1960 struct f2fs_nm_info *nm_i = NM_I(sbi); 2011 struct f2fs_nm_info *nm_i = NM_I(sbi);
1961 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1962 struct f2fs_journal *journal = curseg->journal;
1963 int i = 0; 2012 int i = 0;
1964 nid_t nid = nm_i->next_scan_nid; 2013 nid_t nid = nm_i->next_scan_nid;
1965 2014
@@ -1967,7 +2016,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1967 nid = 0; 2016 nid = 0;
1968 2017
1969 /* Enough entries */ 2018 /* Enough entries */
1970 if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) 2019 if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
1971 return; 2020 return;
1972 2021
1973 if (!sync && !available_free_memory(sbi, FREE_NIDS)) 2022 if (!sync && !available_free_memory(sbi, FREE_NIDS))
@@ -1977,7 +2026,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
1977 /* try to find free nids in free_nid_bitmap */ 2026 /* try to find free nids in free_nid_bitmap */
1978 scan_free_nid_bits(sbi); 2027 scan_free_nid_bits(sbi);
1979 2028
1980 if (nm_i->nid_cnt[FREE_NID_LIST]) 2029 if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
1981 return; 2030 return;
1982 } 2031 }
1983 2032
@@ -2005,18 +2054,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2005 nm_i->next_scan_nid = nid; 2054 nm_i->next_scan_nid = nid;
2006 2055
2007 /* find free nids from current sum_pages */ 2056 /* find free nids from current sum_pages */
2008 down_read(&curseg->journal_rwsem); 2057 scan_curseg_cache(sbi);
2009 for (i = 0; i < nats_in_cursum(journal); i++) {
2010 block_t addr;
2011 2058
2012 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
2013 nid = le32_to_cpu(nid_in_journal(journal, i));
2014 if (addr == NULL_ADDR)
2015 add_free_nid(sbi, nid, true);
2016 else
2017 remove_free_nid(sbi, nid);
2018 }
2019 up_read(&curseg->journal_rwsem);
2020 up_read(&nm_i->nat_tree_lock); 2059 up_read(&nm_i->nat_tree_lock);
2021 2060
2022 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 2061 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -2054,15 +2093,13 @@ retry:
2054 } 2093 }
2055 2094
2056 /* We should not use stale free nids created by build_free_nids */ 2095 /* We should not use stale free nids created by build_free_nids */
2057 if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { 2096 if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
2058 f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); 2097 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
2059 i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], 2098 i = list_first_entry(&nm_i->free_nid_list,
2060 struct free_nid, list); 2099 struct free_nid, list);
2061 *nid = i->nid; 2100 *nid = i->nid;
2062 2101
2063 __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); 2102 __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
2064 i->state = NID_ALLOC;
2065 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
2066 nm_i->available_nids--; 2103 nm_i->available_nids--;
2067 2104
2068 update_free_nid_bitmap(sbi, *nid, false, false); 2105 update_free_nid_bitmap(sbi, *nid, false, false);
@@ -2088,7 +2125,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
2088 spin_lock(&nm_i->nid_list_lock); 2125 spin_lock(&nm_i->nid_list_lock);
2089 i = __lookup_free_nid_list(nm_i, nid); 2126 i = __lookup_free_nid_list(nm_i, nid);
2090 f2fs_bug_on(sbi, !i); 2127 f2fs_bug_on(sbi, !i);
2091 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); 2128 __remove_free_nid(sbi, i, PREALLOC_NID);
2092 spin_unlock(&nm_i->nid_list_lock); 2129 spin_unlock(&nm_i->nid_list_lock);
2093 2130
2094 kmem_cache_free(free_nid_slab, i); 2131 kmem_cache_free(free_nid_slab, i);
@@ -2111,12 +2148,10 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
2111 f2fs_bug_on(sbi, !i); 2148 f2fs_bug_on(sbi, !i);
2112 2149
2113 if (!available_free_memory(sbi, FREE_NIDS)) { 2150 if (!available_free_memory(sbi, FREE_NIDS)) {
2114 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); 2151 __remove_free_nid(sbi, i, PREALLOC_NID);
2115 need_free = true; 2152 need_free = true;
2116 } else { 2153 } else {
2117 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); 2154 __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
2118 i->state = NID_NEW;
2119 __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
2120 } 2155 }
2121 2156
2122 nm_i->available_nids++; 2157 nm_i->available_nids++;
@@ -2135,20 +2170,19 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
2135 struct free_nid *i, *next; 2170 struct free_nid *i, *next;
2136 int nr = nr_shrink; 2171 int nr = nr_shrink;
2137 2172
2138 if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) 2173 if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2139 return 0; 2174 return 0;
2140 2175
2141 if (!mutex_trylock(&nm_i->build_lock)) 2176 if (!mutex_trylock(&nm_i->build_lock))
2142 return 0; 2177 return 0;
2143 2178
2144 spin_lock(&nm_i->nid_list_lock); 2179 spin_lock(&nm_i->nid_list_lock);
2145 list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], 2180 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
2146 list) {
2147 if (nr_shrink <= 0 || 2181 if (nr_shrink <= 0 ||
2148 nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) 2182 nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2149 break; 2183 break;
2150 2184
2151 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); 2185 __remove_free_nid(sbi, i, FREE_NID);
2152 kmem_cache_free(free_nid_slab, i); 2186 kmem_cache_free(free_nid_slab, i);
2153 nr_shrink--; 2187 nr_shrink--;
2154 } 2188 }
@@ -2174,8 +2208,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
2174 goto update_inode; 2208 goto update_inode;
2175 } 2209 }
2176 2210
2177 dst_addr = inline_xattr_addr(ipage); 2211 dst_addr = inline_xattr_addr(inode, ipage);
2178 src_addr = inline_xattr_addr(page); 2212 src_addr = inline_xattr_addr(inode, page);
2179 inline_size = inline_xattr_size(inode); 2213 inline_size = inline_xattr_size(inode);
2180 2214
2181 f2fs_wait_on_page_writeback(ipage, NODE, true); 2215 f2fs_wait_on_page_writeback(ipage, NODE, true);
@@ -2264,6 +2298,12 @@ retry:
2264 dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); 2298 dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
2265 if (dst->i_inline & F2FS_EXTRA_ATTR) { 2299 if (dst->i_inline & F2FS_EXTRA_ATTR) {
2266 dst->i_extra_isize = src->i_extra_isize; 2300 dst->i_extra_isize = src->i_extra_isize;
2301
2302 if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
2303 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2304 i_inline_xattr_size))
2305 dst->i_inline_xattr_size = src->i_inline_xattr_size;
2306
2267 if (f2fs_sb_has_project_quota(sbi->sb) && 2307 if (f2fs_sb_has_project_quota(sbi->sb) &&
2268 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), 2308 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2269 i_projid)) 2309 i_projid))
@@ -2335,8 +2375,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2335 2375
2336 ne = __lookup_nat_cache(nm_i, nid); 2376 ne = __lookup_nat_cache(nm_i, nid);
2337 if (!ne) { 2377 if (!ne) {
2338 ne = grab_nat_entry(nm_i, nid, true); 2378 ne = __alloc_nat_entry(nid, true);
2339 node_info_from_raw_nat(&ne->ni, &raw_ne); 2379 __init_nat_entry(nm_i, ne, &raw_ne, true);
2340 } 2380 }
2341 2381
2342 /* 2382 /*
@@ -2382,15 +2422,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2382 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; 2422 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2383 struct f2fs_nat_block *nat_blk = page_address(page); 2423 struct f2fs_nat_block *nat_blk = page_address(page);
2384 int valid = 0; 2424 int valid = 0;
2385 int i; 2425 int i = 0;
2386 2426
2387 if (!enabled_nat_bits(sbi, NULL)) 2427 if (!enabled_nat_bits(sbi, NULL))
2388 return; 2428 return;
2389 2429
2390 for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { 2430 if (nat_index == 0) {
2391 if (start_nid == 0 && i == 0) 2431 valid = 1;
2392 valid++; 2432 i = 1;
2393 if (nat_blk->entries[i].block_addr) 2433 }
2434 for (; i < NAT_ENTRY_PER_BLOCK; i++) {
2435 if (nat_blk->entries[i].block_addr != NULL_ADDR)
2394 valid++; 2436 valid++;
2395 } 2437 }
2396 if (valid == 0) { 2438 if (valid == 0) {
@@ -2585,7 +2627,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
2585 __set_bit_le(i, nm_i->nat_block_bitmap); 2627 __set_bit_le(i, nm_i->nat_block_bitmap);
2586 2628
2587 nid = i * NAT_ENTRY_PER_BLOCK; 2629 nid = i * NAT_ENTRY_PER_BLOCK;
2588 last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; 2630 last_nid = nid + NAT_ENTRY_PER_BLOCK;
2589 2631
2590 spin_lock(&NM_I(sbi)->nid_list_lock); 2632 spin_lock(&NM_I(sbi)->nid_list_lock);
2591 for (; nid < last_nid; nid++) 2633 for (; nid < last_nid; nid++)
@@ -2620,16 +2662,15 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2620 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2662 /* not used nids: 0, node, meta, (and root counted as valid node) */
2621 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - 2663 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
2622 F2FS_RESERVED_NODE_NUM; 2664 F2FS_RESERVED_NODE_NUM;
2623 nm_i->nid_cnt[FREE_NID_LIST] = 0; 2665 nm_i->nid_cnt[FREE_NID] = 0;
2624 nm_i->nid_cnt[ALLOC_NID_LIST] = 0; 2666 nm_i->nid_cnt[PREALLOC_NID] = 0;
2625 nm_i->nat_cnt = 0; 2667 nm_i->nat_cnt = 0;
2626 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2668 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
2627 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2669 nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
2628 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 2670 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
2629 2671
2630 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2672 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
2631 INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); 2673 INIT_LIST_HEAD(&nm_i->free_nid_list);
2632 INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
2633 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 2674 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
2634 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 2675 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
2635 INIT_LIST_HEAD(&nm_i->nat_entries); 2676 INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -2721,16 +2762,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2721 2762
2722 /* destroy free nid list */ 2763 /* destroy free nid list */
2723 spin_lock(&nm_i->nid_list_lock); 2764 spin_lock(&nm_i->nid_list_lock);
2724 list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], 2765 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
2725 list) { 2766 __remove_free_nid(sbi, i, FREE_NID);
2726 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
2727 spin_unlock(&nm_i->nid_list_lock); 2767 spin_unlock(&nm_i->nid_list_lock);
2728 kmem_cache_free(free_nid_slab, i); 2768 kmem_cache_free(free_nid_slab, i);
2729 spin_lock(&nm_i->nid_list_lock); 2769 spin_lock(&nm_i->nid_list_lock);
2730 } 2770 }
2731 f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); 2771 f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
2732 f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); 2772 f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
2733 f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); 2773 f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
2734 spin_unlock(&nm_i->nid_list_lock); 2774 spin_unlock(&nm_i->nid_list_lock);
2735 2775
2736 /* destroy nat cache */ 2776 /* destroy nat cache */
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index bb53e9955ff2..0ee3e5ff49a3 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -140,6 +140,7 @@ enum mem_type {
140 DIRTY_DENTS, /* indicates dirty dentry pages */ 140 DIRTY_DENTS, /* indicates dirty dentry pages */
141 INO_ENTRIES, /* indicates inode entries */ 141 INO_ENTRIES, /* indicates inode entries */
142 EXTENT_CACHE, /* indicates extent cache */ 142 EXTENT_CACHE, /* indicates extent cache */
143 INMEM_PAGES, /* indicates inmemory pages */
143 BASE_CHECK, /* check kernel status */ 144 BASE_CHECK, /* check kernel status */
144}; 145};
145 146
@@ -150,18 +151,10 @@ struct nat_entry_set {
150 unsigned int entry_cnt; /* the # of nat entries in set */ 151 unsigned int entry_cnt; /* the # of nat entries in set */
151}; 152};
152 153
153/*
154 * For free nid mangement
155 */
156enum nid_state {
157 NID_NEW, /* newly added to free nid list */
158 NID_ALLOC /* it is allocated */
159};
160
161struct free_nid { 154struct free_nid {
162 struct list_head list; /* for free node id list */ 155 struct list_head list; /* for free node id list */
163 nid_t nid; /* node id */ 156 nid_t nid; /* node id */
164 int state; /* in use or not: NID_NEW or NID_ALLOC */ 157 int state; /* in use or not: FREE_NID or PREALLOC_NID */
165}; 158};
166 159
167static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) 160static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
@@ -170,12 +163,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
170 struct free_nid *fnid; 163 struct free_nid *fnid;
171 164
172 spin_lock(&nm_i->nid_list_lock); 165 spin_lock(&nm_i->nid_list_lock);
173 if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) { 166 if (nm_i->nid_cnt[FREE_NID] <= 0) {
174 spin_unlock(&nm_i->nid_list_lock); 167 spin_unlock(&nm_i->nid_list_lock);
175 return; 168 return;
176 } 169 }
177 fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], 170 fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
178 struct free_nid, list);
179 *nid = fnid->nid; 171 *nid = fnid->nid;
180 spin_unlock(&nm_i->nid_list_lock); 172 spin_unlock(&nm_i->nid_list_lock);
181} 173}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9626758bc762..92c57ace1939 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
594 int ret = 0; 594 int ret = 0;
595 unsigned long s_flags = sbi->sb->s_flags; 595 unsigned long s_flags = sbi->sb->s_flags;
596 bool need_writecp = false; 596 bool need_writecp = false;
597#ifdef CONFIG_QUOTA
598 int quota_enabled;
599#endif
597 600
598 if (s_flags & MS_RDONLY) { 601 if (s_flags & MS_RDONLY) {
599 f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); 602 f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
@@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
604 /* Needed for iput() to work correctly and not trash data */ 607 /* Needed for iput() to work correctly and not trash data */
605 sbi->sb->s_flags |= MS_ACTIVE; 608 sbi->sb->s_flags |= MS_ACTIVE;
606 /* Turn on quotas so that they are updated correctly */ 609 /* Turn on quotas so that they are updated correctly */
607 f2fs_enable_quota_files(sbi); 610 quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
608#endif 611#endif
609 612
610 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 613 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -665,7 +668,8 @@ skip:
665out: 668out:
666#ifdef CONFIG_QUOTA 669#ifdef CONFIG_QUOTA
667 /* Turn quotas off */ 670 /* Turn quotas off */
668 f2fs_quota_off_umount(sbi->sb); 671 if (quota_enabled)
672 f2fs_quota_off_umount(sbi->sb);
669#endif 673#endif
670 sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 674 sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
671 675
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c695ff462ee6..c117e0913f2a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
181 return true; 181 return true;
182 182
183 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + 183 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
184 2 * reserved_sections(sbi)); 184 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
185} 185}
186 186
187void register_inmem_page(struct inode *inode, struct page *page) 187void register_inmem_page(struct inode *inode, struct page *page)
188{ 188{
189 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
189 struct f2fs_inode_info *fi = F2FS_I(inode); 190 struct f2fs_inode_info *fi = F2FS_I(inode);
190 struct inmem_pages *new; 191 struct inmem_pages *new;
191 192
@@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page)
204 mutex_lock(&fi->inmem_lock); 205 mutex_lock(&fi->inmem_lock);
205 get_page(page); 206 get_page(page);
206 list_add_tail(&new->list, &fi->inmem_pages); 207 list_add_tail(&new->list, &fi->inmem_pages);
208 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
209 if (list_empty(&fi->inmem_ilist))
210 list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
211 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
207 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 212 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
208 mutex_unlock(&fi->inmem_lock); 213 mutex_unlock(&fi->inmem_lock);
209 214
@@ -262,12 +267,41 @@ next:
262 return err; 267 return err;
263} 268}
264 269
270void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
271{
272 struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
273 struct inode *inode;
274 struct f2fs_inode_info *fi;
275next:
276 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
277 if (list_empty(head)) {
278 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
279 return;
280 }
281 fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
282 inode = igrab(&fi->vfs_inode);
283 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
284
285 if (inode) {
286 drop_inmem_pages(inode);
287 iput(inode);
288 }
289 congestion_wait(BLK_RW_ASYNC, HZ/50);
290 cond_resched();
291 goto next;
292}
293
265void drop_inmem_pages(struct inode *inode) 294void drop_inmem_pages(struct inode *inode)
266{ 295{
296 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
267 struct f2fs_inode_info *fi = F2FS_I(inode); 297 struct f2fs_inode_info *fi = F2FS_I(inode);
268 298
269 mutex_lock(&fi->inmem_lock); 299 mutex_lock(&fi->inmem_lock);
270 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); 300 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
301 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
302 if (!list_empty(&fi->inmem_ilist))
303 list_del_init(&fi->inmem_ilist);
304 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
271 mutex_unlock(&fi->inmem_lock); 305 mutex_unlock(&fi->inmem_lock);
272 306
273 clear_inode_flag(inode, FI_ATOMIC_FILE); 307 clear_inode_flag(inode, FI_ATOMIC_FILE);
@@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode,
313 struct inmem_pages *cur, *tmp; 347 struct inmem_pages *cur, *tmp;
314 struct f2fs_io_info fio = { 348 struct f2fs_io_info fio = {
315 .sbi = sbi, 349 .sbi = sbi,
350 .ino = inode->i_ino,
316 .type = DATA, 351 .type = DATA,
317 .op = REQ_OP_WRITE, 352 .op = REQ_OP_WRITE,
318 .op_flags = REQ_SYNC | REQ_PRIO, 353 .op_flags = REQ_SYNC | REQ_PRIO,
@@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode)
398 /* drop all uncommitted pages */ 433 /* drop all uncommitted pages */
399 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); 434 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
400 } 435 }
436 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
437 if (!list_empty(&fi->inmem_ilist))
438 list_del_init(&fi->inmem_ilist);
439 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
401 mutex_unlock(&fi->inmem_lock); 440 mutex_unlock(&fi->inmem_lock);
402 441
403 clear_inode_flag(inode, FI_ATOMIC_COMMIT); 442 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
472static int __submit_flush_wait(struct f2fs_sb_info *sbi, 511static int __submit_flush_wait(struct f2fs_sb_info *sbi,
473 struct block_device *bdev) 512 struct block_device *bdev)
474{ 513{
475 struct bio *bio = f2fs_bio_alloc(0); 514 struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
476 int ret; 515 int ret;
477 516
478 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; 517 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
@@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
485 return ret; 524 return ret;
486} 525}
487 526
488static int submit_flush_wait(struct f2fs_sb_info *sbi) 527static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
489{ 528{
490 int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); 529 int ret = 0;
491 int i; 530 int i;
492 531
493 if (!sbi->s_ndevs || ret) 532 if (!sbi->s_ndevs)
494 return ret; 533 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
495 534
496 for (i = 1; i < sbi->s_ndevs; i++) { 535 for (i = 0; i < sbi->s_ndevs; i++) {
536 if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
537 continue;
497 ret = __submit_flush_wait(sbi, FDEV(i).bdev); 538 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
498 if (ret) 539 if (ret)
499 break; 540 break;
@@ -519,7 +560,9 @@ repeat:
519 fcc->dispatch_list = llist_del_all(&fcc->issue_list); 560 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
520 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); 561 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
521 562
522 ret = submit_flush_wait(sbi); 563 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
564
565 ret = submit_flush_wait(sbi, cmd->ino);
523 atomic_inc(&fcc->issued_flush); 566 atomic_inc(&fcc->issued_flush);
524 567
525 llist_for_each_entry_safe(cmd, next, 568 llist_for_each_entry_safe(cmd, next,
@@ -537,7 +580,7 @@ repeat:
537 goto repeat; 580 goto repeat;
538} 581}
539 582
540int f2fs_issue_flush(struct f2fs_sb_info *sbi) 583int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
541{ 584{
542 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; 585 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
543 struct flush_cmd cmd; 586 struct flush_cmd cmd;
@@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
547 return 0; 590 return 0;
548 591
549 if (!test_opt(sbi, FLUSH_MERGE)) { 592 if (!test_opt(sbi, FLUSH_MERGE)) {
550 ret = submit_flush_wait(sbi); 593 ret = submit_flush_wait(sbi, ino);
551 atomic_inc(&fcc->issued_flush); 594 atomic_inc(&fcc->issued_flush);
552 return ret; 595 return ret;
553 } 596 }
554 597
555 if (atomic_inc_return(&fcc->issing_flush) == 1) { 598 if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
556 ret = submit_flush_wait(sbi); 599 ret = submit_flush_wait(sbi, ino);
557 atomic_dec(&fcc->issing_flush); 600 atomic_dec(&fcc->issing_flush);
558 601
559 atomic_inc(&fcc->issued_flush); 602 atomic_inc(&fcc->issued_flush);
560 return ret; 603 return ret;
561 } 604 }
562 605
606 cmd.ino = ino;
563 init_completion(&cmd.wait); 607 init_completion(&cmd.wait);
564 608
565 llist_add(&cmd.llnode, &fcc->issue_list); 609 llist_add(&cmd.llnode, &fcc->issue_list);
@@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
583 } else { 627 } else {
584 struct flush_cmd *tmp, *next; 628 struct flush_cmd *tmp, *next;
585 629
586 ret = submit_flush_wait(sbi); 630 ret = submit_flush_wait(sbi, ino);
587 631
588 llist_for_each_entry_safe(tmp, next, list, llnode) { 632 llist_for_each_entry_safe(tmp, next, list, llnode) {
589 if (tmp == &cmd) { 633 if (tmp == &cmd) {
@@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
653 } 697 }
654} 698}
655 699
700int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
701{
702 int ret = 0, i;
703
704 if (!sbi->s_ndevs)
705 return 0;
706
707 for (i = 1; i < sbi->s_ndevs; i++) {
708 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
709 continue;
710 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
711 if (ret)
712 break;
713
714 spin_lock(&sbi->dev_lock);
715 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
716 spin_unlock(&sbi->dev_lock);
717 }
718
719 return ret;
720}
721
656static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 722static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
657 enum dirty_type dirty_type) 723 enum dirty_type dirty_type)
658{ 724{
@@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
794{ 860{
795 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 861 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
796 862
863 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
864
797 f2fs_bug_on(sbi, dc->ref); 865 f2fs_bug_on(sbi, dc->ref);
798 866
799 if (dc->error == -EOPNOTSUPP) 867 if (dc->error == -EOPNOTSUPP)
@@ -845,10 +913,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi,
845 913
846/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ 914/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
847static void __submit_discard_cmd(struct f2fs_sb_info *sbi, 915static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
848 struct discard_cmd *dc) 916 struct discard_policy *dpolicy,
917 struct discard_cmd *dc)
849{ 918{
850 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 919 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
920 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
921 &(dcc->fstrim_list) : &(dcc->wait_list);
851 struct bio *bio = NULL; 922 struct bio *bio = NULL;
923 int flag = dpolicy->sync ? REQ_SYNC : 0;
852 924
853 if (dc->state != D_PREP) 925 if (dc->state != D_PREP)
854 return; 926 return;
@@ -867,9 +939,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
867 if (bio) { 939 if (bio) {
868 bio->bi_private = dc; 940 bio->bi_private = dc;
869 bio->bi_end_io = f2fs_submit_discard_endio; 941 bio->bi_end_io = f2fs_submit_discard_endio;
870 bio->bi_opf |= REQ_SYNC; 942 bio->bi_opf |= flag;
871 submit_bio(bio); 943 submit_bio(bio);
872 list_move_tail(&dc->list, &dcc->wait_list); 944 list_move_tail(&dc->list, wait_list);
873 __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); 945 __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
874 946
875 f2fs_update_iostat(sbi, FS_DISCARD, 1); 947 f2fs_update_iostat(sbi, FS_DISCARD, 1);
@@ -886,7 +958,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
886 struct rb_node *insert_parent) 958 struct rb_node *insert_parent)
887{ 959{
888 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 960 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
889 struct rb_node **p = &dcc->root.rb_node; 961 struct rb_node **p;
890 struct rb_node *parent = NULL; 962 struct rb_node *parent = NULL;
891 struct discard_cmd *dc = NULL; 963 struct discard_cmd *dc = NULL;
892 964
@@ -1054,58 +1126,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1054 return 0; 1126 return 0;
1055} 1127}
1056 1128
1057static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) 1129static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
1130 struct discard_policy *dpolicy,
1131 unsigned int start, unsigned int end)
1132{
1133 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1134 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1135 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1136 struct discard_cmd *dc;
1137 struct blk_plug plug;
1138 int issued;
1139
1140next:
1141 issued = 0;
1142
1143 mutex_lock(&dcc->cmd_lock);
1144 f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
1145
1146 dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
1147 NULL, start,
1148 (struct rb_entry **)&prev_dc,
1149 (struct rb_entry **)&next_dc,
1150 &insert_p, &insert_parent, true);
1151 if (!dc)
1152 dc = next_dc;
1153
1154 blk_start_plug(&plug);
1155
1156 while (dc && dc->lstart <= end) {
1157 struct rb_node *node;
1158
1159 if (dc->len < dpolicy->granularity)
1160 goto skip;
1161
1162 if (dc->state != D_PREP) {
1163 list_move_tail(&dc->list, &dcc->fstrim_list);
1164 goto skip;
1165 }
1166
1167 __submit_discard_cmd(sbi, dpolicy, dc);
1168
1169 if (++issued >= dpolicy->max_requests) {
1170 start = dc->lstart + dc->len;
1171
1172 blk_finish_plug(&plug);
1173 mutex_unlock(&dcc->cmd_lock);
1174
1175 schedule();
1176
1177 goto next;
1178 }
1179skip:
1180 node = rb_next(&dc->rb_node);
1181 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1182
1183 if (fatal_signal_pending(current))
1184 break;
1185 }
1186
1187 blk_finish_plug(&plug);
1188 mutex_unlock(&dcc->cmd_lock);
1189}
1190
1191static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1192 struct discard_policy *dpolicy)
1058{ 1193{
1059 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1194 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1060 struct list_head *pend_list; 1195 struct list_head *pend_list;
1061 struct discard_cmd *dc, *tmp; 1196 struct discard_cmd *dc, *tmp;
1062 struct blk_plug plug; 1197 struct blk_plug plug;
1063 int iter = 0, issued = 0; 1198 int i, iter = 0, issued = 0;
1064 int i;
1065 bool io_interrupted = false; 1199 bool io_interrupted = false;
1066 1200
1067 mutex_lock(&dcc->cmd_lock); 1201 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1068 f2fs_bug_on(sbi, 1202 if (i + 1 < dpolicy->granularity)
1069 !__check_rb_tree_consistence(sbi, &dcc->root)); 1203 break;
1070 blk_start_plug(&plug);
1071 for (i = MAX_PLIST_NUM - 1;
1072 i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
1073 pend_list = &dcc->pend_list[i]; 1204 pend_list = &dcc->pend_list[i];
1205
1206 mutex_lock(&dcc->cmd_lock);
1207 f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
1208 blk_start_plug(&plug);
1074 list_for_each_entry_safe(dc, tmp, pend_list, list) { 1209 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1075 f2fs_bug_on(sbi, dc->state != D_PREP); 1210 f2fs_bug_on(sbi, dc->state != D_PREP);
1076 1211
1077 /* Hurry up to finish fstrim */ 1212 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1078 if (dcc->pend_list_tag[i] & P_TRIM) { 1213 !is_idle(sbi)) {
1079 __submit_discard_cmd(sbi, dc);
1080 issued++;
1081
1082 if (fatal_signal_pending(current))
1083 break;
1084 continue;
1085 }
1086
1087 if (!issue_cond) {
1088 __submit_discard_cmd(sbi, dc);
1089 issued++;
1090 continue;
1091 }
1092
1093 if (is_idle(sbi)) {
1094 __submit_discard_cmd(sbi, dc);
1095 issued++;
1096 } else {
1097 io_interrupted = true; 1214 io_interrupted = true;
1215 goto skip;
1098 } 1216 }
1099 1217
1100 if (++iter >= DISCARD_ISSUE_RATE) 1218 __submit_discard_cmd(sbi, dpolicy, dc);
1101 goto out; 1219 issued++;
1220skip:
1221 if (++iter >= dpolicy->max_requests)
1222 break;
1102 } 1223 }
1103 if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) 1224 blk_finish_plug(&plug);
1104 dcc->pend_list_tag[i] &= (~P_TRIM); 1225 mutex_unlock(&dcc->cmd_lock);
1226
1227 if (iter >= dpolicy->max_requests)
1228 break;
1105 } 1229 }
1106out:
1107 blk_finish_plug(&plug);
1108 mutex_unlock(&dcc->cmd_lock);
1109 1230
1110 if (!issued && io_interrupted) 1231 if (!issued && io_interrupted)
1111 issued = -1; 1232 issued = -1;
@@ -1113,12 +1234,13 @@ out:
1113 return issued; 1234 return issued;
1114} 1235}
1115 1236
1116static void __drop_discard_cmd(struct f2fs_sb_info *sbi) 1237static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1117{ 1238{
1118 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1239 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1119 struct list_head *pend_list; 1240 struct list_head *pend_list;
1120 struct discard_cmd *dc, *tmp; 1241 struct discard_cmd *dc, *tmp;
1121 int i; 1242 int i;
1243 bool dropped = false;
1122 1244
1123 mutex_lock(&dcc->cmd_lock); 1245 mutex_lock(&dcc->cmd_lock);
1124 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { 1246 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
@@ -1126,39 +1248,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
1126 list_for_each_entry_safe(dc, tmp, pend_list, list) { 1248 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1127 f2fs_bug_on(sbi, dc->state != D_PREP); 1249 f2fs_bug_on(sbi, dc->state != D_PREP);
1128 __remove_discard_cmd(sbi, dc); 1250 __remove_discard_cmd(sbi, dc);
1251 dropped = true;
1129 } 1252 }
1130 } 1253 }
1131 mutex_unlock(&dcc->cmd_lock); 1254 mutex_unlock(&dcc->cmd_lock);
1255
1256 return dropped;
1132} 1257}
1133 1258
1134static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, 1259static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1135 struct discard_cmd *dc) 1260 struct discard_cmd *dc)
1136{ 1261{
1137 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1262 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1263 unsigned int len = 0;
1138 1264
1139 wait_for_completion_io(&dc->wait); 1265 wait_for_completion_io(&dc->wait);
1140 mutex_lock(&dcc->cmd_lock); 1266 mutex_lock(&dcc->cmd_lock);
1141 f2fs_bug_on(sbi, dc->state != D_DONE); 1267 f2fs_bug_on(sbi, dc->state != D_DONE);
1142 dc->ref--; 1268 dc->ref--;
1143 if (!dc->ref) 1269 if (!dc->ref) {
1270 if (!dc->error)
1271 len = dc->len;
1144 __remove_discard_cmd(sbi, dc); 1272 __remove_discard_cmd(sbi, dc);
1273 }
1145 mutex_unlock(&dcc->cmd_lock); 1274 mutex_unlock(&dcc->cmd_lock);
1275
1276 return len;
1146} 1277}
1147 1278
1148static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) 1279static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1280 struct discard_policy *dpolicy,
1281 block_t start, block_t end)
1149{ 1282{
1150 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1283 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1151 struct list_head *wait_list = &(dcc->wait_list); 1284 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1285 &(dcc->fstrim_list) : &(dcc->wait_list);
1152 struct discard_cmd *dc, *tmp; 1286 struct discard_cmd *dc, *tmp;
1153 bool need_wait; 1287 bool need_wait;
1288 unsigned int trimmed = 0;
1154 1289
1155next: 1290next:
1156 need_wait = false; 1291 need_wait = false;
1157 1292
1158 mutex_lock(&dcc->cmd_lock); 1293 mutex_lock(&dcc->cmd_lock);
1159 list_for_each_entry_safe(dc, tmp, wait_list, list) { 1294 list_for_each_entry_safe(dc, tmp, wait_list, list) {
1160 if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { 1295 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1296 continue;
1297 if (dc->len < dpolicy->granularity)
1298 continue;
1299 if (dc->state == D_DONE && !dc->ref) {
1161 wait_for_completion_io(&dc->wait); 1300 wait_for_completion_io(&dc->wait);
1301 if (!dc->error)
1302 trimmed += dc->len;
1162 __remove_discard_cmd(sbi, dc); 1303 __remove_discard_cmd(sbi, dc);
1163 } else { 1304 } else {
1164 dc->ref++; 1305 dc->ref++;
@@ -1169,9 +1310,17 @@ next:
1169 mutex_unlock(&dcc->cmd_lock); 1310 mutex_unlock(&dcc->cmd_lock);
1170 1311
1171 if (need_wait) { 1312 if (need_wait) {
1172 __wait_one_discard_bio(sbi, dc); 1313 trimmed += __wait_one_discard_bio(sbi, dc);
1173 goto next; 1314 goto next;
1174 } 1315 }
1316
1317 return trimmed;
1318}
1319
1320static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1321 struct discard_policy *dpolicy)
1322{
1323 __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1175} 1324}
1176 1325
1177/* This should be covered by global mutex, &sit_i->sentry_lock */ 1326/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1209,23 +1358,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
1209 } 1358 }
1210} 1359}
1211 1360
1212/* This comes from f2fs_put_super and f2fs_trim_fs */ 1361/* This comes from f2fs_put_super */
1213void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) 1362bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1214{
1215 __issue_discard_cmd(sbi, false);
1216 __drop_discard_cmd(sbi);
1217 __wait_discard_cmd(sbi, !umount);
1218}
1219
1220static void mark_discard_range_all(struct f2fs_sb_info *sbi)
1221{ 1363{
1222 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1364 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1223 int i; 1365 struct discard_policy dpolicy;
1366 bool dropped;
1224 1367
1225 mutex_lock(&dcc->cmd_lock); 1368 init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
1226 for (i = 0; i < MAX_PLIST_NUM; i++) 1369 __issue_discard_cmd(sbi, &dpolicy);
1227 dcc->pend_list_tag[i] |= P_TRIM; 1370 dropped = __drop_discard_cmd(sbi);
1228 mutex_unlock(&dcc->cmd_lock); 1371 __wait_all_discard_cmd(sbi, &dpolicy);
1372
1373 return dropped;
1229} 1374}
1230 1375
1231static int issue_discard_thread(void *data) 1376static int issue_discard_thread(void *data)
@@ -1233,12 +1378,16 @@ static int issue_discard_thread(void *data)
1233 struct f2fs_sb_info *sbi = data; 1378 struct f2fs_sb_info *sbi = data;
1234 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; 1379 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1235 wait_queue_head_t *q = &dcc->discard_wait_queue; 1380 wait_queue_head_t *q = &dcc->discard_wait_queue;
1381 struct discard_policy dpolicy;
1236 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; 1382 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1237 int issued; 1383 int issued;
1238 1384
1239 set_freezable(); 1385 set_freezable();
1240 1386
1241 do { 1387 do {
1388 init_discard_policy(&dpolicy, DPOLICY_BG,
1389 dcc->discard_granularity);
1390
1242 wait_event_interruptible_timeout(*q, 1391 wait_event_interruptible_timeout(*q,
1243 kthread_should_stop() || freezing(current) || 1392 kthread_should_stop() || freezing(current) ||
1244 dcc->discard_wake, 1393 dcc->discard_wake,
@@ -1251,17 +1400,18 @@ static int issue_discard_thread(void *data)
1251 if (dcc->discard_wake) { 1400 if (dcc->discard_wake) {
1252 dcc->discard_wake = 0; 1401 dcc->discard_wake = 0;
1253 if (sbi->gc_thread && sbi->gc_thread->gc_urgent) 1402 if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
1254 mark_discard_range_all(sbi); 1403 init_discard_policy(&dpolicy,
1404 DPOLICY_FORCE, 1);
1255 } 1405 }
1256 1406
1257 sb_start_intwrite(sbi->sb); 1407 sb_start_intwrite(sbi->sb);
1258 1408
1259 issued = __issue_discard_cmd(sbi, true); 1409 issued = __issue_discard_cmd(sbi, &dpolicy);
1260 if (issued) { 1410 if (issued) {
1261 __wait_discard_cmd(sbi, true); 1411 __wait_all_discard_cmd(sbi, &dpolicy);
1262 wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; 1412 wait_ms = dpolicy.min_interval;
1263 } else { 1413 } else {
1264 wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; 1414 wait_ms = dpolicy.max_interval;
1265 } 1415 }
1266 1416
1267 sb_end_intwrite(sbi->sb); 1417 sb_end_intwrite(sbi->sb);
@@ -1525,7 +1675,6 @@ find_next:
1525 1675
1526 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, 1676 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
1527 len); 1677 len);
1528 cpc->trimmed += len;
1529 total_len += len; 1678 total_len += len;
1530 } else { 1679 } else {
1531 next_pos = find_next_bit_le(entry->discard_map, 1680 next_pos = find_next_bit_le(entry->discard_map,
@@ -1546,6 +1695,37 @@ skip:
1546 wake_up_discard_thread(sbi, false); 1695 wake_up_discard_thread(sbi, false);
1547} 1696}
1548 1697
1698void init_discard_policy(struct discard_policy *dpolicy,
1699 int discard_type, unsigned int granularity)
1700{
1701 /* common policy */
1702 dpolicy->type = discard_type;
1703 dpolicy->sync = true;
1704 dpolicy->granularity = granularity;
1705
1706 if (discard_type == DPOLICY_BG) {
1707 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1708 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1709 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1710 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1711 dpolicy->io_aware = true;
1712 } else if (discard_type == DPOLICY_FORCE) {
1713 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1714 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1715 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1716 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1717 dpolicy->io_aware = true;
1718 } else if (discard_type == DPOLICY_FSTRIM) {
1719 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1720 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1721 dpolicy->io_aware = false;
1722 } else if (discard_type == DPOLICY_UMOUNT) {
1723 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1724 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1725 dpolicy->io_aware = false;
1726 }
1727}
1728
1549static int create_discard_cmd_control(struct f2fs_sb_info *sbi) 1729static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1550{ 1730{
1551 dev_t dev = sbi->sb->s_bdev->bd_dev; 1731 dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -1563,12 +1743,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1563 1743
1564 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; 1744 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1565 INIT_LIST_HEAD(&dcc->entry_list); 1745 INIT_LIST_HEAD(&dcc->entry_list);
1566 for (i = 0; i < MAX_PLIST_NUM; i++) { 1746 for (i = 0; i < MAX_PLIST_NUM; i++)
1567 INIT_LIST_HEAD(&dcc->pend_list[i]); 1747 INIT_LIST_HEAD(&dcc->pend_list[i]);
1568 if (i >= dcc->discard_granularity - 1)
1569 dcc->pend_list_tag[i] |= P_ACTIVE;
1570 }
1571 INIT_LIST_HEAD(&dcc->wait_list); 1748 INIT_LIST_HEAD(&dcc->wait_list);
1749 INIT_LIST_HEAD(&dcc->fstrim_list);
1572 mutex_init(&dcc->cmd_lock); 1750 mutex_init(&dcc->cmd_lock);
1573 atomic_set(&dcc->issued_discard, 0); 1751 atomic_set(&dcc->issued_discard, 0);
1574 atomic_set(&dcc->issing_discard, 0); 1752 atomic_set(&dcc->issing_discard, 0);
@@ -1716,16 +1894,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
1716 get_sec_entry(sbi, segno)->valid_blocks += del; 1894 get_sec_entry(sbi, segno)->valid_blocks += del;
1717} 1895}
1718 1896
1719void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
1720{
1721 update_sit_entry(sbi, new, 1);
1722 if (GET_SEGNO(sbi, old) != NULL_SEGNO)
1723 update_sit_entry(sbi, old, -1);
1724
1725 locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
1726 locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
1727}
1728
1729void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 1897void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
1730{ 1898{
1731 unsigned int segno = GET_SEGNO(sbi, addr); 1899 unsigned int segno = GET_SEGNO(sbi, addr);
@@ -1736,14 +1904,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
1736 return; 1904 return;
1737 1905
1738 /* add it into sit main buffer */ 1906 /* add it into sit main buffer */
1739 mutex_lock(&sit_i->sentry_lock); 1907 down_write(&sit_i->sentry_lock);
1740 1908
1741 update_sit_entry(sbi, addr, -1); 1909 update_sit_entry(sbi, addr, -1);
1742 1910
1743 /* add it into dirty seglist */ 1911 /* add it into dirty seglist */
1744 locate_dirty_segment(sbi, segno); 1912 locate_dirty_segment(sbi, segno);
1745 1913
1746 mutex_unlock(&sit_i->sentry_lock); 1914 up_write(&sit_i->sentry_lock);
1747} 1915}
1748 1916
1749bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) 1917bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
@@ -1756,7 +1924,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
1756 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) 1924 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
1757 return true; 1925 return true;
1758 1926
1759 mutex_lock(&sit_i->sentry_lock); 1927 down_read(&sit_i->sentry_lock);
1760 1928
1761 segno = GET_SEGNO(sbi, blkaddr); 1929 segno = GET_SEGNO(sbi, blkaddr);
1762 se = get_seg_entry(sbi, segno); 1930 se = get_seg_entry(sbi, segno);
@@ -1765,7 +1933,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
1765 if (f2fs_test_bit(offset, se->ckpt_valid_map)) 1933 if (f2fs_test_bit(offset, se->ckpt_valid_map))
1766 is_cp = true; 1934 is_cp = true;
1767 1935
1768 mutex_unlock(&sit_i->sentry_lock); 1936 up_read(&sit_i->sentry_lock);
1769 1937
1770 return is_cp; 1938 return is_cp;
1771} 1939}
@@ -1823,12 +1991,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
1823void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) 1991void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
1824{ 1992{
1825 struct page *page = grab_meta_page(sbi, blk_addr); 1993 struct page *page = grab_meta_page(sbi, blk_addr);
1826 void *dst = page_address(page);
1827 1994
1828 if (src) 1995 memcpy(page_address(page), src, PAGE_SIZE);
1829 memcpy(dst, src, PAGE_SIZE);
1830 else
1831 memset(dst, 0, PAGE_SIZE);
1832 set_page_dirty(page); 1996 set_page_dirty(page);
1833 f2fs_put_page(page, 1); 1997 f2fs_put_page(page, 1);
1834} 1998}
@@ -1927,7 +2091,6 @@ find_other_zone:
1927 } 2091 }
1928 secno = left_start; 2092 secno = left_start;
1929skip_left: 2093skip_left:
1930 hint = secno;
1931 segno = GET_SEG_FROM_SEC(sbi, secno); 2094 segno = GET_SEG_FROM_SEC(sbi, secno);
1932 zoneno = GET_ZONE_FROM_SEC(sbi, secno); 2095 zoneno = GET_ZONE_FROM_SEC(sbi, secno);
1933 2096
@@ -2162,12 +2325,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
2162 unsigned int old_segno; 2325 unsigned int old_segno;
2163 int i; 2326 int i;
2164 2327
2328 down_write(&SIT_I(sbi)->sentry_lock);
2329
2165 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 2330 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2166 curseg = CURSEG_I(sbi, i); 2331 curseg = CURSEG_I(sbi, i);
2167 old_segno = curseg->segno; 2332 old_segno = curseg->segno;
2168 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); 2333 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2169 locate_dirty_segment(sbi, old_segno); 2334 locate_dirty_segment(sbi, old_segno);
2170 } 2335 }
2336
2337 up_write(&SIT_I(sbi)->sentry_lock);
2171} 2338}
2172 2339
2173static const struct segment_allocation default_salloc_ops = { 2340static const struct segment_allocation default_salloc_ops = {
@@ -2179,14 +2346,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2179 __u64 trim_start = cpc->trim_start; 2346 __u64 trim_start = cpc->trim_start;
2180 bool has_candidate = false; 2347 bool has_candidate = false;
2181 2348
2182 mutex_lock(&SIT_I(sbi)->sentry_lock); 2349 down_write(&SIT_I(sbi)->sentry_lock);
2183 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { 2350 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2184 if (add_discard_addrs(sbi, cpc, true)) { 2351 if (add_discard_addrs(sbi, cpc, true)) {
2185 has_candidate = true; 2352 has_candidate = true;
2186 break; 2353 break;
2187 } 2354 }
2188 } 2355 }
2189 mutex_unlock(&SIT_I(sbi)->sentry_lock); 2356 up_write(&SIT_I(sbi)->sentry_lock);
2190 2357
2191 cpc->trim_start = trim_start; 2358 cpc->trim_start = trim_start;
2192 return has_candidate; 2359 return has_candidate;
@@ -2196,14 +2363,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2196{ 2363{
2197 __u64 start = F2FS_BYTES_TO_BLK(range->start); 2364 __u64 start = F2FS_BYTES_TO_BLK(range->start);
2198 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; 2365 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2199 unsigned int start_segno, end_segno; 2366 unsigned int start_segno, end_segno, cur_segno;
2367 block_t start_block, end_block;
2200 struct cp_control cpc; 2368 struct cp_control cpc;
2369 struct discard_policy dpolicy;
2370 unsigned long long trimmed = 0;
2201 int err = 0; 2371 int err = 0;
2202 2372
2203 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) 2373 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2204 return -EINVAL; 2374 return -EINVAL;
2205 2375
2206 cpc.trimmed = 0;
2207 if (end <= MAIN_BLKADDR(sbi)) 2376 if (end <= MAIN_BLKADDR(sbi))
2208 goto out; 2377 goto out;
2209 2378
@@ -2217,12 +2386,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2217 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); 2386 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2218 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 2387 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2219 GET_SEGNO(sbi, end); 2388 GET_SEGNO(sbi, end);
2389
2220 cpc.reason = CP_DISCARD; 2390 cpc.reason = CP_DISCARD;
2221 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); 2391 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2222 2392
2223 /* do checkpoint to issue discard commands safely */ 2393 /* do checkpoint to issue discard commands safely */
2224 for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { 2394 for (cur_segno = start_segno; cur_segno <= end_segno;
2225 cpc.trim_start = start_segno; 2395 cur_segno = cpc.trim_end + 1) {
2396 cpc.trim_start = cur_segno;
2226 2397
2227 if (sbi->discard_blks == 0) 2398 if (sbi->discard_blks == 0)
2228 break; 2399 break;
@@ -2230,7 +2401,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2230 cpc.trim_end = end_segno; 2401 cpc.trim_end = end_segno;
2231 else 2402 else
2232 cpc.trim_end = min_t(unsigned int, 2403 cpc.trim_end = min_t(unsigned int,
2233 rounddown(start_segno + 2404 rounddown(cur_segno +
2234 BATCHED_TRIM_SEGMENTS(sbi), 2405 BATCHED_TRIM_SEGMENTS(sbi),
2235 sbi->segs_per_sec) - 1, end_segno); 2406 sbi->segs_per_sec) - 1, end_segno);
2236 2407
@@ -2242,11 +2413,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2242 2413
2243 schedule(); 2414 schedule();
2244 } 2415 }
2245 /* It's time to issue all the filed discards */ 2416
2246 mark_discard_range_all(sbi); 2417 start_block = START_BLOCK(sbi, start_segno);
2247 f2fs_wait_discard_bios(sbi, false); 2418 end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
2419
2420 init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2421 __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
2422 trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
2423 start_block, end_block);
2248out: 2424out:
2249 range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); 2425 range->len = F2FS_BLK_TO_BYTES(trimmed);
2250 return err; 2426 return err;
2251} 2427}
2252 2428
@@ -2258,6 +2434,18 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2258 return false; 2434 return false;
2259} 2435}
2260 2436
2437int rw_hint_to_seg_type(enum rw_hint hint)
2438{
2439 switch (hint) {
2440 case WRITE_LIFE_SHORT:
2441 return CURSEG_HOT_DATA;
2442 case WRITE_LIFE_EXTREME:
2443 return CURSEG_COLD_DATA;
2444 default:
2445 return CURSEG_WARM_DATA;
2446 }
2447}
2448
2261static int __get_segment_type_2(struct f2fs_io_info *fio) 2449static int __get_segment_type_2(struct f2fs_io_info *fio)
2262{ 2450{
2263 if (fio->type == DATA) 2451 if (fio->type == DATA)
@@ -2292,7 +2480,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
2292 return CURSEG_COLD_DATA; 2480 return CURSEG_COLD_DATA;
2293 if (is_inode_flag_set(inode, FI_HOT_DATA)) 2481 if (is_inode_flag_set(inode, FI_HOT_DATA))
2294 return CURSEG_HOT_DATA; 2482 return CURSEG_HOT_DATA;
2295 return CURSEG_WARM_DATA; 2483 return rw_hint_to_seg_type(inode->i_write_hint);
2296 } else { 2484 } else {
2297 if (IS_DNODE(fio->page)) 2485 if (IS_DNODE(fio->page))
2298 return is_cold_node(fio->page) ? CURSEG_WARM_NODE : 2486 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
@@ -2336,8 +2524,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2336 struct sit_info *sit_i = SIT_I(sbi); 2524 struct sit_info *sit_i = SIT_I(sbi);
2337 struct curseg_info *curseg = CURSEG_I(sbi, type); 2525 struct curseg_info *curseg = CURSEG_I(sbi, type);
2338 2526
2527 down_read(&SM_I(sbi)->curseg_lock);
2528
2339 mutex_lock(&curseg->curseg_mutex); 2529 mutex_lock(&curseg->curseg_mutex);
2340 mutex_lock(&sit_i->sentry_lock); 2530 down_write(&sit_i->sentry_lock);
2341 2531
2342 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 2532 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
2343 2533
@@ -2354,15 +2544,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2354 2544
2355 stat_inc_block_count(sbi, curseg); 2545 stat_inc_block_count(sbi, curseg);
2356 2546
2547 /*
2548 * SIT information should be updated before segment allocation,
2549 * since SSR needs latest valid block information.
2550 */
2551 update_sit_entry(sbi, *new_blkaddr, 1);
2552 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
2553 update_sit_entry(sbi, old_blkaddr, -1);
2554
2357 if (!__has_curseg_space(sbi, type)) 2555 if (!__has_curseg_space(sbi, type))
2358 sit_i->s_ops->allocate_segment(sbi, type, false); 2556 sit_i->s_ops->allocate_segment(sbi, type, false);
2557
2359 /* 2558 /*
2360 * SIT information should be updated after segment allocation, 2559 * segment dirty status should be updated after segment allocation,
2361 * since we need to keep dirty segments precisely under SSR. 2560 * so we just need to update status only one time after previous
2561 * segment being closed.
2362 */ 2562 */
2363 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 2563 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
2564 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
2364 2565
2365 mutex_unlock(&sit_i->sentry_lock); 2566 up_write(&sit_i->sentry_lock);
2366 2567
2367 if (page && IS_NODESEG(type)) { 2568 if (page && IS_NODESEG(type)) {
2368 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 2569 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
@@ -2382,6 +2583,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2382 } 2583 }
2383 2584
2384 mutex_unlock(&curseg->curseg_mutex); 2585 mutex_unlock(&curseg->curseg_mutex);
2586
2587 up_read(&SM_I(sbi)->curseg_lock);
2588}
2589
2590static void update_device_state(struct f2fs_io_info *fio)
2591{
2592 struct f2fs_sb_info *sbi = fio->sbi;
2593 unsigned int devidx;
2594
2595 if (!sbi->s_ndevs)
2596 return;
2597
2598 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
2599
2600 /* update device state for fsync */
2601 set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
2602
2603 /* update device state for checkpoint */
2604 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
2605 spin_lock(&sbi->dev_lock);
2606 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
2607 spin_unlock(&sbi->dev_lock);
2608 }
2385} 2609}
2386 2610
2387static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) 2611static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -2398,6 +2622,8 @@ reallocate:
2398 if (err == -EAGAIN) { 2622 if (err == -EAGAIN) {
2399 fio->old_blkaddr = fio->new_blkaddr; 2623 fio->old_blkaddr = fio->new_blkaddr;
2400 goto reallocate; 2624 goto reallocate;
2625 } else if (!err) {
2626 update_device_state(fio);
2401 } 2627 }
2402} 2628}
2403 2629
@@ -2458,12 +2684,26 @@ int rewrite_data_page(struct f2fs_io_info *fio)
2458 stat_inc_inplace_blocks(fio->sbi); 2684 stat_inc_inplace_blocks(fio->sbi);
2459 2685
2460 err = f2fs_submit_page_bio(fio); 2686 err = f2fs_submit_page_bio(fio);
2687 if (!err)
2688 update_device_state(fio);
2461 2689
2462 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); 2690 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
2463 2691
2464 return err; 2692 return err;
2465} 2693}
2466 2694
2695static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
2696 unsigned int segno)
2697{
2698 int i;
2699
2700 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
2701 if (CURSEG_I(sbi, i)->segno == segno)
2702 break;
2703 }
2704 return i;
2705}
2706
2467void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 2707void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2468 block_t old_blkaddr, block_t new_blkaddr, 2708 block_t old_blkaddr, block_t new_blkaddr,
2469 bool recover_curseg, bool recover_newaddr) 2709 bool recover_curseg, bool recover_newaddr)
@@ -2479,6 +2719,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2479 se = get_seg_entry(sbi, segno); 2719 se = get_seg_entry(sbi, segno);
2480 type = se->type; 2720 type = se->type;
2481 2721
2722 down_write(&SM_I(sbi)->curseg_lock);
2723
2482 if (!recover_curseg) { 2724 if (!recover_curseg) {
2483 /* for recovery flow */ 2725 /* for recovery flow */
2484 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { 2726 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
@@ -2488,14 +2730,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2488 type = CURSEG_WARM_DATA; 2730 type = CURSEG_WARM_DATA;
2489 } 2731 }
2490 } else { 2732 } else {
2491 if (!IS_CURSEG(sbi, segno)) 2733 if (IS_CURSEG(sbi, segno)) {
2734 /* se->type is volatile as SSR allocation */
2735 type = __f2fs_get_curseg(sbi, segno);
2736 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
2737 } else {
2492 type = CURSEG_WARM_DATA; 2738 type = CURSEG_WARM_DATA;
2739 }
2493 } 2740 }
2494 2741
2495 curseg = CURSEG_I(sbi, type); 2742 curseg = CURSEG_I(sbi, type);
2496 2743
2497 mutex_lock(&curseg->curseg_mutex); 2744 mutex_lock(&curseg->curseg_mutex);
2498 mutex_lock(&sit_i->sentry_lock); 2745 down_write(&sit_i->sentry_lock);
2499 2746
2500 old_cursegno = curseg->segno; 2747 old_cursegno = curseg->segno;
2501 old_blkoff = curseg->next_blkoff; 2748 old_blkoff = curseg->next_blkoff;
@@ -2527,8 +2774,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
2527 curseg->next_blkoff = old_blkoff; 2774 curseg->next_blkoff = old_blkoff;
2528 } 2775 }
2529 2776
2530 mutex_unlock(&sit_i->sentry_lock); 2777 up_write(&sit_i->sentry_lock);
2531 mutex_unlock(&curseg->curseg_mutex); 2778 mutex_unlock(&curseg->curseg_mutex);
2779 up_write(&SM_I(sbi)->curseg_lock);
2532} 2780}
2533 2781
2534void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, 2782void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -2982,7 +3230,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2982 bool to_journal = true; 3230 bool to_journal = true;
2983 struct seg_entry *se; 3231 struct seg_entry *se;
2984 3232
2985 mutex_lock(&sit_i->sentry_lock); 3233 down_write(&sit_i->sentry_lock);
2986 3234
2987 if (!sit_i->dirty_sentries) 3235 if (!sit_i->dirty_sentries)
2988 goto out; 3236 goto out;
@@ -3076,7 +3324,7 @@ out:
3076 3324
3077 cpc->trim_start = trim_start; 3325 cpc->trim_start = trim_start;
3078 } 3326 }
3079 mutex_unlock(&sit_i->sentry_lock); 3327 up_write(&sit_i->sentry_lock);
3080 3328
3081 set_prefree_as_free_segments(sbi); 3329 set_prefree_as_free_segments(sbi);
3082} 3330}
@@ -3169,7 +3417,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
3169 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; 3417 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
3170 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); 3418 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3171 sit_i->mounted_time = ktime_get_real_seconds(); 3419 sit_i->mounted_time = ktime_get_real_seconds();
3172 mutex_init(&sit_i->sentry_lock); 3420 init_rwsem(&sit_i->sentry_lock);
3173 return 0; 3421 return 0;
3174} 3422}
3175 3423
@@ -3410,7 +3658,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
3410 struct sit_info *sit_i = SIT_I(sbi); 3658 struct sit_info *sit_i = SIT_I(sbi);
3411 unsigned int segno; 3659 unsigned int segno;
3412 3660
3413 mutex_lock(&sit_i->sentry_lock); 3661 down_write(&sit_i->sentry_lock);
3414 3662
3415 sit_i->min_mtime = LLONG_MAX; 3663 sit_i->min_mtime = LLONG_MAX;
3416 3664
@@ -3427,7 +3675,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
3427 sit_i->min_mtime = mtime; 3675 sit_i->min_mtime = mtime;
3428 } 3676 }
3429 sit_i->max_mtime = get_mtime(sbi); 3677 sit_i->max_mtime = get_mtime(sbi);
3430 mutex_unlock(&sit_i->sentry_lock); 3678 up_write(&sit_i->sentry_lock);
3431} 3679}
3432 3680
3433int build_segment_manager(struct f2fs_sb_info *sbi) 3681int build_segment_manager(struct f2fs_sb_info *sbi)
@@ -3460,11 +3708,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
3460 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 3708 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
3461 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; 3709 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
3462 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; 3710 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
3711 sm_info->min_ssr_sections = reserved_sections(sbi);
3463 3712
3464 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; 3713 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
3465 3714
3466 INIT_LIST_HEAD(&sm_info->sit_entry_set); 3715 INIT_LIST_HEAD(&sm_info->sit_entry_set);
3467 3716
3717 init_rwsem(&sm_info->curseg_lock);
3718
3468 if (!f2fs_readonly(sbi->sb)) { 3719 if (!f2fs_readonly(sbi->sb)) {
3469 err = create_flush_cmd_control(sbi); 3720 err = create_flush_cmd_control(sbi);
3470 if (err) 3721 if (err)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e0a6cc23ace3..d1d394cdf61d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -231,7 +231,7 @@ struct sit_info {
231 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ 231 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
232 unsigned int dirty_sentries; /* # of dirty sentries */ 232 unsigned int dirty_sentries; /* # of dirty sentries */
233 unsigned int sents_per_block; /* # of SIT entries per block */ 233 unsigned int sents_per_block; /* # of SIT entries per block */
234 struct mutex sentry_lock; /* to protect SIT cache */ 234 struct rw_semaphore sentry_lock; /* to protect SIT cache */
235 struct seg_entry *sentries; /* SIT segment-level cache */ 235 struct seg_entry *sentries; /* SIT segment-level cache */
236 struct sec_entry *sec_entries; /* SIT section-level cache */ 236 struct sec_entry *sec_entries; /* SIT section-level cache */
237 237
@@ -497,6 +497,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
497 return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); 497 return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
498} 498}
499 499
500static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
501{
502 unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
503 get_pages(sbi, F2FS_DIRTY_DENTS);
504 unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
505 unsigned int segno, left_blocks;
506 int i;
507
508 /* check current node segment */
509 for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
510 segno = CURSEG_I(sbi, i)->segno;
511 left_blocks = sbi->blocks_per_seg -
512 get_seg_entry(sbi, segno)->ckpt_valid_blocks;
513
514 if (node_blocks > left_blocks)
515 return false;
516 }
517
518 /* check current data segment */
519 segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
520 left_blocks = sbi->blocks_per_seg -
521 get_seg_entry(sbi, segno)->ckpt_valid_blocks;
522 if (dent_blocks > left_blocks)
523 return false;
524 return true;
525}
526
500static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, 527static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
501 int freed, int needed) 528 int freed, int needed)
502{ 529{
@@ -507,6 +534,9 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
507 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 534 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
508 return false; 535 return false;
509 536
537 if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
538 has_curseg_enough_space(sbi))
539 return false;
510 return (free_sections(sbi) + freed) <= 540 return (free_sections(sbi) + freed) <=
511 (node_secs + 2 * dent_secs + imeta_secs + 541 (node_secs + 2 * dent_secs + imeta_secs +
512 reserved_sections(sbi) + needed); 542 reserved_sections(sbi) + needed);
@@ -731,7 +761,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
731static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, 761static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
732 unsigned int secno) 762 unsigned int secno)
733{ 763{
734 if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= 764 if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >
735 sbi->fggc_threshold) 765 sbi->fggc_threshold)
736 return true; 766 return true;
737 return false; 767 return false;
@@ -796,8 +826,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
796 goto wake_up; 826 goto wake_up;
797 827
798 mutex_lock(&dcc->cmd_lock); 828 mutex_lock(&dcc->cmd_lock);
799 for (i = MAX_PLIST_NUM - 1; 829 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
800 i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { 830 if (i + 1 < dcc->discard_granularity)
831 break;
801 if (!list_empty(&dcc->pend_list[i])) { 832 if (!list_empty(&dcc->pend_list[i])) {
802 wakeup = true; 833 wakeup = true;
803 break; 834 break;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 5c60fc28ec75..0b5664a1a6cc 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -28,7 +28,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
28 28
29static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) 29static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
30{ 30{
31 long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS; 31 long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS;
32 32
33 return count > 0 ? count : 0; 33 return count > 0 ? count : 0;
34} 34}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 97e03c637e90..a6c5dd450002 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,6 +44,8 @@ static struct kmem_cache *f2fs_inode_cachep;
44char *fault_name[FAULT_MAX] = { 44char *fault_name[FAULT_MAX] = {
45 [FAULT_KMALLOC] = "kmalloc", 45 [FAULT_KMALLOC] = "kmalloc",
46 [FAULT_PAGE_ALLOC] = "page alloc", 46 [FAULT_PAGE_ALLOC] = "page alloc",
47 [FAULT_PAGE_GET] = "page get",
48 [FAULT_ALLOC_BIO] = "alloc bio",
47 [FAULT_ALLOC_NID] = "alloc nid", 49 [FAULT_ALLOC_NID] = "alloc nid",
48 [FAULT_ORPHAN] = "orphan", 50 [FAULT_ORPHAN] = "orphan",
49 [FAULT_BLOCK] = "no more block", 51 [FAULT_BLOCK] = "no more block",
@@ -92,6 +94,7 @@ enum {
92 Opt_disable_ext_identify, 94 Opt_disable_ext_identify,
93 Opt_inline_xattr, 95 Opt_inline_xattr,
94 Opt_noinline_xattr, 96 Opt_noinline_xattr,
97 Opt_inline_xattr_size,
95 Opt_inline_data, 98 Opt_inline_data,
96 Opt_inline_dentry, 99 Opt_inline_dentry,
97 Opt_noinline_dentry, 100 Opt_noinline_dentry,
@@ -141,6 +144,7 @@ static match_table_t f2fs_tokens = {
141 {Opt_disable_ext_identify, "disable_ext_identify"}, 144 {Opt_disable_ext_identify, "disable_ext_identify"},
142 {Opt_inline_xattr, "inline_xattr"}, 145 {Opt_inline_xattr, "inline_xattr"},
143 {Opt_noinline_xattr, "noinline_xattr"}, 146 {Opt_noinline_xattr, "noinline_xattr"},
147 {Opt_inline_xattr_size, "inline_xattr_size=%u"},
144 {Opt_inline_data, "inline_data"}, 148 {Opt_inline_data, "inline_data"},
145 {Opt_inline_dentry, "inline_dentry"}, 149 {Opt_inline_dentry, "inline_dentry"},
146 {Opt_noinline_dentry, "noinline_dentry"}, 150 {Opt_noinline_dentry, "noinline_dentry"},
@@ -209,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
209 "quota options when quota turned on"); 213 "quota options when quota turned on");
210 return -EINVAL; 214 return -EINVAL;
211 } 215 }
216 if (f2fs_sb_has_quota_ino(sb)) {
217 f2fs_msg(sb, KERN_INFO,
218 "QUOTA feature is enabled, so ignore qf_name");
219 return 0;
220 }
221
212 qname = match_strdup(args); 222 qname = match_strdup(args);
213 if (!qname) { 223 if (!qname) {
214 f2fs_msg(sb, KERN_ERR, 224 f2fs_msg(sb, KERN_ERR,
@@ -287,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
287 return -1; 297 return -1;
288 } 298 }
289 } 299 }
300
301 if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) {
302 f2fs_msg(sbi->sb, KERN_INFO,
303 "QUOTA feature is enabled, so ignore jquota_fmt");
304 sbi->s_jquota_fmt = 0;
305 }
306 if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) {
307 f2fs_msg(sbi->sb, KERN_INFO,
308 "Filesystem with quota feature cannot be mounted RDWR "
309 "without CONFIG_QUOTA");
310 return -1;
311 }
290 return 0; 312 return 0;
291} 313}
292#endif 314#endif
@@ -383,6 +405,12 @@ static int parse_options(struct super_block *sb, char *options)
383 case Opt_noinline_xattr: 405 case Opt_noinline_xattr:
384 clear_opt(sbi, INLINE_XATTR); 406 clear_opt(sbi, INLINE_XATTR);
385 break; 407 break;
408 case Opt_inline_xattr_size:
409 if (args->from && match_int(args, &arg))
410 return -EINVAL;
411 set_opt(sbi, INLINE_XATTR_SIZE);
412 sbi->inline_xattr_size = arg;
413 break;
386#else 414#else
387 case Opt_user_xattr: 415 case Opt_user_xattr:
388 f2fs_msg(sb, KERN_INFO, 416 f2fs_msg(sb, KERN_INFO,
@@ -604,6 +632,24 @@ static int parse_options(struct super_block *sb, char *options)
604 F2FS_IO_SIZE_KB(sbi)); 632 F2FS_IO_SIZE_KB(sbi));
605 return -EINVAL; 633 return -EINVAL;
606 } 634 }
635
636 if (test_opt(sbi, INLINE_XATTR_SIZE)) {
637 if (!test_opt(sbi, INLINE_XATTR)) {
638 f2fs_msg(sb, KERN_ERR,
639 "inline_xattr_size option should be "
640 "set with inline_xattr option");
641 return -EINVAL;
642 }
643 if (!sbi->inline_xattr_size ||
644 sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE -
645 F2FS_TOTAL_EXTRA_ATTR_SIZE -
646 DEF_INLINE_RESERVED_SIZE -
647 DEF_MIN_INLINE_SIZE) {
648 f2fs_msg(sb, KERN_ERR,
649 "inline xattr size is out of range");
650 return -EINVAL;
651 }
652 }
607 return 0; 653 return 0;
608} 654}
609 655
@@ -618,13 +664,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
618 init_once((void *) fi); 664 init_once((void *) fi);
619 665
620 /* Initialize f2fs-specific inode info */ 666 /* Initialize f2fs-specific inode info */
621 fi->vfs_inode.i_version = 1;
622 atomic_set(&fi->dirty_pages, 0); 667 atomic_set(&fi->dirty_pages, 0);
623 fi->i_current_depth = 1; 668 fi->i_current_depth = 1;
624 fi->i_advise = 0; 669 fi->i_advise = 0;
625 init_rwsem(&fi->i_sem); 670 init_rwsem(&fi->i_sem);
626 INIT_LIST_HEAD(&fi->dirty_list); 671 INIT_LIST_HEAD(&fi->dirty_list);
627 INIT_LIST_HEAD(&fi->gdirty_list); 672 INIT_LIST_HEAD(&fi->gdirty_list);
673 INIT_LIST_HEAD(&fi->inmem_ilist);
628 INIT_LIST_HEAD(&fi->inmem_pages); 674 INIT_LIST_HEAD(&fi->inmem_pages);
629 mutex_init(&fi->inmem_lock); 675 mutex_init(&fi->inmem_lock);
630 init_rwsem(&fi->dio_rwsem[READ]); 676 init_rwsem(&fi->dio_rwsem[READ]);
@@ -673,7 +719,6 @@ static int f2fs_drop_inode(struct inode *inode)
673 719
674 sb_end_intwrite(inode->i_sb); 720 sb_end_intwrite(inode->i_sb);
675 721
676 fscrypt_put_encryption_info(inode, NULL);
677 spin_lock(&inode->i_lock); 722 spin_lock(&inode->i_lock);
678 atomic_dec(&inode->i_count); 723 atomic_dec(&inode->i_count);
679 } 724 }
@@ -781,6 +826,7 @@ static void f2fs_put_super(struct super_block *sb)
781{ 826{
782 struct f2fs_sb_info *sbi = F2FS_SB(sb); 827 struct f2fs_sb_info *sbi = F2FS_SB(sb);
783 int i; 828 int i;
829 bool dropped;
784 830
785 f2fs_quota_off_umount(sb); 831 f2fs_quota_off_umount(sb);
786 832
@@ -801,9 +847,9 @@ static void f2fs_put_super(struct super_block *sb)
801 } 847 }
802 848
803 /* be sure to wait for any on-going discard commands */ 849 /* be sure to wait for any on-going discard commands */
804 f2fs_wait_discard_bios(sbi, true); 850 dropped = f2fs_wait_discard_bios(sbi);
805 851
806 if (f2fs_discard_en(sbi) && !sbi->discard_blks) { 852 if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
807 struct cp_control cpc = { 853 struct cp_control cpc = {
808 .reason = CP_UMOUNT | CP_TRIMMED, 854 .reason = CP_UMOUNT | CP_TRIMMED,
809 }; 855 };
@@ -858,6 +904,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
858 struct f2fs_sb_info *sbi = F2FS_SB(sb); 904 struct f2fs_sb_info *sbi = F2FS_SB(sb);
859 int err = 0; 905 int err = 0;
860 906
907 if (unlikely(f2fs_cp_error(sbi)))
908 return 0;
909
861 trace_f2fs_sync_fs(sb, sync); 910 trace_f2fs_sync_fs(sb, sync);
862 911
863 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 912 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -957,7 +1006,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
957 buf->f_blocks = total_count - start_count; 1006 buf->f_blocks = total_count - start_count;
958 buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; 1007 buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
959 buf->f_bavail = user_block_count - valid_user_blocks(sbi) - 1008 buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
960 sbi->reserved_blocks; 1009 sbi->current_reserved_blocks;
961 1010
962 avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; 1011 avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
963 1012
@@ -1046,6 +1095,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
1046 seq_puts(seq, ",inline_xattr"); 1095 seq_puts(seq, ",inline_xattr");
1047 else 1096 else
1048 seq_puts(seq, ",noinline_xattr"); 1097 seq_puts(seq, ",noinline_xattr");
1098 if (test_opt(sbi, INLINE_XATTR_SIZE))
1099 seq_printf(seq, ",inline_xattr_size=%u",
1100 sbi->inline_xattr_size);
1049#endif 1101#endif
1050#ifdef CONFIG_F2FS_FS_POSIX_ACL 1102#ifdef CONFIG_F2FS_FS_POSIX_ACL
1051 if (test_opt(sbi, POSIX_ACL)) 1103 if (test_opt(sbi, POSIX_ACL))
@@ -1108,6 +1160,7 @@ static void default_options(struct f2fs_sb_info *sbi)
1108{ 1160{
1109 /* init some FS parameters */ 1161 /* init some FS parameters */
1110 sbi->active_logs = NR_CURSEG_TYPE; 1162 sbi->active_logs = NR_CURSEG_TYPE;
1163 sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
1111 1164
1112 set_opt(sbi, BG_GC); 1165 set_opt(sbi, BG_GC);
1113 set_opt(sbi, INLINE_XATTR); 1166 set_opt(sbi, INLINE_XATTR);
@@ -1136,6 +1189,9 @@ static void default_options(struct f2fs_sb_info *sbi)
1136#endif 1189#endif
1137} 1190}
1138 1191
1192#ifdef CONFIG_QUOTA
1193static int f2fs_enable_quotas(struct super_block *sb);
1194#endif
1139static int f2fs_remount(struct super_block *sb, int *flags, char *data) 1195static int f2fs_remount(struct super_block *sb, int *flags, char *data)
1140{ 1196{
1141 struct f2fs_sb_info *sbi = F2FS_SB(sb); 1197 struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1202,6 +1258,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
1202 if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) 1258 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
1203 goto skip; 1259 goto skip;
1204 1260
1261#ifdef CONFIG_QUOTA
1205 if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { 1262 if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
1206 err = dquot_suspend(sb, -1); 1263 err = dquot_suspend(sb, -1);
1207 if (err < 0) 1264 if (err < 0)
@@ -1209,9 +1266,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
1209 } else { 1266 } else {
1210 /* dquot_resume needs RW */ 1267 /* dquot_resume needs RW */
1211 sb->s_flags &= ~MS_RDONLY; 1268 sb->s_flags &= ~MS_RDONLY;
1212 dquot_resume(sb, -1); 1269 if (sb_any_quota_suspended(sb)) {
1270 dquot_resume(sb, -1);
1271 } else if (f2fs_sb_has_quota_ino(sb)) {
1272 err = f2fs_enable_quotas(sb);
1273 if (err)
1274 goto restore_opts;
1275 }
1213 } 1276 }
1214 1277#endif
1215 /* disallow enable/disable extent_cache dynamically */ 1278 /* disallow enable/disable extent_cache dynamically */
1216 if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { 1279 if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
1217 err = -EINVAL; 1280 err = -EINVAL;
@@ -1320,8 +1383,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
1320 tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); 1383 tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
1321repeat: 1384repeat:
1322 page = read_mapping_page(mapping, blkidx, NULL); 1385 page = read_mapping_page(mapping, blkidx, NULL);
1323 if (IS_ERR(page)) 1386 if (IS_ERR(page)) {
1387 if (PTR_ERR(page) == -ENOMEM) {
1388 congestion_wait(BLK_RW_ASYNC, HZ/50);
1389 goto repeat;
1390 }
1324 return PTR_ERR(page); 1391 return PTR_ERR(page);
1392 }
1325 1393
1326 lock_page(page); 1394 lock_page(page);
1327 1395
@@ -1364,11 +1432,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
1364 while (towrite > 0) { 1432 while (towrite > 0) {
1365 tocopy = min_t(unsigned long, sb->s_blocksize - offset, 1433 tocopy = min_t(unsigned long, sb->s_blocksize - offset,
1366 towrite); 1434 towrite);
1367 1435retry:
1368 err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, 1436 err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
1369 &page, NULL); 1437 &page, NULL);
1370 if (unlikely(err)) 1438 if (unlikely(err)) {
1439 if (err == -ENOMEM) {
1440 congestion_wait(BLK_RW_ASYNC, HZ/50);
1441 goto retry;
1442 }
1371 break; 1443 break;
1444 }
1372 1445
1373 kaddr = kmap_atomic(page); 1446 kaddr = kmap_atomic(page);
1374 memcpy(kaddr + offset, data, tocopy); 1447 memcpy(kaddr + offset, data, tocopy);
@@ -1385,8 +1458,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
1385 } 1458 }
1386 1459
1387 if (len == towrite) 1460 if (len == towrite)
1388 return 0; 1461 return err;
1389 inode->i_version++;
1390 inode->i_mtime = inode->i_ctime = current_time(inode); 1462 inode->i_mtime = inode->i_ctime = current_time(inode);
1391 f2fs_mark_inode_dirty_sync(inode, false); 1463 f2fs_mark_inode_dirty_sync(inode, false);
1392 return len - towrite; 1464 return len - towrite;
@@ -1408,19 +1480,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
1408 sbi->s_jquota_fmt, type); 1480 sbi->s_jquota_fmt, type);
1409} 1481}
1410 1482
1411void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) 1483int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
1412{ 1484{
1413 int i, ret; 1485 int enabled = 0;
1486 int i, err;
1487
1488 if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
1489 err = f2fs_enable_quotas(sbi->sb);
1490 if (err) {
1491 f2fs_msg(sbi->sb, KERN_ERR,
1492 "Cannot turn on quota_ino: %d", err);
1493 return 0;
1494 }
1495 return 1;
1496 }
1414 1497
1415 for (i = 0; i < MAXQUOTAS; i++) { 1498 for (i = 0; i < MAXQUOTAS; i++) {
1416 if (sbi->s_qf_names[i]) { 1499 if (sbi->s_qf_names[i]) {
1417 ret = f2fs_quota_on_mount(sbi, i); 1500 err = f2fs_quota_on_mount(sbi, i);
1418 if (ret < 0) 1501 if (!err) {
1419 f2fs_msg(sbi->sb, KERN_ERR, 1502 enabled = 1;
1420 "Cannot turn on journaled " 1503 continue;
1421 "quota: error %d", ret); 1504 }
1505 f2fs_msg(sbi->sb, KERN_ERR,
1506 "Cannot turn on quotas: %d on %d", err, i);
1507 }
1508 }
1509 return enabled;
1510}
1511
1512static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
1513 unsigned int flags)
1514{
1515 struct inode *qf_inode;
1516 unsigned long qf_inum;
1517 int err;
1518
1519 BUG_ON(!f2fs_sb_has_quota_ino(sb));
1520
1521 qf_inum = f2fs_qf_ino(sb, type);
1522 if (!qf_inum)
1523 return -EPERM;
1524
1525 qf_inode = f2fs_iget(sb, qf_inum);
1526 if (IS_ERR(qf_inode)) {
1527 f2fs_msg(sb, KERN_ERR,
1528 "Bad quota inode %u:%lu", type, qf_inum);
1529 return PTR_ERR(qf_inode);
1530 }
1531
1532 /* Don't account quota for quota files to avoid recursion */
1533 qf_inode->i_flags |= S_NOQUOTA;
1534 err = dquot_enable(qf_inode, type, format_id, flags);
1535 iput(qf_inode);
1536 return err;
1537}
1538
1539static int f2fs_enable_quotas(struct super_block *sb)
1540{
1541 int type, err = 0;
1542 unsigned long qf_inum;
1543 bool quota_mopt[MAXQUOTAS] = {
1544 test_opt(F2FS_SB(sb), USRQUOTA),
1545 test_opt(F2FS_SB(sb), GRPQUOTA),
1546 test_opt(F2FS_SB(sb), PRJQUOTA),
1547 };
1548
1549 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
1550 for (type = 0; type < MAXQUOTAS; type++) {
1551 qf_inum = f2fs_qf_ino(sb, type);
1552 if (qf_inum) {
1553 err = f2fs_quota_enable(sb, type, QFMT_VFS_V1,
1554 DQUOT_USAGE_ENABLED |
1555 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
1556 if (err) {
1557 f2fs_msg(sb, KERN_ERR,
1558 "Failed to enable quota tracking "
1559 "(type=%d, err=%d). Please run "
1560 "fsck to fix.", type, err);
1561 for (type--; type >= 0; type--)
1562 dquot_quota_off(sb, type);
1563 return err;
1564 }
1422 } 1565 }
1423 } 1566 }
1567 return 0;
1424} 1568}
1425 1569
1426static int f2fs_quota_sync(struct super_block *sb, int type) 1570static int f2fs_quota_sync(struct super_block *sb, int type)
@@ -1491,7 +1635,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
1491 f2fs_quota_sync(sb, type); 1635 f2fs_quota_sync(sb, type);
1492 1636
1493 err = dquot_quota_off(sb, type); 1637 err = dquot_quota_off(sb, type);
1494 if (err) 1638 if (err || f2fs_sb_has_quota_ino(sb))
1495 goto out_put; 1639 goto out_put;
1496 1640
1497 inode_lock(inode); 1641 inode_lock(inode);
@@ -1651,7 +1795,7 @@ static loff_t max_file_blocks(void)
1651 1795
1652 /* 1796 /*
1653 * note: previously, result is equal to (DEF_ADDRS_PER_INODE - 1797 * note: previously, result is equal to (DEF_ADDRS_PER_INODE -
1654 * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more 1798 * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
1655 * space in inode.i_addr, it will be more safe to reassign 1799 * space in inode.i_addr, it will be more safe to reassign
1656 * result as zero. 1800 * result as zero.
1657 */ 1801 */
@@ -1960,6 +2104,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1960 for (j = HOT; j < NR_TEMP_TYPE; j++) 2104 for (j = HOT; j < NR_TEMP_TYPE; j++)
1961 mutex_init(&sbi->wio_mutex[i][j]); 2105 mutex_init(&sbi->wio_mutex[i][j]);
1962 spin_lock_init(&sbi->cp_lock); 2106 spin_lock_init(&sbi->cp_lock);
2107
2108 sbi->dirty_device = 0;
2109 spin_lock_init(&sbi->dev_lock);
1963} 2110}
1964 2111
1965static int init_percpu_info(struct f2fs_sb_info *sbi) 2112static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -2310,7 +2457,10 @@ try_onemore:
2310 2457
2311#ifdef CONFIG_QUOTA 2458#ifdef CONFIG_QUOTA
2312 sb->dq_op = &f2fs_quota_operations; 2459 sb->dq_op = &f2fs_quota_operations;
2313 sb->s_qcop = &f2fs_quotactl_ops; 2460 if (f2fs_sb_has_quota_ino(sb))
2461 sb->s_qcop = &dquot_quotactl_sysfile_ops;
2462 else
2463 sb->s_qcop = &f2fs_quotactl_ops;
2314 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 2464 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
2315#endif 2465#endif
2316 2466
@@ -2408,6 +2558,7 @@ try_onemore:
2408 le64_to_cpu(sbi->ckpt->valid_block_count); 2558 le64_to_cpu(sbi->ckpt->valid_block_count);
2409 sbi->last_valid_block_count = sbi->total_valid_block_count; 2559 sbi->last_valid_block_count = sbi->total_valid_block_count;
2410 sbi->reserved_blocks = 0; 2560 sbi->reserved_blocks = 0;
2561 sbi->current_reserved_blocks = 0;
2411 2562
2412 for (i = 0; i < NR_INODE_TYPE; i++) { 2563 for (i = 0; i < NR_INODE_TYPE; i++) {
2413 INIT_LIST_HEAD(&sbi->inode_list[i]); 2564 INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2482,10 +2633,24 @@ try_onemore:
2482 if (err) 2633 if (err)
2483 goto free_root_inode; 2634 goto free_root_inode;
2484 2635
2636#ifdef CONFIG_QUOTA
2637 /*
2638 * Turn on quotas which were not enabled for read-only mounts if
2639 * filesystem has quota feature, so that they are updated correctly.
2640 */
2641 if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) {
2642 err = f2fs_enable_quotas(sb);
2643 if (err) {
2644 f2fs_msg(sb, KERN_ERR,
2645 "Cannot turn on quotas: error %d", err);
2646 goto free_sysfs;
2647 }
2648 }
2649#endif
2485 /* if there are nt orphan nodes free them */ 2650 /* if there are nt orphan nodes free them */
2486 err = recover_orphan_inodes(sbi); 2651 err = recover_orphan_inodes(sbi);
2487 if (err) 2652 if (err)
2488 goto free_sysfs; 2653 goto free_meta;
2489 2654
2490 /* recover fsynced data */ 2655 /* recover fsynced data */
2491 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 2656 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2519,7 +2684,7 @@ try_onemore:
2519 err = -EINVAL; 2684 err = -EINVAL;
2520 f2fs_msg(sb, KERN_ERR, 2685 f2fs_msg(sb, KERN_ERR,
2521 "Need to recover fsync data"); 2686 "Need to recover fsync data");
2522 goto free_sysfs; 2687 goto free_meta;
2523 } 2688 }
2524 } 2689 }
2525skip_recovery: 2690skip_recovery:
@@ -2553,6 +2718,10 @@ skip_recovery:
2553 return 0; 2718 return 0;
2554 2719
2555free_meta: 2720free_meta:
2721#ifdef CONFIG_QUOTA
2722 if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb))
2723 f2fs_quota_off_umount(sbi->sb);
2724#endif
2556 f2fs_sync_inode_meta(sbi); 2725 f2fs_sync_inode_meta(sbi);
2557 /* 2726 /*
2558 * Some dirty meta pages can be produced by recover_orphan_inodes() 2727 * Some dirty meta pages can be produced by recover_orphan_inodes()
@@ -2561,7 +2730,9 @@ free_meta:
2561 * falls into an infinite loop in sync_meta_pages(). 2730 * falls into an infinite loop in sync_meta_pages().
2562 */ 2731 */
2563 truncate_inode_pages_final(META_MAPPING(sbi)); 2732 truncate_inode_pages_final(META_MAPPING(sbi));
2733#ifdef CONFIG_QUOTA
2564free_sysfs: 2734free_sysfs:
2735#endif
2565 f2fs_unregister_sysfs(sbi); 2736 f2fs_unregister_sysfs(sbi);
2566free_root_inode: 2737free_root_inode:
2567 dput(sb->s_root); 2738 dput(sb->s_root);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index e2c258f717cd..9835348b6e5d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -30,7 +30,7 @@ enum {
30 FAULT_INFO_RATE, /* struct f2fs_fault_info */ 30 FAULT_INFO_RATE, /* struct f2fs_fault_info */
31 FAULT_INFO_TYPE, /* struct f2fs_fault_info */ 31 FAULT_INFO_TYPE, /* struct f2fs_fault_info */
32#endif 32#endif
33 RESERVED_BLOCKS, 33 RESERVED_BLOCKS, /* struct f2fs_sb_info */
34}; 34};
35 35
36struct f2fs_attr { 36struct f2fs_attr {
@@ -63,6 +63,13 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
63 return NULL; 63 return NULL;
64} 64}
65 65
66static ssize_t dirty_segments_show(struct f2fs_attr *a,
67 struct f2fs_sb_info *sbi, char *buf)
68{
69 return snprintf(buf, PAGE_SIZE, "%llu\n",
70 (unsigned long long)(dirty_segments(sbi)));
71}
72
66static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, 73static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
67 struct f2fs_sb_info *sbi, char *buf) 74 struct f2fs_sb_info *sbi, char *buf)
68{ 75{
@@ -100,10 +107,22 @@ static ssize_t features_show(struct f2fs_attr *a,
100 if (f2fs_sb_has_inode_chksum(sb)) 107 if (f2fs_sb_has_inode_chksum(sb))
101 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", 108 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
102 len ? ", " : "", "inode_checksum"); 109 len ? ", " : "", "inode_checksum");
110 if (f2fs_sb_has_flexible_inline_xattr(sb))
111 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
112 len ? ", " : "", "flexible_inline_xattr");
113 if (f2fs_sb_has_quota_ino(sb))
114 len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
115 len ? ", " : "", "quota_ino");
103 len += snprintf(buf + len, PAGE_SIZE - len, "\n"); 116 len += snprintf(buf + len, PAGE_SIZE - len, "\n");
104 return len; 117 return len;
105} 118}
106 119
120static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
121 struct f2fs_sb_info *sbi, char *buf)
122{
123 return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks);
124}
125
107static ssize_t f2fs_sbi_show(struct f2fs_attr *a, 126static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
108 struct f2fs_sb_info *sbi, char *buf) 127 struct f2fs_sb_info *sbi, char *buf)
109{ 128{
@@ -143,34 +162,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
143#endif 162#endif
144 if (a->struct_type == RESERVED_BLOCKS) { 163 if (a->struct_type == RESERVED_BLOCKS) {
145 spin_lock(&sbi->stat_lock); 164 spin_lock(&sbi->stat_lock);
146 if ((unsigned long)sbi->total_valid_block_count + t > 165 if (t > (unsigned long)sbi->user_block_count) {
147 (unsigned long)sbi->user_block_count) {
148 spin_unlock(&sbi->stat_lock); 166 spin_unlock(&sbi->stat_lock);
149 return -EINVAL; 167 return -EINVAL;
150 } 168 }
151 *ui = t; 169 *ui = t;
170 sbi->current_reserved_blocks = min(sbi->reserved_blocks,
171 sbi->user_block_count - valid_user_blocks(sbi));
152 spin_unlock(&sbi->stat_lock); 172 spin_unlock(&sbi->stat_lock);
153 return count; 173 return count;
154 } 174 }
155 175
156 if (!strcmp(a->attr.name, "discard_granularity")) { 176 if (!strcmp(a->attr.name, "discard_granularity")) {
157 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
158 int i;
159
160 if (t == 0 || t > MAX_PLIST_NUM) 177 if (t == 0 || t > MAX_PLIST_NUM)
161 return -EINVAL; 178 return -EINVAL;
162 if (t == *ui) 179 if (t == *ui)
163 return count; 180 return count;
164
165 mutex_lock(&dcc->cmd_lock);
166 for (i = 0; i < MAX_PLIST_NUM; i++) {
167 if (i >= t - 1)
168 dcc->pend_list_tag[i] |= P_ACTIVE;
169 else
170 dcc->pend_list_tag[i] &= (~P_ACTIVE);
171 }
172 mutex_unlock(&dcc->cmd_lock);
173
174 *ui = t; 181 *ui = t;
175 return count; 182 return count;
176 } 183 }
@@ -222,6 +229,8 @@ enum feat_id {
222 FEAT_EXTRA_ATTR, 229 FEAT_EXTRA_ATTR,
223 FEAT_PROJECT_QUOTA, 230 FEAT_PROJECT_QUOTA,
224 FEAT_INODE_CHECKSUM, 231 FEAT_INODE_CHECKSUM,
232 FEAT_FLEXIBLE_INLINE_XATTR,
233 FEAT_QUOTA_INO,
225}; 234};
226 235
227static ssize_t f2fs_feature_show(struct f2fs_attr *a, 236static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -234,6 +243,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
234 case FEAT_EXTRA_ATTR: 243 case FEAT_EXTRA_ATTR:
235 case FEAT_PROJECT_QUOTA: 244 case FEAT_PROJECT_QUOTA:
236 case FEAT_INODE_CHECKSUM: 245 case FEAT_INODE_CHECKSUM:
246 case FEAT_FLEXIBLE_INLINE_XATTR:
247 case FEAT_QUOTA_INO:
237 return snprintf(buf, PAGE_SIZE, "supported\n"); 248 return snprintf(buf, PAGE_SIZE, "supported\n");
238 } 249 }
239 return 0; 250 return 0;
@@ -279,6 +290,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
279F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 290F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
280F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); 291F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
281F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); 292F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
293F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
282F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); 294F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
283F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); 295F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
284F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); 296F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -291,8 +303,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
291F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); 303F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
292F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); 304F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
293#endif 305#endif
306F2FS_GENERAL_RO_ATTR(dirty_segments);
294F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); 307F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
295F2FS_GENERAL_RO_ATTR(features); 308F2FS_GENERAL_RO_ATTR(features);
309F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
296 310
297#ifdef CONFIG_F2FS_FS_ENCRYPTION 311#ifdef CONFIG_F2FS_FS_ENCRYPTION
298F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); 312F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -304,6 +318,8 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE);
304F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); 318F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR);
305F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); 319F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
306F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); 320F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
321F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
322F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
307 323
308#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 324#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
309static struct attribute *f2fs_attrs[] = { 325static struct attribute *f2fs_attrs[] = {
@@ -321,6 +337,7 @@ static struct attribute *f2fs_attrs[] = {
321 ATTR_LIST(min_ipu_util), 337 ATTR_LIST(min_ipu_util),
322 ATTR_LIST(min_fsync_blocks), 338 ATTR_LIST(min_fsync_blocks),
323 ATTR_LIST(min_hot_blocks), 339 ATTR_LIST(min_hot_blocks),
340 ATTR_LIST(min_ssr_sections),
324 ATTR_LIST(max_victim_search), 341 ATTR_LIST(max_victim_search),
325 ATTR_LIST(dir_level), 342 ATTR_LIST(dir_level),
326 ATTR_LIST(ram_thresh), 343 ATTR_LIST(ram_thresh),
@@ -333,9 +350,11 @@ static struct attribute *f2fs_attrs[] = {
333 ATTR_LIST(inject_rate), 350 ATTR_LIST(inject_rate),
334 ATTR_LIST(inject_type), 351 ATTR_LIST(inject_type),
335#endif 352#endif
353 ATTR_LIST(dirty_segments),
336 ATTR_LIST(lifetime_write_kbytes), 354 ATTR_LIST(lifetime_write_kbytes),
337 ATTR_LIST(features), 355 ATTR_LIST(features),
338 ATTR_LIST(reserved_blocks), 356 ATTR_LIST(reserved_blocks),
357 ATTR_LIST(current_reserved_blocks),
339 NULL, 358 NULL,
340}; 359};
341 360
@@ -350,6 +369,8 @@ static struct attribute *f2fs_feat_attrs[] = {
350 ATTR_LIST(extra_attr), 369 ATTR_LIST(extra_attr),
351 ATTR_LIST(project_quota), 370 ATTR_LIST(project_quota),
352 ATTR_LIST(inode_checksum), 371 ATTR_LIST(inode_checksum),
372 ATTR_LIST(flexible_inline_xattr),
373 ATTR_LIST(quota_ino),
353 NULL, 374 NULL,
354}; 375};
355 376
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7c65540148f8..ec8961ef8cac 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,12 +217,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
217 return entry; 217 return entry;
218} 218}
219 219
220static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, 220static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
221 void **last_addr, int index, 221 void *base_addr, void **last_addr, int index,
222 size_t len, const char *name) 222 size_t len, const char *name)
223{ 223{
224 struct f2fs_xattr_entry *entry; 224 struct f2fs_xattr_entry *entry;
225 unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; 225 unsigned int inline_size = inline_xattr_size(inode);
226 226
227 list_for_each_xattr(entry, base_addr) { 227 list_for_each_xattr(entry, base_addr) {
228 if ((void *)entry + sizeof(__u32) > base_addr + inline_size || 228 if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
@@ -241,12 +241,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
241 return entry; 241 return entry;
242} 242}
243 243
244static int read_inline_xattr(struct inode *inode, struct page *ipage,
245 void *txattr_addr)
246{
247 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
248 unsigned int inline_size = inline_xattr_size(inode);
249 struct page *page = NULL;
250 void *inline_addr;
251
252 if (ipage) {
253 inline_addr = inline_xattr_addr(inode, ipage);
254 } else {
255 page = get_node_page(sbi, inode->i_ino);
256 if (IS_ERR(page))
257 return PTR_ERR(page);
258
259 inline_addr = inline_xattr_addr(inode, page);
260 }
261 memcpy(txattr_addr, inline_addr, inline_size);
262 f2fs_put_page(page, 1);
263
264 return 0;
265}
266
267static int read_xattr_block(struct inode *inode, void *txattr_addr)
268{
269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
271 unsigned int inline_size = inline_xattr_size(inode);
272 struct page *xpage;
273 void *xattr_addr;
274
275 /* The inode already has an extended attribute block. */
276 xpage = get_node_page(sbi, xnid);
277 if (IS_ERR(xpage))
278 return PTR_ERR(xpage);
279
280 xattr_addr = page_address(xpage);
281 memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE);
282 f2fs_put_page(xpage, 1);
283
284 return 0;
285}
286
244static int lookup_all_xattrs(struct inode *inode, struct page *ipage, 287static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
245 unsigned int index, unsigned int len, 288 unsigned int index, unsigned int len,
246 const char *name, struct f2fs_xattr_entry **xe, 289 const char *name, struct f2fs_xattr_entry **xe,
247 void **base_addr) 290 void **base_addr)
248{ 291{
249 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
250 void *cur_addr, *txattr_addr, *last_addr = NULL; 292 void *cur_addr, *txattr_addr, *last_addr = NULL;
251 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 293 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
252 unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; 294 unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
@@ -263,23 +305,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
263 305
264 /* read from inline xattr */ 306 /* read from inline xattr */
265 if (inline_size) { 307 if (inline_size) {
266 struct page *page = NULL; 308 err = read_inline_xattr(inode, ipage, txattr_addr);
267 void *inline_addr; 309 if (err)
268 310 goto out;
269 if (ipage) {
270 inline_addr = inline_xattr_addr(ipage);
271 } else {
272 page = get_node_page(sbi, inode->i_ino);
273 if (IS_ERR(page)) {
274 err = PTR_ERR(page);
275 goto out;
276 }
277 inline_addr = inline_xattr_addr(page);
278 }
279 memcpy(txattr_addr, inline_addr, inline_size);
280 f2fs_put_page(page, 1);
281 311
282 *xe = __find_inline_xattr(txattr_addr, &last_addr, 312 *xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
283 index, len, name); 313 index, len, name);
284 if (*xe) 314 if (*xe)
285 goto check; 315 goto check;
@@ -287,19 +317,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
287 317
288 /* read from xattr node block */ 318 /* read from xattr node block */
289 if (xnid) { 319 if (xnid) {
290 struct page *xpage; 320 err = read_xattr_block(inode, txattr_addr);
291 void *xattr_addr; 321 if (err)
292
293 /* The inode already has an extended attribute block. */
294 xpage = get_node_page(sbi, xnid);
295 if (IS_ERR(xpage)) {
296 err = PTR_ERR(xpage);
297 goto out; 322 goto out;
298 }
299
300 xattr_addr = page_address(xpage);
301 memcpy(txattr_addr + inline_size, xattr_addr, size);
302 f2fs_put_page(xpage, 1);
303 } 323 }
304 324
305 if (last_addr) 325 if (last_addr)
@@ -324,7 +344,6 @@ out:
324static int read_all_xattrs(struct inode *inode, struct page *ipage, 344static int read_all_xattrs(struct inode *inode, struct page *ipage,
325 void **base_addr) 345 void **base_addr)
326{ 346{
327 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
328 struct f2fs_xattr_header *header; 347 struct f2fs_xattr_header *header;
329 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 348 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
330 unsigned int size = VALID_XATTR_BLOCK_SIZE; 349 unsigned int size = VALID_XATTR_BLOCK_SIZE;
@@ -339,38 +358,16 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
339 358
340 /* read from inline xattr */ 359 /* read from inline xattr */
341 if (inline_size) { 360 if (inline_size) {
342 struct page *page = NULL; 361 err = read_inline_xattr(inode, ipage, txattr_addr);
343 void *inline_addr; 362 if (err)
344 363 goto fail;
345 if (ipage) {
346 inline_addr = inline_xattr_addr(ipage);
347 } else {
348 page = get_node_page(sbi, inode->i_ino);
349 if (IS_ERR(page)) {
350 err = PTR_ERR(page);
351 goto fail;
352 }
353 inline_addr = inline_xattr_addr(page);
354 }
355 memcpy(txattr_addr, inline_addr, inline_size);
356 f2fs_put_page(page, 1);
357 } 364 }
358 365
359 /* read from xattr node block */ 366 /* read from xattr node block */
360 if (xnid) { 367 if (xnid) {
361 struct page *xpage; 368 err = read_xattr_block(inode, txattr_addr);
362 void *xattr_addr; 369 if (err)
363
364 /* The inode already has an extended attribute block. */
365 xpage = get_node_page(sbi, xnid);
366 if (IS_ERR(xpage)) {
367 err = PTR_ERR(xpage);
368 goto fail; 370 goto fail;
369 }
370
371 xattr_addr = page_address(xpage);
372 memcpy(txattr_addr + inline_size, xattr_addr, size);
373 f2fs_put_page(xpage, 1);
374 } 371 }
375 372
376 header = XATTR_HDR(txattr_addr); 373 header = XATTR_HDR(txattr_addr);
@@ -392,10 +389,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
392{ 389{
393 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 390 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
394 size_t inline_size = inline_xattr_size(inode); 391 size_t inline_size = inline_xattr_size(inode);
392 struct page *in_page = NULL;
395 void *xattr_addr; 393 void *xattr_addr;
394 void *inline_addr = NULL;
396 struct page *xpage; 395 struct page *xpage;
397 nid_t new_nid = 0; 396 nid_t new_nid = 0;
398 int err; 397 int err = 0;
399 398
400 if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) 399 if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
401 if (!alloc_nid(sbi, &new_nid)) 400 if (!alloc_nid(sbi, &new_nid))
@@ -403,30 +402,30 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
403 402
404 /* write to inline xattr */ 403 /* write to inline xattr */
405 if (inline_size) { 404 if (inline_size) {
406 struct page *page = NULL;
407 void *inline_addr;
408
409 if (ipage) { 405 if (ipage) {
410 inline_addr = inline_xattr_addr(ipage); 406 inline_addr = inline_xattr_addr(inode, ipage);
411 f2fs_wait_on_page_writeback(ipage, NODE, true);
412 set_page_dirty(ipage);
413 } else { 407 } else {
414 page = get_node_page(sbi, inode->i_ino); 408 in_page = get_node_page(sbi, inode->i_ino);
415 if (IS_ERR(page)) { 409 if (IS_ERR(in_page)) {
416 alloc_nid_failed(sbi, new_nid); 410 alloc_nid_failed(sbi, new_nid);
417 return PTR_ERR(page); 411 return PTR_ERR(in_page);
418 } 412 }
419 inline_addr = inline_xattr_addr(page); 413 inline_addr = inline_xattr_addr(inode, in_page);
420 f2fs_wait_on_page_writeback(page, NODE, true);
421 } 414 }
422 memcpy(inline_addr, txattr_addr, inline_size);
423 f2fs_put_page(page, 1);
424 415
416 f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
417 NODE, true);
425 /* no need to use xattr node block */ 418 /* no need to use xattr node block */
426 if (hsize <= inline_size) { 419 if (hsize <= inline_size) {
427 err = truncate_xattr_node(inode, ipage); 420 err = truncate_xattr_node(inode);
428 alloc_nid_failed(sbi, new_nid); 421 alloc_nid_failed(sbi, new_nid);
429 return err; 422 if (err) {
423 f2fs_put_page(in_page, 1);
424 return err;
425 }
426 memcpy(inline_addr, txattr_addr, inline_size);
427 set_page_dirty(ipage ? ipage : in_page);
428 goto in_page_out;
430 } 429 }
431 } 430 }
432 431
@@ -435,7 +434,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
435 xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); 434 xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
436 if (IS_ERR(xpage)) { 435 if (IS_ERR(xpage)) {
437 alloc_nid_failed(sbi, new_nid); 436 alloc_nid_failed(sbi, new_nid);
438 return PTR_ERR(xpage); 437 goto in_page_out;
439 } 438 }
440 f2fs_bug_on(sbi, new_nid); 439 f2fs_bug_on(sbi, new_nid);
441 f2fs_wait_on_page_writeback(xpage, NODE, true); 440 f2fs_wait_on_page_writeback(xpage, NODE, true);
@@ -445,17 +444,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
445 xpage = new_node_page(&dn, XATTR_NODE_OFFSET); 444 xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
446 if (IS_ERR(xpage)) { 445 if (IS_ERR(xpage)) {
447 alloc_nid_failed(sbi, new_nid); 446 alloc_nid_failed(sbi, new_nid);
448 return PTR_ERR(xpage); 447 goto in_page_out;
449 } 448 }
450 alloc_nid_done(sbi, new_nid); 449 alloc_nid_done(sbi, new_nid);
451 } 450 }
452
453 xattr_addr = page_address(xpage); 451 xattr_addr = page_address(xpage);
452
453 if (inline_size)
454 memcpy(inline_addr, txattr_addr, inline_size);
454 memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); 455 memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
456
457 if (inline_size)
458 set_page_dirty(ipage ? ipage : in_page);
455 set_page_dirty(xpage); 459 set_page_dirty(xpage);
456 f2fs_put_page(xpage, 1);
457 460
458 return 0; 461 f2fs_put_page(xpage, 1);
462in_page_out:
463 f2fs_put_page(in_page, 1);
464 return err;
459} 465}
460 466
461int f2fs_getxattr(struct inode *inode, int index, const char *name, 467int f2fs_getxattr(struct inode *inode, int index, const char *name,
@@ -681,6 +687,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
681 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 687 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
682 int err; 688 int err;
683 689
690 err = dquot_initialize(inode);
691 if (err)
692 return err;
693
684 /* this case is only from init_inode_metadata */ 694 /* this case is only from init_inode_metadata */
685 if (ipage) 695 if (ipage)
686 return __f2fs_setxattr(inode, index, name, value, 696 return __f2fs_setxattr(inode, index, name, value,
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 2a0c453d7235..43e98d30d2df 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,8 @@
36#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) 36#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num)
37#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) 37#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num)
38 38
39#define F2FS_MAX_QUOTAS 3
40
39#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ 41#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
40#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ 42#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
41#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ 43#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */
@@ -108,7 +110,8 @@ struct f2fs_super_block {
108 __u8 encryption_level; /* versioning level for encryption */ 110 __u8 encryption_level; /* versioning level for encryption */
109 __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ 111 __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
110 struct f2fs_device devs[MAX_DEVICES]; /* device list */ 112 struct f2fs_device devs[MAX_DEVICES]; /* device list */
111 __u8 reserved[327]; /* valid reserved region */ 113 __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
114 __u8 reserved[315]; /* valid reserved region */
112} __packed; 115} __packed;
113 116
114/* 117/*
@@ -184,7 +187,8 @@ struct f2fs_extent {
184} __packed; 187} __packed;
185 188
186#define F2FS_NAME_LEN 255 189#define F2FS_NAME_LEN 255
187#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ 190/* 200 bytes for inline xattrs by default */
191#define DEFAULT_INLINE_XATTR_ADDRS 50
188#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ 192#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
189#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ 193#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
190 get_extra_isize(inode)) 194 get_extra_isize(inode))
@@ -238,7 +242,7 @@ struct f2fs_inode {
238 union { 242 union {
239 struct { 243 struct {
240 __le16 i_extra_isize; /* extra inode attribute size */ 244 __le16 i_extra_isize; /* extra inode attribute size */
241 __le16 i_padding; /* padding */ 245 __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */
242 __le32 i_projid; /* project id */ 246 __le32 i_projid; /* project id */
243 __le32 i_inode_checksum;/* inode meta checksum */ 247 __le32 i_inode_checksum;/* inode meta checksum */
244 __le32 i_extra_end[0]; /* for attribute size calculation */ 248 __le32 i_extra_end[0]; /* for attribute size calculation */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7ab40491485b..8f8dd42fa57b 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -137,6 +137,18 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
137 { CP_UMOUNT, "Umount" }, \ 137 { CP_UMOUNT, "Umount" }, \
138 { CP_TRIMMED, "Trimmed" }) 138 { CP_TRIMMED, "Trimmed" })
139 139
140#define show_fsync_cpreason(type) \
141 __print_symbolic(type, \
142 { CP_NO_NEEDED, "no needed" }, \
143 { CP_NON_REGULAR, "non regular" }, \
144 { CP_HARDLINK, "hardlink" }, \
145 { CP_SB_NEED_CP, "sb needs cp" }, \
146 { CP_WRONG_PINO, "wrong pino" }, \
147 { CP_NO_SPC_ROLL, "no space roll forward" }, \
148 { CP_NODE_NEED_CP, "node needs cp" }, \
149 { CP_FASTBOOT_MODE, "fastboot mode" }, \
150 { CP_SPEC_LOG_NUM, "log type is 2" })
151
140struct victim_sel_policy; 152struct victim_sel_policy;
141struct f2fs_map_blocks; 153struct f2fs_map_blocks;
142 154
@@ -211,14 +223,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter,
211 223
212TRACE_EVENT(f2fs_sync_file_exit, 224TRACE_EVENT(f2fs_sync_file_exit,
213 225
214 TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), 226 TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret),
215 227
216 TP_ARGS(inode, need_cp, datasync, ret), 228 TP_ARGS(inode, cp_reason, datasync, ret),
217 229
218 TP_STRUCT__entry( 230 TP_STRUCT__entry(
219 __field(dev_t, dev) 231 __field(dev_t, dev)
220 __field(ino_t, ino) 232 __field(ino_t, ino)
221 __field(int, need_cp) 233 __field(int, cp_reason)
222 __field(int, datasync) 234 __field(int, datasync)
223 __field(int, ret) 235 __field(int, ret)
224 ), 236 ),
@@ -226,15 +238,15 @@ TRACE_EVENT(f2fs_sync_file_exit,
226 TP_fast_assign( 238 TP_fast_assign(
227 __entry->dev = inode->i_sb->s_dev; 239 __entry->dev = inode->i_sb->s_dev;
228 __entry->ino = inode->i_ino; 240 __entry->ino = inode->i_ino;
229 __entry->need_cp = need_cp; 241 __entry->cp_reason = cp_reason;
230 __entry->datasync = datasync; 242 __entry->datasync = datasync;
231 __entry->ret = ret; 243 __entry->ret = ret;
232 ), 244 ),
233 245
234 TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " 246 TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, "
235 "datasync = %d, ret = %d", 247 "datasync = %d, ret = %d",
236 show_dev_ino(__entry), 248 show_dev_ino(__entry),
237 __entry->need_cp ? "needed" : "not needed", 249 show_fsync_cpreason(__entry->cp_reason),
238 __entry->datasync, 250 __entry->datasync,
239 __entry->ret) 251 __entry->ret)
240); 252);
@@ -729,6 +741,91 @@ TRACE_EVENT(f2fs_get_victim,
729 __entry->free) 741 __entry->free)
730); 742);
731 743
744TRACE_EVENT(f2fs_lookup_start,
745
746 TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
747
748 TP_ARGS(dir, dentry, flags),
749
750 TP_STRUCT__entry(
751 __field(dev_t, dev)
752 __field(ino_t, ino)
753 __field(const char *, name)
754 __field(unsigned int, flags)
755 ),
756
757 TP_fast_assign(
758 __entry->dev = dir->i_sb->s_dev;
759 __entry->ino = dir->i_ino;
760 __entry->name = dentry->d_name.name;
761 __entry->flags = flags;
762 ),
763
764 TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
765 show_dev_ino(__entry),
766 __entry->name,
767 __entry->flags)
768);
769
770TRACE_EVENT(f2fs_lookup_end,
771
772 TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino,
773 int err),
774
775 TP_ARGS(dir, dentry, ino, err),
776
777 TP_STRUCT__entry(
778 __field(dev_t, dev)
779 __field(ino_t, ino)
780 __field(const char *, name)
781 __field(nid_t, cino)
782 __field(int, err)
783 ),
784
785 TP_fast_assign(
786 __entry->dev = dir->i_sb->s_dev;
787 __entry->ino = dir->i_ino;
788 __entry->name = dentry->d_name.name;
789 __entry->cino = ino;
790 __entry->err = err;
791 ),
792
793 TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
794 show_dev_ino(__entry),
795 __entry->name,
796 __entry->cino,
797 __entry->err)
798);
799
800TRACE_EVENT(f2fs_readdir,
801
802 TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err),
803
804 TP_ARGS(dir, start_pos, end_pos, err),
805
806 TP_STRUCT__entry(
807 __field(dev_t, dev)
808 __field(ino_t, ino)
809 __field(loff_t, start)
810 __field(loff_t, end)
811 __field(int, err)
812 ),
813
814 TP_fast_assign(
815 __entry->dev = dir->i_sb->s_dev;
816 __entry->ino = dir->i_ino;
817 __entry->start = start_pos;
818 __entry->end = end_pos;
819 __entry->err = err;
820 ),
821
822 TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d",
823 show_dev_ino(__entry),
824 __entry->start,
825 __entry->end,
826 __entry->err)
827);
828
732TRACE_EVENT(f2fs_fallocate, 829TRACE_EVENT(f2fs_fallocate,
733 830
734 TP_PROTO(struct inode *inode, int mode, 831 TP_PROTO(struct inode *inode, int mode,
@@ -1287,6 +1384,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
1287 TP_ARGS(dev, blkstart, blklen) 1384 TP_ARGS(dev, blkstart, blklen)
1288); 1385);
1289 1386
1387DEFINE_EVENT(f2fs_discard, f2fs_remove_discard,
1388
1389 TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
1390
1391 TP_ARGS(dev, blkstart, blklen)
1392);
1393
1290TRACE_EVENT(f2fs_issue_reset_zone, 1394TRACE_EVENT(f2fs_issue_reset_zone,
1291 1395
1292 TP_PROTO(struct block_device *dev, block_t blkstart), 1396 TP_PROTO(struct block_device *dev, block_t blkstart),