author	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-14 12:07:36 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-14 12:07:36 -0500
commit	09cb6464fe5e7fcd5177911429badd139c4481b7
tree	5f7af2d0778f699053da6ed2e43662fff2d51e73
parent	19d37ce2a7159ee30bd59d14fe5fe13c932bd5b7
parent	c0ed4405a99ec9be2a0f062eaafc002d8d26c99f
Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "This patch series contains several performance tuning patches regarding the IO submission flow, in addition to supporting new features such as a ZBC-based drive and multiple devices.

 It also includes some major bug fixes such as:
  - checkpoint version control
  - fdatasync-related roll-forward recovery routine
  - memory boundary or null-pointer access in corner cases
  - missing error cases

 It has various minor clean-up patches as well"

* tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits)
  f2fs: fix a missing size change in f2fs_setattr
  f2fs: fix to access nullified flush_cmd_control pointer
  f2fs: free meta pages if sanity check for ckpt is failed
  f2fs: detect wrong layout
  f2fs: call sync_fs when f2fs is idle
  Revert "f2fs: use percpu_counter for # of dirty pages in inode"
  f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage
  f2fs: do not activate auto_recovery for fallocated i_size
  f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
  f2fs: fix 32-bit build
  f2fs: set ->owner for debugfs status file's file_operations
  f2fs: fix incorrect free inode count in ->statfs
  f2fs: drop duplicate header timer.h
  f2fs: fix wrong AUTO_RECOVER condition
  f2fs: do not recover i_size if it's valid
  f2fs: fix fdatasync
  f2fs: fix to account total free nid correctly
  f2fs: fix an infinite loop when flush nodes in cp
  f2fs: don't wait writeback for datas during checkpoint
  f2fs: fix wrong written_valid_blocks counting
  ...
-rw-r--r--	fs/f2fs/acl.c			2
-rw-r--r--	fs/f2fs/checkpoint.c		34
-rw-r--r--	fs/f2fs/data.c			192
-rw-r--r--	fs/f2fs/debug.c			29
-rw-r--r--	fs/f2fs/dir.c			30
-rw-r--r--	fs/f2fs/extent_cache.c		2
-rw-r--r--	fs/f2fs/f2fs.h			197
-rw-r--r--	fs/f2fs/file.c			67
-rw-r--r--	fs/f2fs/gc.c			29
-rw-r--r--	fs/f2fs/inline.c		14
-rw-r--r--	fs/f2fs/inode.c			47
-rw-r--r--	fs/f2fs/namei.c			6
-rw-r--r--	fs/f2fs/node.c			226
-rw-r--r--	fs/f2fs/node.h			13
-rw-r--r--	fs/f2fs/recovery.c		46
-rw-r--r--	fs/f2fs/segment.c		236
-rw-r--r--	fs/f2fs/segment.h		28
-rw-r--r--	fs/f2fs/shrinker.c		10
-rw-r--r--	fs/f2fs/super.c			281
-rw-r--r--	fs/f2fs/xattr.c			4
-rw-r--r--	include/linux/f2fs_fs.h		10
-rw-r--r--	include/trace/events/f2fs.h	21
22 files changed, 1048 insertions(+), 476 deletions(-)
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 6fe23af509e1..8f487692c21f 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
 	if (error)
 		return error;
 
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 
 	if (default_acl) {
 		error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d935c06a84f0..f73ee9534d83 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -228,7 +228,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
 	f2fs_put_page(page, 0);
 
 	if (readahead)
-		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
+		ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
 }
 
 static int f2fs_write_meta_page(struct page *page,
@@ -770,7 +770,12 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 
 	/* Sanity checking of checkpoint */
 	if (sanity_check_ckpt(sbi))
-		goto fail_no_cp;
+		goto free_fail_no_cp;
+
+	if (cur_page == cp1)
+		sbi->cur_cp_pack = 1;
+	else
+		sbi->cur_cp_pack = 2;
 
 	if (cp_blks <= 1)
 		goto done;
@@ -793,6 +798,9 @@ done:
 	f2fs_put_page(cp2, 1);
 	return 0;
 
+free_fail_no_cp:
+	f2fs_put_page(cp1, 1);
+	f2fs_put_page(cp2, 1);
 fail_no_cp:
 	kfree(sbi->ckpt);
 	return -EINVAL;
@@ -921,7 +929,11 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
 		inode = igrab(&fi->vfs_inode);
 		spin_unlock(&sbi->inode_lock[DIRTY_META]);
 		if (inode) {
-			update_inode_page(inode);
+			sync_inode_metadata(inode, 0);
+
+			/* it's on eviction */
+			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
+				update_inode_page(inode);
 			iput(inode);
 		}
 	};
@@ -987,7 +999,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
 {
 	up_write(&sbi->node_write);
 
-	build_free_nids(sbi);
+	build_free_nids(sbi, false);
 	f2fs_unlock_all(sbi);
 }
 
@@ -998,7 +1010,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 	for (;;) {
 		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		if (!atomic_read(&sbi->nr_wb_bios))
+		if (!get_pages(sbi, F2FS_WB_CP_DATA))
 			break;
 
 		io_schedule_timeout(5*HZ);
@@ -1123,7 +1135,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 				le32_to_cpu(ckpt->checksum_offset)))
 				= cpu_to_le32(crc32);
 
-	start_blk = __start_cp_addr(sbi);
+	start_blk = __start_cp_next_addr(sbi);
 
 	/* need to wait for end_io results */
 	wait_on_all_pages_writeback(sbi);
@@ -1184,9 +1196,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
 
-	clear_prefree_segments(sbi, cpc);
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	__set_cp_next_pack(sbi);
 
 	/*
 	 * redirty superblock if metadata like node page or inode cache is
@@ -1261,8 +1273,12 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
 	err = do_checkpoint(sbi, cpc);
-
-	f2fs_wait_all_discard_bio(sbi);
+	if (err) {
+		release_discard_addrs(sbi);
+	} else {
+		clear_prefree_segments(sbi, cpc);
+		f2fs_wait_all_discard_bio(sbi);
+	}
 
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
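
The checkpoint.c hunks above stop deriving the live checkpoint pack from the parity of the checkpoint version and instead record it explicitly in sbi->cur_cp_pack: get_valid_checkpoint() sets it once it knows which pack was valid at mount time, and __set_cp_next_pack() flips it after a successful checkpoint (the helpers appear in the f2fs.h hunks further down). A minimal user-space sketch of that ping-pong scheme; CP_BLKADDR and BLOCKS_PER_SEG are hypothetical stand-ins, not the real on-disk constants:

#include <stdio.h>

/* illustrative stand-ins for the on-disk layout constants */
#define CP_BLKADDR	512u	/* start of the checkpoint area (hypothetical) */
#define BLOCKS_PER_SEG	512u	/* one segment per checkpoint pack */

struct sb_info {
	unsigned int cur_cp_pack;	/* 1 or 2: pack holding the valid checkpoint */
};

/* base address of the pack that holds the current valid checkpoint */
static unsigned int start_cp_addr(const struct sb_info *sbi)
{
	unsigned int addr = CP_BLKADDR;

	if (sbi->cur_cp_pack == 2)
		addr += BLOCKS_PER_SEG;
	return addr;
}

/* base address of the pack the *next* checkpoint will be written to */
static unsigned int start_cp_next_addr(const struct sb_info *sbi)
{
	unsigned int addr = CP_BLKADDR;

	if (sbi->cur_cp_pack == 1)
		addr += BLOCKS_PER_SEG;
	return addr;
}

/* flip packs once the new checkpoint has safely hit the disk */
static void set_cp_next_pack(struct sb_info *sbi)
{
	sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
}

int main(void)
{
	struct sb_info sbi = { .cur_cp_pack = 1 };

	for (int i = 0; i < 3; i++) {
		printf("valid pack at %u, writing next checkpoint at %u\n",
		       start_cp_addr(&sbi), start_cp_next_addr(&sbi));
		set_cp_next_pack(&sbi);	/* as do_checkpoint() does on success */
	}
	return 0;
}

Because the pack is only flipped after the cp-error check in do_checkpoint(), a failed checkpoint leaves the previously valid pack authoritative.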
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9e5561fa4cb6..7c344b3ad70f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -29,6 +29,26 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+static bool __is_cp_guaranteed(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode;
+	struct f2fs_sb_info *sbi;
+
+	if (!mapping)
+		return false;
+
+	inode = mapping->host;
+	sbi = F2FS_I_SB(inode);
+
+	if (inode->i_ino == F2FS_META_INO(sbi) ||
+			inode->i_ino == F2FS_NODE_INO(sbi) ||
+			S_ISDIR(inode->i_mode) ||
+			is_cold_data(page))
+		return true;
+	return false;
+}
+
 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct bio_vec *bvec;
@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
+		enum count_type type = WB_DATA_TYPE(page);
 
 		fscrypt_pullback_bio_page(&page, true);
 
@@ -78,9 +99,11 @@ static void f2fs_write_end_io(struct bio *bio)
 			mapping_set_error(page->mapping, -EIO);
 			f2fs_stop_checkpoint(sbi, true);
 		}
+		dec_page_count(sbi, type);
+		clear_cold_data(page);
 		end_page_writeback(page);
 	}
-	if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
+	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 				wq_has_sleeper(&sbi->cp_wait))
 		wake_up(&sbi->cp_wait);
 
@@ -88,6 +111,46 @@ static void f2fs_write_end_io(struct bio *bio)
 }
 
 /*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		if (FDEV(i).start_blk <= blk_addr &&
+					FDEV(i).end_blk >= blk_addr) {
+			blk_addr -= FDEV(i).start_blk;
+			bdev = FDEV(i).bdev;
+			break;
+		}
+	}
+	if (bio) {
+		bio->bi_bdev = bdev;
+		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	}
+	return bdev;
+}
+
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++)
+		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+			return i;
+	return 0;
+}
+
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
+/*
  * Low-level block read/write IO operations.
  */
 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
@@ -97,8 +160,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 
 	bio = f2fs_bio_alloc(npages);
 
-	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	f2fs_target_device(sbi, blk_addr, bio);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 	bio->bi_private = is_read ? NULL : sbi;
 
@@ -109,8 +171,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 		struct bio *bio, enum page_type type)
 {
 	if (!is_read_io(bio_op(bio))) {
-		atomic_inc(&sbi->nr_wb_bios);
-		if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
+		if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
 	}
@@ -268,22 +329,24 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	verify_block_addr(sbi, fio->old_blkaddr);
 	verify_block_addr(sbi, fio->new_blkaddr);
 
+	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+	if (!is_read)
+		inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+
 	down_write(&io->io_rwsem);
 
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
-		(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+		(io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
-		int bio_blocks = MAX_BIO_BLOCKS(sbi);
-
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
-						bio_blocks, is_read);
+						BIO_MAX_PAGES, is_read);
 		io->fio = *fio;
 	}
 
-	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
-
 	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
 							PAGE_SIZE) {
 		__submit_merged_bio(io);
@@ -588,7 +651,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 	struct f2fs_summary sum;
 	struct node_info ni;
-	int seg = CURSEG_WARM_DATA;
 	pgoff_t fofs;
 	blkcnt_t count = 1;
 
@@ -606,11 +668,8 @@ alloc:
 	get_node_info(sbi, dn->nid, &ni);
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
-	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
-		seg = CURSEG_DIRECT_IO;
-
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-						&sum, seg);
+						&sum, CURSEG_WARM_DATA);
 	set_data_blkaddr(dn);
 
 	/* update i_size */
@@ -622,11 +681,18 @@ alloc:
 	return 0;
 }
 
-ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
+static inline bool __force_buffered_io(struct inode *inode, int rw)
+{
+	return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+			(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+			F2FS_I_SB(inode)->s_ndevs);
+}
+
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct f2fs_map_blocks map;
-	ssize_t ret = 0;
+	int err = 0;
 
 	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
 	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
@@ -638,19 +704,22 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 	map.m_next_pgofs = NULL;
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
-		ret = f2fs_convert_inline_inode(inode);
-		if (ret)
-			return ret;
-		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+		err = f2fs_convert_inline_inode(inode);
+		if (err)
+			return err;
+		return f2fs_map_blocks(inode, &map, 1,
+				__force_buffered_io(inode, WRITE) ?
+				F2FS_GET_BLOCK_PRE_AIO :
+				F2FS_GET_BLOCK_PRE_DIO);
 	}
 	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
-		ret = f2fs_convert_inline_inode(inode);
-		if (ret)
-			return ret;
+		err = f2fs_convert_inline_inode(inode);
+		if (err)
+			return err;
 	}
 	if (!f2fs_has_inline_data(inode))
 		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
-	return ret;
+	return err;
 }
 
 /*
@@ -674,7 +743,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	unsigned int ofs_in_node, last_ofs_in_node;
 	blkcnt_t prealloc;
 	struct extent_info ei;
-	bool allocated = false;
 	block_t blkaddr;
 
 	if (!maxblocks)
@@ -714,7 +782,7 @@ next_dnode:
 	}
 
 	prealloc = 0;
-	ofs_in_node = dn.ofs_in_node;
+	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
 	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
 next_block:
@@ -733,10 +801,8 @@ next_block:
 			}
 		} else {
 			err = __allocate_data_block(&dn);
-			if (!err) {
+			if (!err)
 				set_inode_flag(inode, FI_APPEND_WRITE);
-				allocated = true;
-			}
 		}
 		if (err)
 			goto sync_out;
@@ -791,7 +857,6 @@ skip:
 		err = reserve_new_blocks(&dn, prealloc);
 		if (err)
 			goto sync_out;
-		allocated = dn.node_changed;
 
 		map->m_len += dn.ofs_in_node - ofs_in_node;
 		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -810,9 +875,8 @@ skip:
 
 	if (create) {
 		f2fs_unlock_op(sbi);
-		f2fs_balance_fs(sbi, allocated);
+		f2fs_balance_fs(sbi, dn.node_changed);
 	}
-	allocated = false;
 	goto next_dnode;
 
 sync_out:
@@ -820,7 +884,7 @@ sync_out:
 unlock_out:
 	if (create) {
 		f2fs_unlock_op(sbi);
-		f2fs_balance_fs(sbi, allocated);
+		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 out:
 	trace_f2fs_map_blocks(inode, map, err);
@@ -832,19 +896,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 			pgoff_t *next_pgofs)
 {
 	struct f2fs_map_blocks map;
-	int ret;
+	int err;
 
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 	map.m_next_pgofs = next_pgofs;
 
-	ret = f2fs_map_blocks(inode, &map, create, flag);
-	if (!ret) {
+	err = f2fs_map_blocks(inode, &map, create, flag);
+	if (!err) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
 		bh->b_size = map.m_len << inode->i_blkbits;
 	}
-	return ret;
+	return err;
 }
 
 static int get_data_block(struct inode *inode, sector_t iblock,
@@ -889,7 +953,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct buffer_head map_bh;
 	sector_t start_blk, last_blk;
 	pgoff_t next_pgofs;
-	loff_t isize;
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = 0;
 	int ret = 0;
@@ -906,13 +969,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 	inode_lock(inode);
 
-	isize = i_size_read(inode);
-	if (start >= isize)
-		goto out;
-
-	if (start + len > isize)
-		len = isize - start;
-
 	if (logical_to_blk(inode, len) == 0)
 		len = blk_to_logical(inode, 1);
 
@@ -931,13 +987,11 @@ next:
 		/* HOLE */
 		if (!buffer_mapped(&map_bh)) {
 			start_blk = next_pgofs;
-			/* Go through holes util pass the EOF */
-			if (blk_to_logical(inode, start_blk) < isize)
+
+			if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+					F2FS_I_SB(inode)->max_file_blocks))
 				goto prep_next;
-			/* Found a hole beyond isize means no more extents.
-			 * Note that the premise is that filesystems don't
-			 * punch holes beyond isize and keep size unchanged.
-			 */
+
 			flags |= FIEMAP_EXTENT_LAST;
 		}
 
@@ -980,7 +1034,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct fscrypt_ctx *ctx = NULL;
-	struct block_device *bdev = sbi->sb->s_bdev;
 	struct bio *bio;
 
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -998,8 +1051,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 			fscrypt_release_ctx(ctx);
 			return ERR_PTR(-ENOMEM);
 		}
-	bio->bi_bdev = bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+	f2fs_target_device(sbi, blkaddr, bio);
 	bio->bi_end_io = f2fs_read_end_io;
 	bio->bi_private = ctx;
 
@@ -1094,7 +1146,8 @@ got_it:
 		 * This page will go to BIO. Do we need to send this
 		 * BIO off first?
 		 */
-		if (bio && (last_block_in_bio != block_nr - 1)) {
+		if (bio && (last_block_in_bio != block_nr - 1 ||
+			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
 			__submit_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
@@ -1309,7 +1362,6 @@ done:
 	if (err && err != -ENOENT)
 		goto redirty_out;
 
-	clear_cold_data(page);
 out:
 	inode_dec_dirty_pages(inode);
 	if (err)
@@ -1330,6 +1382,8 @@ out:
 
 redirty_out:
 	redirty_page_for_writepage(wbc, page);
+	if (!err)
+		return AOP_WRITEPAGE_ACTIVATE;
 	unlock_page(page);
 	return err;
 }
@@ -1425,6 +1479,15 @@ continue_unlock:
 
 			ret = mapping->a_ops->writepage(page, wbc);
 			if (unlikely(ret)) {
+				/*
+				 * keep nr_to_write, since vfs uses this to
+				 * get # of written pages.
+				 */
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+					continue;
+				}
 				done_index = page->index + 1;
 				done = 1;
 				break;
@@ -1712,7 +1775,6 @@ static int f2fs_write_end(struct file *file,
 		goto unlock_out;
 
 	set_page_dirty(page);
-	clear_cold_data(page);
 
 	if (pos + copied > i_size_read(inode))
 		f2fs_i_size_write(inode, pos + copied);
@@ -1749,9 +1811,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (err)
 		return err;
 
-	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
-		return 0;
-	if (test_opt(F2FS_I_SB(inode), LFS))
+	if (__force_buffered_io(inode, rw))
 		return 0;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@ -1783,12 +1843,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
 		return;
 
 	if (PageDirty(page)) {
-		if (inode->i_ino == F2FS_META_INO(sbi))
+		if (inode->i_ino == F2FS_META_INO(sbi)) {
 			dec_page_count(sbi, F2FS_DIRTY_META);
-		else if (inode->i_ino == F2FS_NODE_INO(sbi))
+		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
 			dec_page_count(sbi, F2FS_DIRTY_NODES);
-		else
+		} else {
 			inode_dec_dirty_pages(inode);
+			remove_dirty_inode(inode);
+		}
 	}
 
 	/* This is atomic written page, keep Private */
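
The new f2fs_target_device() above is the heart of the multiple-device support mentioned in the pull message: a filesystem-global block address is located in the per-device [start_blk, end_blk] table, rebased to a device-local block, and the bio is pointed at that device. Since a single bio cannot span two block devices, the merge paths now also check __same_bdev() before appending to an existing bio. A standalone sketch of the lookup, with a made-up two-device table (the paths and sizes are purely illustrative):

#include <stdio.h>

typedef unsigned int block_t;

struct dev_info {
	const char *path;	/* illustrative only */
	block_t start_blk;	/* first global block on this device */
	block_t end_blk;	/* last global block on this device */
};

/* hypothetical layout: 1000 blocks on the first device, 2000 on the second */
static const struct dev_info devs[] = {
	{ "/dev/sda", 0, 999 },
	{ "/dev/sdb", 1000, 2999 },
};

/*
 * Mirror of the lookup in f2fs_target_device(): find the device whose
 * range covers blk_addr and rebase the address to be device-local.
 */
static int target_device(block_t blk_addr, block_t *local_addr)
{
	for (unsigned int i = 0; i < sizeof(devs) / sizeof(devs[0]); i++) {
		if (devs[i].start_blk <= blk_addr && blk_addr <= devs[i].end_blk) {
			*local_addr = blk_addr - devs[i].start_blk;
			return i;
		}
	}
	*local_addr = blk_addr;	/* fall back to device 0, as the kernel code does */
	return 0;
}

int main(void)
{
	block_t local;
	int idx = target_device(1500, &local);

	printf("global block 1500 -> %s, local block %u\n", devs[idx].path, local);
	return 0;
}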
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fb245bd302e4..fbd5184140d0 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,7 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
-	si->wb_bios = atomic_read(&sbi->nr_wb_bios);
+	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
+	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
@@ -74,7 +75,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
 	si->sits = MAIN_SEGS(sbi);
 	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
-	si->fnids = NM_I(sbi)->fcnt;
+	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+	si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
 	si->bg_gc = sbi->bg_gc;
 	si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
 		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -194,7 +196,9 @@ get_cache:
 		si->cache_mem += sizeof(struct flush_cmd_control);
 
 	/* free nids */
-	si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
+	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
+				NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
+				sizeof(struct free_nid);
 	si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
 	si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
 					sizeof(struct nat_entry_set);
@@ -310,22 +314,22 @@ static int stat_show(struct seq_file *s, void *v)
 	seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 			si->ext_tree, si->zombie_tree, si->ext_node);
 	seq_puts(s, "\nBalancing F2FS Async:\n");
-	seq_printf(s, "  - inmem: %4lld, wb_bios: %4d\n",
-		   si->inmem_pages, si->wb_bios);
-	seq_printf(s, "  - nodes: %4lld in %4d\n",
+	seq_printf(s, "  - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
+		   si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+	seq_printf(s, "  - nodes: %4d in %4d\n",
 		   si->ndirty_node, si->node_pages);
-	seq_printf(s, "  - dents: %4lld in dirs:%4d (%4d)\n",
+	seq_printf(s, "  - dents: %4d in dirs:%4d (%4d)\n",
 		   si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
-	seq_printf(s, "  - datas: %4lld in files:%4d\n",
+	seq_printf(s, "  - datas: %4d in files:%4d\n",
 		   si->ndirty_data, si->ndirty_files);
-	seq_printf(s, "  - meta: %4lld in %4d\n",
+	seq_printf(s, "  - meta: %4d in %4d\n",
 		   si->ndirty_meta, si->meta_pages);
-	seq_printf(s, "  - imeta: %4lld\n",
+	seq_printf(s, "  - imeta: %4d\n",
 		   si->ndirty_imeta);
 	seq_printf(s, "  - NATs: %9d/%9d\n  - SITs: %9d/%9d\n",
 		   si->dirty_nats, si->nats, si->dirty_sits, si->sits);
-	seq_printf(s, "  - free_nids: %9d\n",
-		   si->fnids);
+	seq_printf(s, "  - free_nids: %9d, alloc_nids: %9d\n",
+		   si->free_nids, si->alloc_nids);
 	seq_puts(s, "\nDistribution of User Blocks:");
 	seq_puts(s, " [ valid | invalid | free ]\n");
 	seq_puts(s, "  [");
@@ -373,6 +377,7 @@ static int stat_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations stat_fops = {
+	.owner = THIS_MODULE,
 	.open = stat_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 369f4513be37..827c5daef4fc 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -136,7 +136,7 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
 
 	/* show encrypted name */
 	if (fname->hash) {
-		if (de->hash_code == fname->hash)
+		if (de->hash_code == cpu_to_le32(fname->hash))
 			goto found;
 	} else if (de_name.len == name->len &&
 		de->hash_code == namehash &&
@@ -313,7 +313,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 	set_page_dirty(page);
 
 	dir->i_mtime = dir->i_ctime = current_time(dir);
-	f2fs_mark_inode_dirty_sync(dir);
+	f2fs_mark_inode_dirty_sync(dir, false);
 	f2fs_put_page(page, 1);
 }
 
@@ -466,7 +466,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
 		clear_inode_flag(inode, FI_NEW_INODE);
 	}
 	dir->i_mtime = dir->i_ctime = current_time(dir);
-	f2fs_mark_inode_dirty_sync(dir);
+	f2fs_mark_inode_dirty_sync(dir, false);
 
 	if (F2FS_I(dir)->i_current_depth != current_depth)
 		f2fs_i_depth_write(dir, current_depth);
@@ -731,7 +731,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	set_page_dirty(page);
 
 	dir->i_ctime = dir->i_mtime = current_time(dir);
-	f2fs_mark_inode_dirty_sync(dir);
+	f2fs_mark_inode_dirty_sync(dir, false);
 
 	if (inode)
 		f2fs_drop_nlink(dir, inode);
@@ -742,6 +742,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 		ClearPagePrivate(page);
 		ClearPageUptodate(page);
 		inode_dec_dirty_pages(dir);
+		remove_dirty_inode(dir);
 	}
 	f2fs_put_page(page, 1);
 }
@@ -784,7 +785,7 @@ bool f2fs_empty_dir(struct inode *dir)
 	return true;
 }
 
-bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 			unsigned int start_pos, struct fscrypt_str *fstr)
 {
 	unsigned char d_type = DT_UNKNOWN;
@@ -819,7 +820,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 					(u32)de->hash_code, 0,
 					&de_name, fstr);
 			if (err)
-				return true;
+				return err;
 
 			de_name = *fstr;
 			fstr->len = save_len;
@@ -827,12 +828,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 
 		if (!dir_emit(ctx, de_name.name, de_name.len,
 					le32_to_cpu(de->ino), d_type))
-			return true;
+			return 1;
 
 		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 		ctx->pos = start_pos + bit_pos;
 	}
-	return false;
+	return 0;
 }
 
 static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -871,17 +872,21 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 		dentry_page = get_lock_data_page(inode, n, false);
 		if (IS_ERR(dentry_page)) {
 			err = PTR_ERR(dentry_page);
-			if (err == -ENOENT)
+			if (err == -ENOENT) {
+				err = 0;
 				continue;
-			else
+			} else {
 				goto out;
+			}
 		}
 
 		dentry_blk = kmap(dentry_page);
 
 		make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
 
-		if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
+		err = f2fs_fill_dentries(ctx, &d,
+					n * NR_DENTRY_IN_BLOCK, &fstr);
+		if (err) {
 			kunmap(dentry_page);
 			f2fs_put_page(dentry_page, 1);
 			break;
@@ -891,10 +896,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 		kunmap(dentry_page);
 		f2fs_put_page(dentry_page, 1);
 	}
-	err = 0;
 out:
 	fscrypt_fname_free_buffer(&fstr);
-	return err;
+	return err < 0 ? err : 0;
 }
 
 static int f2fs_dir_open(struct inode *inode, struct file *filp)
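
The dir.c hunks above turn f2fs_fill_dentries() from a bool stop flag into a tri-state int so that f2fs_readdir() can tell a real failure from a full user buffer: a negative value is an error to propagate, 1 means dir_emit() refused further entries (not an error), and 0 means the whole dentry block was emitted. A small sketch of the resulting caller pattern, using a stand-in fill function rather than the real f2fs code:

#include <stdio.h>

/*
 * Illustrative stand-in for f2fs_fill_dentries(): returns <0 on error,
 * 1 when the consumer stops us (like dir_emit() returning false), 0 when
 * the whole block was emitted.
 */
static int fill_dentries(int block)
{
	if (block == 2)
		return 1;	/* pretend the user buffer filled up here */
	return 0;
}

int main(void)
{
	int err = 0;

	for (int n = 0; n < 4; n++) {
		err = fill_dentries(n);
		if (err)	/* stop on both errors and "buffer full" */
			break;
		printf("emitted block %d\n", n);
	}
	/* as in the new f2fs_readdir(): only negative values are errors */
	return err < 0 ? err : 0;
}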
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2b06d4fcd954..4db44da7ef69 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -172,7 +172,7 @@ static void __drop_largest_extent(struct inode *inode,
 
 	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
 		largest->len = 0;
-		f2fs_mark_inode_dirty_sync(inode);
+		f2fs_mark_inode_dirty_sync(inode, true);
 	}
 }
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2cf4f7f09e32..23c86e8cf523 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -103,7 +103,7 @@ struct f2fs_mount_info {
 };
 
 #define F2FS_FEATURE_ENCRYPT	0x0001
-#define F2FS_FEATURE_HMSMR	0x0002
+#define F2FS_FEATURE_BLKZONED	0x0002
 
 #define F2FS_HAS_FEATURE(sb, mask)				\
 	((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -401,6 +401,7 @@ struct f2fs_map_blocks {
 #define FADVISE_LOST_PINO_BIT	0x02
 #define FADVISE_ENCRYPT_BIT	0x04
 #define FADVISE_ENC_NAME_BIT	0x08
+#define FADVISE_KEEP_SIZE_BIT	0x10
 
 #define file_is_cold(inode)	is_file(inode, FADVISE_COLD_BIT)
 #define file_wrong_pino(inode)	is_file(inode, FADVISE_LOST_PINO_BIT)
@@ -413,6 +414,8 @@ struct f2fs_map_blocks {
 #define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
 #define file_enc_name(inode)	is_file(inode, FADVISE_ENC_NAME_BIT)
 #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
+#define file_keep_isize(inode)	is_file(inode, FADVISE_KEEP_SIZE_BIT)
+#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
 
 #define DEF_DIR_LEVEL		0
 
@@ -428,7 +431,7 @@ struct f2fs_inode_info {
 	/* Use below internally in f2fs*/
 	unsigned long flags;		/* use to pass per-file flags */
 	struct rw_semaphore i_sem;	/* protect fi info */
-	struct percpu_counter dirty_pages;	/* # of dirty pages */
+	atomic_t dirty_pages;		/* # of dirty pages */
 	f2fs_hash_t chash;		/* hash value of given file name */
 	unsigned int clevel;		/* maximum level of given file name */
 	nid_t i_xattr_nid;		/* node id that contains xattrs */
@@ -493,20 +496,26 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
 	return __is_extent_mergeable(cur, front);
 }
 
-extern void f2fs_mark_inode_dirty_sync(struct inode *);
+extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
 static inline void __try_update_largest_extent(struct inode *inode,
 			struct extent_tree *et, struct extent_node *en)
 {
 	if (en->ei.len > et->largest.len) {
 		et->largest = en->ei;
-		f2fs_mark_inode_dirty_sync(inode);
+		f2fs_mark_inode_dirty_sync(inode, true);
 	}
 }
 
+enum nid_list {
+	FREE_NID_LIST,
+	ALLOC_NID_LIST,
+	MAX_NID_LIST,
+};
+
 struct f2fs_nm_info {
 	block_t nat_blkaddr;		/* base disk address of NAT */
 	nid_t max_nid;			/* maximum possible node ids */
-	nid_t available_nids;		/* maximum available node ids */
+	nid_t available_nids;		/* # of available node ids */
 	nid_t next_scan_nid;		/* the next nid to be scanned */
 	unsigned int ram_thresh;	/* control the memory footprint */
 	unsigned int ra_nid_pages;	/* # of nid pages to be readaheaded */
@@ -522,9 +531,9 @@ struct f2fs_nm_info {
 
 	/* free node ids management */
 	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
-	struct list_head free_nid_list;	/* a list for free nids */
-	spinlock_t free_nid_list_lock;	/* protect free nid list */
-	unsigned int fcnt;		/* the number of free node id */
+	struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
+	unsigned int nid_cnt[MAX_NID_LIST];	/* the number of free node id */
+	spinlock_t nid_list_lock;	/* protect nid lists ops */
 	struct mutex build_lock;	/* lock for build free nids */
 
 	/* for checkpoint */
@@ -585,7 +594,6 @@ enum {
 	CURSEG_WARM_NODE,	/* direct node blocks of normal files */
 	CURSEG_COLD_NODE,	/* indirect node blocks */
 	NO_CHECK_TYPE,
-	CURSEG_DIRECT_IO,	/* to use for the direct IO path */
 };
 
 struct flush_cmd {
@@ -649,6 +657,7 @@ struct f2fs_sm_info {
  * f2fs monitors the number of several block types such as on-writeback,
  * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
  */
+#define WB_DATA_TYPE(p)	(__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
 enum count_type {
 	F2FS_DIRTY_DENTS,
 	F2FS_DIRTY_DATA,
@@ -656,6 +665,8 @@ enum count_type {
 	F2FS_DIRTY_META,
 	F2FS_INMEM_PAGES,
 	F2FS_DIRTY_IMETA,
+	F2FS_WB_CP_DATA,
+	F2FS_WB_DATA,
 	NR_COUNT_TYPE,
 };
 
@@ -704,6 +715,20 @@ struct f2fs_bio_info {
 	struct rw_semaphore io_rwsem;	/* blocking op for bio */
 };
 
+#define FDEV(i)				(sbi->devs[i])
+#define RDEV(i)				(raw_super->devs[i])
+struct f2fs_dev_info {
+	struct block_device *bdev;
+	char path[MAX_PATH_LEN];
+	unsigned int total_segments;
+	block_t start_blk;
+	block_t end_blk;
+#ifdef CONFIG_BLK_DEV_ZONED
+	unsigned int nr_blkz;		/* Total number of zones */
+	u8 *blkz_type;			/* Array of zones type */
+#endif
+};
+
 enum inode_type {
 	DIR_INODE,			/* for dirty dir inode */
 	FILE_INODE,			/* for dirty regular/symlink inode */
@@ -750,6 +775,12 @@ struct f2fs_sb_info {
 	u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
 	u8 key_prefix_size;
 #endif
+
+#ifdef CONFIG_BLK_DEV_ZONED
+	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
+	unsigned int log_blocks_per_blkz;	/* log2 F2FS blocks per zone */
+#endif
+
 	/* for node-related operations */
 	struct f2fs_nm_info *nm_info;		/* node manager */
 	struct inode *node_inode;		/* cache node blocks */
@@ -764,6 +795,7 @@ struct f2fs_sb_info {
 
 	/* for checkpoint */
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
+	int cur_cp_pack;			/* remain current cp pack */
 	spinlock_t cp_lock;			/* for flag in ckpt */
 	struct inode *meta_inode;		/* cache meta blocks */
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
@@ -815,10 +847,9 @@ struct f2fs_sb_info {
 	block_t discard_blks;			/* discard command candidats */
 	block_t last_valid_block_count;		/* for recovery */
 	u32 s_next_generation;			/* for NFS support */
-	atomic_t nr_wb_bios;			/* # of writeback bios */
 
 	/* # of pages, see count_type */
-	struct percpu_counter nr_pages[NR_COUNT_TYPE];
+	atomic_t nr_pages[NR_COUNT_TYPE];
 	/* # of allocated blocks */
 	struct percpu_counter alloc_valid_block_count;
 
@@ -863,6 +894,8 @@ struct f2fs_sb_info {
 
 	/* For shrinker support */
 	struct list_head s_list;
+	int s_ndevs;				/* number of devices */
+	struct f2fs_dev_info *devs;		/* for device list */
 	struct mutex umount_mutex;
 	unsigned int shrinker_run_no;
 
@@ -1105,13 +1138,6 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 	spin_unlock(&sbi->cp_lock);
 }
 
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
-{
-	struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
-
-	return blk_queue_discard(q);
-}
-
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
 	down_read(&sbi->cp_rwsem);
@@ -1232,9 +1258,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 
 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
 {
-	percpu_counter_inc(&sbi->nr_pages[count_type]);
+	atomic_inc(&sbi->nr_pages[count_type]);
 
-	if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES)
+	if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
+		count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
 		return;
 
 	set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -1242,14 +1269,14 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
 
 static inline void inode_inc_dirty_pages(struct inode *inode)
 {
-	percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
+	atomic_inc(&F2FS_I(inode)->dirty_pages);
 	inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
 				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
 }
 
 static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
 {
-	percpu_counter_dec(&sbi->nr_pages[count_type]);
+	atomic_dec(&sbi->nr_pages[count_type]);
 }
 
 static inline void inode_dec_dirty_pages(struct inode *inode)
@@ -1258,19 +1285,19 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
 			!S_ISLNK(inode->i_mode))
 		return;
 
-	percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
+	atomic_dec(&F2FS_I(inode)->dirty_pages);
 	dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
 				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
 }
 
 static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
 {
-	return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
+	return atomic_read(&sbi->nr_pages[count_type]);
 }
 
-static inline s64 get_dirty_pages(struct inode *inode)
+static inline int get_dirty_pages(struct inode *inode)
 {
-	return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
+	return atomic_read(&F2FS_I(inode)->dirty_pages);
 }
 
 static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
@@ -1329,22 +1356,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 
 static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
 {
-	block_t start_addr;
-	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	unsigned long long ckpt_version = cur_cp_version(ckpt);
-
-	start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+	block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
-	/*
-	 * odd numbered checkpoint should at cp segment 0
-	 * and even segment must be at cp segment 1
-	 */
-	if (!(ckpt_version & 1))
+	if (sbi->cur_cp_pack == 2)
 		start_addr += sbi->blocks_per_seg;
+	return start_addr;
+}
+
+static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
+{
+	block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
+	if (sbi->cur_cp_pack == 1)
+		start_addr += sbi->blocks_per_seg;
 	return start_addr;
 }
 
+static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi)
+{
+	sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
+}
+
 static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
 {
 	return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
@@ -1621,7 +1653,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
 		return;
 	case FI_DATA_EXIST:
 	case FI_INLINE_DOTS:
-		f2fs_mark_inode_dirty_sync(inode);
+		f2fs_mark_inode_dirty_sync(inode, true);
 	}
 }
 
@@ -1648,7 +1680,7 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode)
 {
 	F2FS_I(inode)->i_acl_mode = mode;
 	set_inode_flag(inode, FI_ACL_MODE);
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
 }
 
 static inline void f2fs_i_links_write(struct inode *inode, bool inc)
@@ -1657,7 +1689,7 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
 		inc_nlink(inode);
 	else
 		drop_nlink(inode);
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void f2fs_i_blocks_write(struct inode *inode,
@@ -1668,7 +1700,7 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
 
 	inode->i_blocks = add ? inode->i_blocks + diff :
 				inode->i_blocks - diff;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
 }
@@ -1682,34 +1714,27 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
 		return;
 
 	i_size_write(inode, i_size);
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
 }
 
-static inline bool f2fs_skip_inode_update(struct inode *inode)
-{
-	if (!is_inode_flag_set(inode, FI_AUTO_RECOVER))
-		return false;
-	return F2FS_I(inode)->last_disk_size == i_size_read(inode);
-}
-
 static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
 {
 	F2FS_I(inode)->i_current_depth = depth;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid)
 {
 	F2FS_I(inode)->i_xattr_nid = xnid;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino)
 {
 	F2FS_I(inode)->i_pino = pino;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
@@ -1837,13 +1862,31 @@ static inline int is_file(struct inode *inode, int type)
 static inline void set_file(struct inode *inode, int type)
 {
 	F2FS_I(inode)->i_advise |= type;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 static inline void clear_file(struct inode *inode, int type)
 {
 	F2FS_I(inode)->i_advise &= ~type;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
+}
+
+static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
+{
+	if (dsync) {
+		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+		bool ret;
+
+		spin_lock(&sbi->inode_lock[DIRTY_META]);
+		ret = list_empty(&F2FS_I(inode)->gdirty_list);
+		spin_unlock(&sbi->inode_lock[DIRTY_META]);
+		return ret;
+	}
+	if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) ||
+			file_keep_isize(inode) ||
+			i_size_read(inode) & PAGE_MASK)
+		return false;
+	return F2FS_I(inode)->last_disk_size == i_size_read(inode);
 }
 
 static inline int f2fs_readonly(struct super_block *sb)
@@ -1955,7 +1998,7 @@ void set_de_type(struct f2fs_dir_entry *, umode_t);
 unsigned char get_de_type(struct f2fs_dir_entry *);
 struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
 			f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
-bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
+int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
 			unsigned int, struct fscrypt_str *);
 void do_make_empty_dir(struct inode *, struct inode *,
 			struct f2fs_dentry_ptr *);
@@ -1995,7 +2038,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
1995/* 2038/*
1996 * super.c 2039 * super.c
1997 */ 2040 */
1998int f2fs_inode_dirtied(struct inode *); 2041int f2fs_inode_dirtied(struct inode *, bool);
1999void f2fs_inode_synced(struct inode *); 2042void f2fs_inode_synced(struct inode *);
2000int f2fs_commit_super(struct f2fs_sb_info *, bool); 2043int f2fs_commit_super(struct f2fs_sb_info *, bool);
2001int f2fs_sync_fs(struct super_block *, int); 2044int f2fs_sync_fs(struct super_block *, int);
@@ -2034,7 +2077,7 @@ void move_node_page(struct page *, int);
2034int fsync_node_pages(struct f2fs_sb_info *, struct inode *, 2077int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
2035 struct writeback_control *, bool); 2078 struct writeback_control *, bool);
2036int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); 2079int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
2037void build_free_nids(struct f2fs_sb_info *); 2080void build_free_nids(struct f2fs_sb_info *, bool);
2038bool alloc_nid(struct f2fs_sb_info *, nid_t *); 2081bool alloc_nid(struct f2fs_sb_info *, nid_t *);
2039void alloc_nid_done(struct f2fs_sb_info *, nid_t); 2082void alloc_nid_done(struct f2fs_sb_info *, nid_t);
2040void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 2083void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
@@ -2060,7 +2103,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *, bool);
2060void f2fs_balance_fs_bg(struct f2fs_sb_info *); 2103void f2fs_balance_fs_bg(struct f2fs_sb_info *);
2061int f2fs_issue_flush(struct f2fs_sb_info *); 2104int f2fs_issue_flush(struct f2fs_sb_info *);
2062int create_flush_cmd_control(struct f2fs_sb_info *); 2105int create_flush_cmd_control(struct f2fs_sb_info *);
2063void destroy_flush_cmd_control(struct f2fs_sb_info *); 2106void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
2064void invalidate_blocks(struct f2fs_sb_info *, block_t); 2107void invalidate_blocks(struct f2fs_sb_info *, block_t);
2065bool is_checkpointed_data(struct f2fs_sb_info *, block_t); 2108bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
2066void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 2109void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
@@ -2132,12 +2175,15 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
2132void f2fs_flush_merged_bios(struct f2fs_sb_info *); 2175void f2fs_flush_merged_bios(struct f2fs_sb_info *);
2133int f2fs_submit_page_bio(struct f2fs_io_info *); 2176int f2fs_submit_page_bio(struct f2fs_io_info *);
2134void f2fs_submit_page_mbio(struct f2fs_io_info *); 2177void f2fs_submit_page_mbio(struct f2fs_io_info *);
2178struct block_device *f2fs_target_device(struct f2fs_sb_info *,
2179 block_t, struct bio *);
2180int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
2135void set_data_blkaddr(struct dnode_of_data *); 2181void set_data_blkaddr(struct dnode_of_data *);
2136void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 2182void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
2137int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); 2183int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
2138int reserve_new_block(struct dnode_of_data *); 2184int reserve_new_block(struct dnode_of_data *);
2139int f2fs_get_block(struct dnode_of_data *, pgoff_t); 2185int f2fs_get_block(struct dnode_of_data *, pgoff_t);
2140ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); 2186int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
2141int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 2187int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
2142struct page *get_read_data_page(struct inode *, pgoff_t, int, bool); 2188struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
2143struct page *find_data_page(struct inode *, pgoff_t); 2189struct page *find_data_page(struct inode *, pgoff_t);
@@ -2160,7 +2206,7 @@ int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
2160int start_gc_thread(struct f2fs_sb_info *); 2206int start_gc_thread(struct f2fs_sb_info *);
2161void stop_gc_thread(struct f2fs_sb_info *); 2207void stop_gc_thread(struct f2fs_sb_info *);
2162block_t start_bidx_of_node(unsigned int, struct inode *); 2208block_t start_bidx_of_node(unsigned int, struct inode *);
2163int f2fs_gc(struct f2fs_sb_info *, bool); 2209int f2fs_gc(struct f2fs_sb_info *, bool, bool);
2164void build_gc_manager(struct f2fs_sb_info *); 2210void build_gc_manager(struct f2fs_sb_info *);
2165 2211
2166/* 2212/*
@@ -2181,12 +2227,12 @@ struct f2fs_stat_info {
2181 unsigned long long hit_largest, hit_cached, hit_rbtree; 2227 unsigned long long hit_largest, hit_cached, hit_rbtree;
2182 unsigned long long hit_total, total_ext; 2228 unsigned long long hit_total, total_ext;
2183 int ext_tree, zombie_tree, ext_node; 2229 int ext_tree, zombie_tree, ext_node;
2184 s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; 2230 int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
2185 s64 inmem_pages; 2231 int inmem_pages;
2186 unsigned int ndirty_dirs, ndirty_files, ndirty_all; 2232 unsigned int ndirty_dirs, ndirty_files, ndirty_all;
2187 int nats, dirty_nats, sits, dirty_sits, fnids; 2233 int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
2188 int total_count, utilization; 2234 int total_count, utilization;
2189 int bg_gc, wb_bios; 2235 int bg_gc, nr_wb_cp_data, nr_wb_data;
2190 int inline_xattr, inline_inode, inline_dir, orphans; 2236 int inline_xattr, inline_inode, inline_dir, orphans;
2191 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; 2237 unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
2192 unsigned int bimodal, avg_vblocks; 2238 unsigned int bimodal, avg_vblocks;
@@ -2412,9 +2458,30 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb)
2412 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); 2458 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
2413} 2459}
2414 2460
2415static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb) 2461static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
2462{
2463 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED);
2464}
2465
2466#ifdef CONFIG_BLK_DEV_ZONED
2467static inline int get_blkz_type(struct f2fs_sb_info *sbi,
2468 struct block_device *bdev, block_t blkaddr)
2469{
2470 unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
2471 int i;
2472
2473 for (i = 0; i < sbi->s_ndevs; i++)
2474 if (FDEV(i).bdev == bdev)
2475 return FDEV(i).blkz_type[zno];
2476 return -EINVAL;
2477}
2478#endif
2479
2480static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
2416{ 2481{
2417 return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR); 2482 struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
2483
2484 return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb);
2418} 2485}
2419 2486
2420static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) 2487static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
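The f2fs.h hunk above replaces the HMSMR feature test with a zoned-block-device one and adds get_blkz_type(), which maps a block address to a zone index and looks up the owning device's per-zone type table, falling back to -EINVAL when no device matches. A small userspace sketch of that lookup follows; the device table layout, LOG_BLKS_PER_ZONE value, and zone-type encoding are assumptions, not the kernel's structures.

#include <stdio.h>

#define NDEVS 2
#define LOG_BLKS_PER_ZONE 8            /* 256 blocks per zone, assumed */

struct model_dev {
    int id;                            /* stands in for the bdev pointer */
    unsigned char zone_type[16];       /* per-zone type for this device */
};

static int get_zone_type(struct model_dev devs[], int ndevs,
                         int bdev_id, unsigned long blkaddr)
{
    unsigned int zno = blkaddr >> LOG_BLKS_PER_ZONE;

    for (int i = 0; i < ndevs; i++)
        if (devs[i].id == bdev_id)
            return devs[i].zone_type[zno];
    return -1;                         /* mirrors the -EINVAL fallback */
}

int main(void)
{
    struct model_dev devs[NDEVS] = { { .id = 1 }, { .id = 2 } };

    devs[1].zone_type[3] = 2;          /* mark zone 3 of device 2 */
    printf("type: %d\n",
           get_zone_type(devs, NDEVS, 2, 3UL << LOG_BLKS_PER_ZONE));
    return 0;
}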
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c7865073cd26..383b5c29f46b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -94,8 +94,6 @@ mapped:
94 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 94 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
95 f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); 95 f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
96 96
97 /* if gced page is attached, don't write to cold segment */
98 clear_cold_data(page);
99out: 97out:
100 sb_end_pagefault(inode->i_sb); 98 sb_end_pagefault(inode->i_sb);
101 f2fs_update_time(sbi, REQ_TIME); 99 f2fs_update_time(sbi, REQ_TIME);
@@ -210,7 +208,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
210 } 208 }
211 209
212 /* if the inode is dirty, let's recover all the time */ 210 /* if the inode is dirty, let's recover all the time */
213 if (!datasync && !f2fs_skip_inode_update(inode)) { 211 if (!f2fs_skip_inode_update(inode, datasync)) {
214 f2fs_write_inode(inode, NULL); 212 f2fs_write_inode(inode, NULL);
215 goto go_write; 213 goto go_write;
216 } 214 }
@@ -264,7 +262,7 @@ sync_nodes:
264 } 262 }
265 263
266 if (need_inode_block_update(sbi, ino)) { 264 if (need_inode_block_update(sbi, ino)) {
267 f2fs_mark_inode_dirty_sync(inode); 265 f2fs_mark_inode_dirty_sync(inode, true);
268 f2fs_write_inode(inode, NULL); 266 f2fs_write_inode(inode, NULL);
269 goto sync_nodes; 267 goto sync_nodes;
270 } 268 }
@@ -632,7 +630,7 @@ int f2fs_truncate(struct inode *inode)
632 return err; 630 return err;
633 631
634 inode->i_mtime = inode->i_ctime = current_time(inode); 632 inode->i_mtime = inode->i_ctime = current_time(inode);
635 f2fs_mark_inode_dirty_sync(inode); 633 f2fs_mark_inode_dirty_sync(inode, false);
636 return 0; 634 return 0;
637} 635}
638 636
@@ -679,6 +677,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
679{ 677{
680 struct inode *inode = d_inode(dentry); 678 struct inode *inode = d_inode(dentry);
681 int err; 679 int err;
680 bool size_changed = false;
682 681
683 err = setattr_prepare(dentry, attr); 682 err = setattr_prepare(dentry, attr);
684 if (err) 683 if (err)
@@ -694,7 +693,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
694 err = f2fs_truncate(inode); 693 err = f2fs_truncate(inode);
695 if (err) 694 if (err)
696 return err; 695 return err;
697 f2fs_balance_fs(F2FS_I_SB(inode), true);
698 } else { 696 } else {
699 /* 697 /*
700 * do not trim all blocks after i_size if target size is 698 * do not trim all blocks after i_size if target size is
@@ -710,6 +708,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
710 } 708 }
711 inode->i_mtime = inode->i_ctime = current_time(inode); 709 inode->i_mtime = inode->i_ctime = current_time(inode);
712 } 710 }
711
712 size_changed = true;
713 } 713 }
714 714
715 __setattr_copy(inode, attr); 715 __setattr_copy(inode, attr);
@@ -722,7 +722,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
722 } 722 }
723 } 723 }
724 724
725 f2fs_mark_inode_dirty_sync(inode); 725 /* file size may have changed here */
726 f2fs_mark_inode_dirty_sync(inode, size_changed);
727
728 /* inode change will produce dirty node pages flushed by checkpoint */
729 f2fs_balance_fs(F2FS_I_SB(inode), true);
730
726 return err; 731 return err;
727} 732}
728 733
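The f2fs_setattr() hunk above reorders the tail of the function: the inode is marked dirty first, with sync only when the size actually changed, and f2fs_balance_fs() runs afterwards so the dirty node pages the change produced are visible to the balancing pass. A tiny sequencing model, nothing more:

#include <stdbool.h>
#include <stdio.h>

static bool inode_dirty;

static void mark_inode_dirty_sync(bool sync)
{
    inode_dirty = true;
    printf("inode dirtied (sync=%d)\n", sync);
}

static void balance_fs(void)
{
    /* runs after the dirtying, as in the hunk, so it can account for
     * (and flush) the node pages the attribute change produced */
    if (inode_dirty)
        printf("balance: flush dirty node pages via checkpoint\n");
}

int main(void)
{
    bool size_changed = true;          /* truncate/extend path was taken */

    mark_inode_dirty_sync(size_changed);
    balance_fs();
    return 0;
}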
@@ -967,7 +972,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
967 new_size = (dst + i) << PAGE_SHIFT; 972 new_size = (dst + i) << PAGE_SHIFT;
968 if (dst_inode->i_size < new_size) 973 if (dst_inode->i_size < new_size)
969 f2fs_i_size_write(dst_inode, new_size); 974 f2fs_i_size_write(dst_inode, new_size);
970 } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); 975 } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
971 976
972 f2fs_put_dnode(&dn); 977 f2fs_put_dnode(&dn);
973 } else { 978 } else {
@@ -1218,6 +1223,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1218 ret = f2fs_do_zero_range(&dn, index, end); 1223 ret = f2fs_do_zero_range(&dn, index, end);
1219 f2fs_put_dnode(&dn); 1224 f2fs_put_dnode(&dn);
1220 f2fs_unlock_op(sbi); 1225 f2fs_unlock_op(sbi);
1226
1227 f2fs_balance_fs(sbi, dn.node_changed);
1228
1221 if (ret) 1229 if (ret)
1222 goto out; 1230 goto out;
1223 1231
@@ -1313,15 +1321,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1313 pgoff_t pg_end; 1321 pgoff_t pg_end;
1314 loff_t new_size = i_size_read(inode); 1322 loff_t new_size = i_size_read(inode);
1315 loff_t off_end; 1323 loff_t off_end;
1316 int ret; 1324 int err;
1317 1325
1318 ret = inode_newsize_ok(inode, (len + offset)); 1326 err = inode_newsize_ok(inode, (len + offset));
1319 if (ret) 1327 if (err)
1320 return ret; 1328 return err;
1321 1329
1322 ret = f2fs_convert_inline_inode(inode); 1330 err = f2fs_convert_inline_inode(inode);
1323 if (ret) 1331 if (err)
1324 return ret; 1332 return err;
1325 1333
1326 f2fs_balance_fs(sbi, true); 1334 f2fs_balance_fs(sbi, true);
1327 1335
@@ -1333,12 +1341,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1333 if (off_end) 1341 if (off_end)
1334 map.m_len++; 1342 map.m_len++;
1335 1343
1336 ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); 1344 err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
1337 if (ret) { 1345 if (err) {
1338 pgoff_t last_off; 1346 pgoff_t last_off;
1339 1347
1340 if (!map.m_len) 1348 if (!map.m_len)
1341 return ret; 1349 return err;
1342 1350
1343 last_off = map.m_lblk + map.m_len - 1; 1351 last_off = map.m_lblk + map.m_len - 1;
1344 1352
@@ -1352,7 +1360,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1352 if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) 1360 if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
1353 f2fs_i_size_write(inode, new_size); 1361 f2fs_i_size_write(inode, new_size);
1354 1362
1355 return ret; 1363 return err;
1356} 1364}
1357 1365
1358static long f2fs_fallocate(struct file *file, int mode, 1366static long f2fs_fallocate(struct file *file, int mode,
@@ -1393,7 +1401,9 @@ static long f2fs_fallocate(struct file *file, int mode,
1393 1401
1394 if (!ret) { 1402 if (!ret) {
1395 inode->i_mtime = inode->i_ctime = current_time(inode); 1403 inode->i_mtime = inode->i_ctime = current_time(inode);
1396 f2fs_mark_inode_dirty_sync(inode); 1404 f2fs_mark_inode_dirty_sync(inode, false);
1405 if (mode & FALLOC_FL_KEEP_SIZE)
1406 file_set_keep_isize(inode);
1397 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1407 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1398 } 1408 }
1399 1409
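The fallocate hunk above tags inodes preallocated with FALLOC_FL_KEEP_SIZE via file_set_keep_isize(), so later auto-recovery will not inflate i_size to cover the preallocated blocks. From userspace the triggering pattern is the plain Linux fallocate(2) call sketched below (i_size stays 0 while st_blocks grows); the filename is arbitrary:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    int fd = open("prealloc.dat", O_CREAT | O_RDWR, 0644);

    if (fd < 0) { perror("open"); return 1; }

    /* reserve 1 MiB of blocks without extending the visible i_size */
    if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
        perror("fallocate");

    struct stat st;
    if (fstat(fd, &st) == 0)
        printf("i_size=%lld blocks=%lld\n",
               (long long)st.st_size, (long long)st.st_blocks);
    close(fd);
    return 0;
}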
@@ -1526,7 +1536,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1526 goto out; 1536 goto out;
1527 1537
1528 f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, 1538 f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
1529 "Unexpected flush for atomic writes: ino=%lu, npages=%lld", 1539 "Unexpected flush for atomic writes: ino=%lu, npages=%u",
1530 inode->i_ino, get_dirty_pages(inode)); 1540 inode->i_ino, get_dirty_pages(inode));
1531 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); 1541 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
1532 if (ret) 1542 if (ret)
@@ -1842,7 +1852,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
1842 mutex_lock(&sbi->gc_mutex); 1852 mutex_lock(&sbi->gc_mutex);
1843 } 1853 }
1844 1854
1845 ret = f2fs_gc(sbi, sync); 1855 ret = f2fs_gc(sbi, sync, true);
1846out: 1856out:
1847 mnt_drop_write_file(filp); 1857 mnt_drop_write_file(filp);
1848 return ret; 1858 return ret;
@@ -2256,12 +2266,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2256 inode_lock(inode); 2266 inode_lock(inode);
2257 ret = generic_write_checks(iocb, from); 2267 ret = generic_write_checks(iocb, from);
2258 if (ret > 0) { 2268 if (ret > 0) {
2259 ret = f2fs_preallocate_blocks(iocb, from); 2269 int err = f2fs_preallocate_blocks(iocb, from);
2260 if (!ret) { 2270
2261 blk_start_plug(&plug); 2271 if (err) {
2262 ret = __generic_file_write_iter(iocb, from); 2272 inode_unlock(inode);
2263 blk_finish_plug(&plug); 2273 return err;
2264 } 2274 }
2275 blk_start_plug(&plug);
2276 ret = __generic_file_write_iter(iocb, from);
2277 blk_finish_plug(&plug);
2265 } 2278 }
2266 inode_unlock(inode); 2279 inode_unlock(inode);
2267 2280
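In f2fs_file_write_iter() above, a failed block preallocation now unlocks the inode and returns its own error code immediately instead of being folded into the generic write return. A minimal control-flow model of the rewritten path, with all names as stand-ins:

#include <stdio.h>

static int preallocate(int want_fail) { return want_fail ? -28 : 0; } /* -ENOSPC */
static long do_write(long nbytes) { return nbytes; }

static long write_iter(long nbytes, int fail_prealloc)
{
    long ret = nbytes;                 /* generic_write_checks() result */

    /* inode_lock(inode); */
    if (ret > 0) {
        int err = preallocate(fail_prealloc);

        if (err) {
            /* inode_unlock(inode); */
            return err;                /* propagate -ENOSPC etc. directly */
        }
        ret = do_write(nbytes);
    }
    /* inode_unlock(inode); */
    return ret;
}

int main(void)
{
    printf("%ld\n", write_iter(4096, 0));   /* 4096 */
    printf("%ld\n", write_iter(4096, 1));   /* -28  */
    return 0;
}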
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index fcca12b97a2a..88bfc3dff496 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -82,7 +82,7 @@ static int gc_thread_func(void *data)
82 stat_inc_bggc_count(sbi); 82 stat_inc_bggc_count(sbi);
83 83
84 /* if return value is not zero, no victim was selected */ 84 /* if return value is not zero, no victim was selected */
85 if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC))) 85 if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
86 wait_ms = gc_th->no_gc_sleep_time; 86 wait_ms = gc_th->no_gc_sleep_time;
87 87
88 trace_f2fs_background_gc(sbi->sb, wait_ms, 88 trace_f2fs_background_gc(sbi->sb, wait_ms,
@@ -544,7 +544,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
544 return true; 544 return true;
545} 545}
546 546
547static void move_encrypted_block(struct inode *inode, block_t bidx) 547static void move_encrypted_block(struct inode *inode, block_t bidx,
548 unsigned int segno, int off)
548{ 549{
549 struct f2fs_io_info fio = { 550 struct f2fs_io_info fio = {
550 .sbi = F2FS_I_SB(inode), 551 .sbi = F2FS_I_SB(inode),
@@ -565,6 +566,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
565 if (!page) 566 if (!page)
566 return; 567 return;
567 568
569 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
570 goto out;
571
568 set_new_dnode(&dn, inode, NULL, NULL, 0); 572 set_new_dnode(&dn, inode, NULL, NULL, 0);
569 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); 573 err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
570 if (err) 574 if (err)
@@ -645,7 +649,8 @@ out:
645 f2fs_put_page(page, 1); 649 f2fs_put_page(page, 1);
646} 650}
647 651
648static void move_data_page(struct inode *inode, block_t bidx, int gc_type) 652static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
653 unsigned int segno, int off)
649{ 654{
650 struct page *page; 655 struct page *page;
651 656
@@ -653,6 +658,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
653 if (IS_ERR(page)) 658 if (IS_ERR(page))
654 return; 659 return;
655 660
661 if (!check_valid_map(F2FS_I_SB(inode), segno, off))
662 goto out;
663
656 if (gc_type == BG_GC) { 664 if (gc_type == BG_GC) {
657 if (PageWriteback(page)) 665 if (PageWriteback(page))
658 goto out; 666 goto out;
@@ -673,8 +681,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
673retry: 681retry:
674 set_page_dirty(page); 682 set_page_dirty(page);
675 f2fs_wait_on_page_writeback(page, DATA, true); 683 f2fs_wait_on_page_writeback(page, DATA, true);
676 if (clear_page_dirty_for_io(page)) 684 if (clear_page_dirty_for_io(page)) {
677 inode_dec_dirty_pages(inode); 685 inode_dec_dirty_pages(inode);
686 remove_dirty_inode(inode);
687 }
678 688
679 set_cold_data(page); 689 set_cold_data(page);
680 690
@@ -683,8 +693,6 @@ retry:
683 congestion_wait(BLK_RW_ASYNC, HZ/50); 693 congestion_wait(BLK_RW_ASYNC, HZ/50);
684 goto retry; 694 goto retry;
685 } 695 }
686
687 clear_cold_data(page);
688 } 696 }
689out: 697out:
690 f2fs_put_page(page, 1); 698 f2fs_put_page(page, 1);
@@ -794,9 +802,9 @@ next_step:
794 start_bidx = start_bidx_of_node(nofs, inode) 802 start_bidx = start_bidx_of_node(nofs, inode)
795 + ofs_in_node; 803 + ofs_in_node;
796 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 804 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
797 move_encrypted_block(inode, start_bidx); 805 move_encrypted_block(inode, start_bidx, segno, off);
798 else 806 else
799 move_data_page(inode, start_bidx, gc_type); 807 move_data_page(inode, start_bidx, gc_type, segno, off);
800 808
801 if (locked) { 809 if (locked) {
802 up_write(&fi->dio_rwsem[WRITE]); 810 up_write(&fi->dio_rwsem[WRITE]);
@@ -899,7 +907,7 @@ next:
899 return sec_freed; 907 return sec_freed;
900} 908}
901 909
902int f2fs_gc(struct f2fs_sb_info *sbi, bool sync) 910int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
903{ 911{
904 unsigned int segno; 912 unsigned int segno;
905 int gc_type = sync ? FG_GC : BG_GC; 913 int gc_type = sync ? FG_GC : BG_GC;
@@ -940,6 +948,9 @@ gc_more:
940 if (ret) 948 if (ret)
941 goto stop; 949 goto stop;
942 } 950 }
951 } else if (gc_type == BG_GC && !background) {
952 /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
953 goto stop;
943 } 954 }
944 955
945 if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) 956 if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
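The gc.c changes above add a background flag to f2fs_gc() so callers on the allocation critical path (f2fs_balance_fs) can decline background GC, and both block movers re-check check_valid_map() after taking the page lock to avoid racing with a concurrent GC that already migrated the block. A userspace sketch of the new early-exit only; the enum and outcomes are illustrative:

#include <stdbool.h>
#include <stdio.h>

enum { BG_GC, FG_GC };

static int model_gc(int gc_type, bool background, bool has_victim)
{
    if (gc_type == BG_GC && !background)
        return 0;                      /* critical path: skip background GC */
    if (!has_victim)
        return 0;
    /* ... move valid blocks, free the victim section ... */
    return 1;
}

int main(void)
{
    printf("%d\n", model_gc(BG_GC, false, true));  /* 0: declined */
    printf("%d\n", model_gc(BG_GC, true, true));   /* 1: ran */
    return 0;
}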
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 2e7f54c191b4..e32a9e527968 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -137,8 +137,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
137 fio.old_blkaddr = dn->data_blkaddr; 137 fio.old_blkaddr = dn->data_blkaddr;
138 write_data_page(dn, &fio); 138 write_data_page(dn, &fio);
139 f2fs_wait_on_page_writeback(page, DATA, true); 139 f2fs_wait_on_page_writeback(page, DATA, true);
140 if (dirty) 140 if (dirty) {
141 inode_dec_dirty_pages(dn->inode); 141 inode_dec_dirty_pages(dn->inode);
142 remove_dirty_inode(dn->inode);
143 }
142 144
143 /* this converted inline_data should be recovered. */ 145 /* this converted inline_data should be recovered. */
144 set_inode_flag(dn->inode, FI_APPEND_WRITE); 146 set_inode_flag(dn->inode, FI_APPEND_WRITE);
@@ -419,7 +421,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
419 } 421 }
420 422
421 new_name.name = d.filename[bit_pos]; 423 new_name.name = d.filename[bit_pos];
422 new_name.len = de->name_len; 424 new_name.len = le16_to_cpu(de->name_len);
423 425
424 ino = le32_to_cpu(de->ino); 426 ino = le32_to_cpu(de->ino);
425 fake_mode = get_de_type(de) << S_SHIFT; 427 fake_mode = get_de_type(de) << S_SHIFT;
@@ -573,7 +575,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
573 f2fs_put_page(page, 1); 575 f2fs_put_page(page, 1);
574 576
575 dir->i_ctime = dir->i_mtime = current_time(dir); 577 dir->i_ctime = dir->i_mtime = current_time(dir);
576 f2fs_mark_inode_dirty_sync(dir); 578 f2fs_mark_inode_dirty_sync(dir, false);
577 579
578 if (inode) 580 if (inode)
579 f2fs_drop_nlink(dir, inode); 581 f2fs_drop_nlink(dir, inode);
@@ -610,6 +612,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
610 struct f2fs_inline_dentry *inline_dentry = NULL; 612 struct f2fs_inline_dentry *inline_dentry = NULL;
611 struct page *ipage = NULL; 613 struct page *ipage = NULL;
612 struct f2fs_dentry_ptr d; 614 struct f2fs_dentry_ptr d;
615 int err;
613 616
614 if (ctx->pos == NR_INLINE_DENTRY) 617 if (ctx->pos == NR_INLINE_DENTRY)
615 return 0; 618 return 0;
@@ -622,11 +625,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
622 625
623 make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); 626 make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
624 627
625 if (!f2fs_fill_dentries(ctx, &d, 0, fstr)) 628 err = f2fs_fill_dentries(ctx, &d, 0, fstr);
629 if (!err)
626 ctx->pos = NR_INLINE_DENTRY; 630 ctx->pos = NR_INLINE_DENTRY;
627 631
628 f2fs_put_page(ipage, 1); 632 f2fs_put_page(ipage, 1);
629 return 0; 633 return err < 0 ? err : 0;
630} 634}
631 635
632int f2fs_inline_data_fiemap(struct inode *inode, 636int f2fs_inline_data_fiemap(struct inode *inode,
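f2fs_fill_dentries() now returns an int rather than a bool, letting f2fs_read_inline_dir() above distinguish "buffer full" from a real failure and propagate negative errors while advancing ctx->pos only on success. A model of that plumbing, with the error value and entry count invented for the sketch:

#include <stdio.h>

#define NR_ENTRIES 10

static int fill_dentries(int from, int corrupt_at)
{
    for (int i = from; i < NR_ENTRIES; i++) {
        if (i == corrupt_at)
            return -74;                /* -EBADMSG-style failure, assumed */
        /* emit entry i to the caller's buffer */
    }
    return 0;                          /* 0: all consumed, advance position */
}

static int read_inline_dir(int *pos, int corrupt_at)
{
    int err = fill_dentries(*pos, corrupt_at);

    if (!err)
        *pos = NR_ENTRIES;             /* mirrors ctx->pos = NR_INLINE_DENTRY */
    return err < 0 ? err : 0;
}

int main(void)
{
    int pos = 0;

    printf("%d pos=%d\n", read_inline_dir(&pos, -1), pos);  /* 0 pos=10 */
    pos = 0;
    printf("%d pos=%d\n", read_inline_dir(&pos, 3), pos);   /* -74 pos=0 */
    return 0;
}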
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index d7369895a78a..af06bda51a54 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -19,10 +19,11 @@
19 19
20#include <trace/events/f2fs.h> 20#include <trace/events/f2fs.h>
21 21
22void f2fs_mark_inode_dirty_sync(struct inode *inode) 22void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
23{ 23{
24 if (f2fs_inode_dirtied(inode)) 24 if (f2fs_inode_dirtied(inode, sync))
25 return; 25 return;
26
26 mark_inode_dirty_sync(inode); 27 mark_inode_dirty_sync(inode);
27} 28}
28 29
@@ -43,7 +44,7 @@ void f2fs_set_inode_flags(struct inode *inode)
43 new_fl |= S_DIRSYNC; 44 new_fl |= S_DIRSYNC;
44 inode_set_flags(inode, new_fl, 45 inode_set_flags(inode, new_fl,
45 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 46 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
46 f2fs_mark_inode_dirty_sync(inode); 47 f2fs_mark_inode_dirty_sync(inode, false);
47} 48}
48 49
49static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 50static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -252,6 +253,7 @@ retry:
252int update_inode(struct inode *inode, struct page *node_page) 253int update_inode(struct inode *inode, struct page *node_page)
253{ 254{
254 struct f2fs_inode *ri; 255 struct f2fs_inode *ri;
256 struct extent_tree *et = F2FS_I(inode)->extent_tree;
255 257
256 f2fs_inode_synced(inode); 258 f2fs_inode_synced(inode);
257 259
@@ -267,11 +269,13 @@ int update_inode(struct inode *inode, struct page *node_page)
267 ri->i_size = cpu_to_le64(i_size_read(inode)); 269 ri->i_size = cpu_to_le64(i_size_read(inode));
268 ri->i_blocks = cpu_to_le64(inode->i_blocks); 270 ri->i_blocks = cpu_to_le64(inode->i_blocks);
269 271
270 if (F2FS_I(inode)->extent_tree) 272 if (et) {
271 set_raw_extent(&F2FS_I(inode)->extent_tree->largest, 273 read_lock(&et->lock);
272 &ri->i_ext); 274 set_raw_extent(&et->largest, &ri->i_ext);
273 else 275 read_unlock(&et->lock);
276 } else {
274 memset(&ri->i_ext, 0, sizeof(ri->i_ext)); 277 memset(&ri->i_ext, 0, sizeof(ri->i_ext));
278 }
275 set_raw_inline(inode, ri); 279 set_raw_inline(inode, ri);
276 280
277 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 281 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -335,7 +339,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
335 * We need to balance fs here to avoid producing dirty node pages 339 * We need to balance fs here to avoid producing dirty node pages
336 * during the urgent cleaning time when running out of free sections. 340 * during the urgent cleaning time when running out of free sections.
337 */ 341 */
338 if (update_inode_page(inode)) 342 if (update_inode_page(inode) && wbc && wbc->nr_to_write)
339 f2fs_balance_fs(sbi, true); 343 f2fs_balance_fs(sbi, true);
340 return 0; 344 return 0;
341} 345}
@@ -373,6 +377,9 @@ void f2fs_evict_inode(struct inode *inode)
373 goto no_delete; 377 goto no_delete;
374#endif 378#endif
375 379
380 remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
381 remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
382
376 sb_start_intwrite(inode->i_sb); 383 sb_start_intwrite(inode->i_sb);
377 set_inode_flag(inode, FI_NO_ALLOC); 384 set_inode_flag(inode, FI_NO_ALLOC);
378 i_size_write(inode, 0); 385 i_size_write(inode, 0);
@@ -384,6 +391,8 @@ retry:
384 f2fs_lock_op(sbi); 391 f2fs_lock_op(sbi);
385 err = remove_inode_page(inode); 392 err = remove_inode_page(inode);
386 f2fs_unlock_op(sbi); 393 f2fs_unlock_op(sbi);
394 if (err == -ENOENT)
395 err = 0;
387 } 396 }
388 397
389 /* give more chances, if ENOMEM case */ 398 /* give more chances, if ENOMEM case */
@@ -403,10 +412,12 @@ no_delete:
403 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 412 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
404 if (xnid) 413 if (xnid)
405 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 414 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
406 if (is_inode_flag_set(inode, FI_APPEND_WRITE)) 415 if (inode->i_nlink) {
407 add_ino_entry(sbi, inode->i_ino, APPEND_INO); 416 if (is_inode_flag_set(inode, FI_APPEND_WRITE))
408 if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) 417 add_ino_entry(sbi, inode->i_ino, APPEND_INO);
409 add_ino_entry(sbi, inode->i_ino, UPDATE_INO); 418 if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
419 add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
420 }
410 if (is_inode_flag_set(inode, FI_FREE_NID)) { 421 if (is_inode_flag_set(inode, FI_FREE_NID)) {
411 alloc_nid_failed(sbi, inode->i_ino); 422 alloc_nid_failed(sbi, inode->i_ino);
412 clear_inode_flag(inode, FI_FREE_NID); 423 clear_inode_flag(inode, FI_FREE_NID);
@@ -424,6 +435,18 @@ void handle_failed_inode(struct inode *inode)
424 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 435 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
425 struct node_info ni; 436 struct node_info ni;
426 437
438 /*
439 * clear nlink of the inode in order to release its resources
440 * immediately.
441 */
442 clear_nlink(inode);
443
444 /*
445 * we must call this to avoid leaving the inode dirty, which would
446 * cause a panic when flushing dirty inodes in gdirty_list.
447 */
448 update_inode_page(inode);
449
427 /* don't make bad inode, since it becomes a regular file. */ 450 /* don't make bad inode, since it becomes a regular file. */
428 unlock_new_inode(inode); 451 unlock_new_inode(inode);
429 452
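Two related inode.c changes above: handle_failed_inode() clears nlink and writes the inode page so a half-built inode is not left dirty on gdirty_list, and f2fs_evict_inode() drops stale APPEND_INO/UPDATE_INO entries up front and re-registers them only when the inode still has links. A userspace model of the nlink-gated re-registration; the flags are stand-ins:

#include <stdbool.h>
#include <stdio.h>

struct m_inode {
    unsigned nlink;
    bool append_write, update_write;   /* FI_APPEND_WRITE / FI_UPDATE_WRITE */
};

static void evict(const struct m_inode *i, bool *append_ino, bool *update_ino)
{
    *append_ino = *update_ino = false; /* remove_ino_entry() equivalents */
    if (i->nlink) {                    /* only live inodes re-register */
        *append_ino = i->append_write;
        *update_ino = i->update_write;
    }
}

int main(void)
{
    struct m_inode live = { .nlink = 1, .append_write = true };
    struct m_inode dead = { .nlink = 0, .append_write = true };
    bool a, u;

    evict(&live, &a, &u); printf("live: append=%d\n", a);  /* 1 */
    evict(&dead, &a, &u); printf("dead: append=%d\n", a);  /* 0 */
    return 0;
}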
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 489fa0d5f914..db33b5631dc8 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -778,7 +778,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
778 up_write(&F2FS_I(old_inode)->i_sem); 778 up_write(&F2FS_I(old_inode)->i_sem);
779 779
780 old_inode->i_ctime = current_time(old_inode); 780 old_inode->i_ctime = current_time(old_inode);
781 f2fs_mark_inode_dirty_sync(old_inode); 781 f2fs_mark_inode_dirty_sync(old_inode, false);
782 782
783 f2fs_delete_entry(old_entry, old_page, old_dir, NULL); 783 f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
784 784
@@ -938,7 +938,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
938 f2fs_i_links_write(old_dir, old_nlink > 0); 938 f2fs_i_links_write(old_dir, old_nlink > 0);
939 up_write(&F2FS_I(old_dir)->i_sem); 939 up_write(&F2FS_I(old_dir)->i_sem);
940 } 940 }
941 f2fs_mark_inode_dirty_sync(old_dir); 941 f2fs_mark_inode_dirty_sync(old_dir, false);
942 942
943 /* update directory entry info of new dir inode */ 943 /* update directory entry info of new dir inode */
944 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 944 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
@@ -953,7 +953,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
953 f2fs_i_links_write(new_dir, new_nlink > 0); 953 f2fs_i_links_write(new_dir, new_nlink > 0);
954 up_write(&F2FS_I(new_dir)->i_sem); 954 up_write(&F2FS_I(new_dir)->i_sem);
955 } 955 }
956 f2fs_mark_inode_dirty_sync(new_dir); 956 f2fs_mark_inode_dirty_sync(new_dir, false);
957 957
958 f2fs_unlock_op(sbi); 958 f2fs_unlock_op(sbi);
959 959
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d1e29deb4598..b9078fdb3743 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -45,8 +45,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively 45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
46 */ 46 */
47 if (type == FREE_NIDS) { 47 if (type == FREE_NIDS) {
48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 48 mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
49 PAGE_SHIFT; 49 sizeof(struct free_nid)) >> PAGE_SHIFT;
50 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); 50 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
51 } else if (type == NAT_ENTRIES) { 51 } else if (type == NAT_ENTRIES) {
52 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 52 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
@@ -270,8 +270,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
270 e = grab_nat_entry(nm_i, nid); 270 e = grab_nat_entry(nm_i, nid);
271 node_info_from_raw_nat(&e->ni, ne); 271 node_info_from_raw_nat(&e->ni, ne);
272 } else { 272 } else {
273 f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino || 273 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
274 nat_get_blkaddr(e) != ne->block_addr || 274 nat_get_blkaddr(e) !=
275 le32_to_cpu(ne->block_addr) ||
275 nat_get_version(e) != ne->version); 276 nat_get_version(e) != ne->version);
276 } 277 }
277} 278}
@@ -1204,6 +1205,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1204 1205
1205 ret = f2fs_write_inline_data(inode, page); 1206 ret = f2fs_write_inline_data(inode, page);
1206 inode_dec_dirty_pages(inode); 1207 inode_dec_dirty_pages(inode);
1208 remove_dirty_inode(inode);
1207 if (ret) 1209 if (ret)
1208 set_page_dirty(page); 1210 set_page_dirty(page);
1209page_out: 1211page_out:
@@ -1338,7 +1340,8 @@ retry:
1338 if (unlikely(f2fs_cp_error(sbi))) { 1340 if (unlikely(f2fs_cp_error(sbi))) {
1339 f2fs_put_page(last_page, 0); 1341 f2fs_put_page(last_page, 0);
1340 pagevec_release(&pvec); 1342 pagevec_release(&pvec);
1341 return -EIO; 1343 ret = -EIO;
1344 goto out;
1342 } 1345 }
1343 1346
1344 if (!IS_DNODE(page) || !is_cold_node(page)) 1347 if (!IS_DNODE(page) || !is_cold_node(page))
@@ -1407,11 +1410,12 @@ continue_unlock:
1407 "Retry to write fsync mark: ino=%u, idx=%lx", 1410 "Retry to write fsync mark: ino=%u, idx=%lx",
1408 ino, last_page->index); 1411 ino, last_page->index);
1409 lock_page(last_page); 1412 lock_page(last_page);
1413 f2fs_wait_on_page_writeback(last_page, NODE, true);
1410 set_page_dirty(last_page); 1414 set_page_dirty(last_page);
1411 unlock_page(last_page); 1415 unlock_page(last_page);
1412 goto retry; 1416 goto retry;
1413 } 1417 }
1414 1418out:
1415 if (nwritten) 1419 if (nwritten)
1416 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); 1420 f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
1417 return ret ? -EIO: 0; 1421 return ret ? -EIO: 0;
@@ -1692,11 +1696,35 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1692 return radix_tree_lookup(&nm_i->free_nid_root, n); 1696 return radix_tree_lookup(&nm_i->free_nid_root, n);
1693} 1697}
1694 1698
1695static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, 1699static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
1696 struct free_nid *i) 1700 struct free_nid *i, enum nid_list list, bool new)
1697{ 1701{
1702 struct f2fs_nm_info *nm_i = NM_I(sbi);
1703
1704 if (new) {
1705 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
1706 if (err)
1707 return err;
1708 }
1709
1710 f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
1711 i->state != NID_ALLOC);
1712 nm_i->nid_cnt[list]++;
1713 list_add_tail(&i->list, &nm_i->nid_list[list]);
1714 return 0;
1715}
1716
1717static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
1718 struct free_nid *i, enum nid_list list, bool reuse)
1719{
1720 struct f2fs_nm_info *nm_i = NM_I(sbi);
1721
1722 f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
1723 i->state != NID_ALLOC);
1724 nm_i->nid_cnt[list]--;
1698 list_del(&i->list); 1725 list_del(&i->list);
1699 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1726 if (!reuse)
1727 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1700} 1728}
1701 1729
1702static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) 1730static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
@@ -1704,9 +1732,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1704 struct f2fs_nm_info *nm_i = NM_I(sbi); 1732 struct f2fs_nm_info *nm_i = NM_I(sbi);
1705 struct free_nid *i; 1733 struct free_nid *i;
1706 struct nat_entry *ne; 1734 struct nat_entry *ne;
1707 1735 int err;
1708 if (!available_free_memory(sbi, FREE_NIDS))
1709 return -1;
1710 1736
1711 /* 0 nid should not be used */ 1737 /* 0 nid should not be used */
1712 if (unlikely(nid == 0)) 1738 if (unlikely(nid == 0))
@@ -1729,33 +1755,30 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1729 return 0; 1755 return 0;
1730 } 1756 }
1731 1757
1732 spin_lock(&nm_i->free_nid_list_lock); 1758 spin_lock(&nm_i->nid_list_lock);
1733 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { 1759 err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
1734 spin_unlock(&nm_i->free_nid_list_lock); 1760 spin_unlock(&nm_i->nid_list_lock);
1735 radix_tree_preload_end(); 1761 radix_tree_preload_end();
1762 if (err) {
1736 kmem_cache_free(free_nid_slab, i); 1763 kmem_cache_free(free_nid_slab, i);
1737 return 0; 1764 return 0;
1738 } 1765 }
1739 list_add_tail(&i->list, &nm_i->free_nid_list);
1740 nm_i->fcnt++;
1741 spin_unlock(&nm_i->free_nid_list_lock);
1742 radix_tree_preload_end();
1743 return 1; 1766 return 1;
1744} 1767}
1745 1768
1746static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1769static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
1747{ 1770{
1771 struct f2fs_nm_info *nm_i = NM_I(sbi);
1748 struct free_nid *i; 1772 struct free_nid *i;
1749 bool need_free = false; 1773 bool need_free = false;
1750 1774
1751 spin_lock(&nm_i->free_nid_list_lock); 1775 spin_lock(&nm_i->nid_list_lock);
1752 i = __lookup_free_nid_list(nm_i, nid); 1776 i = __lookup_free_nid_list(nm_i, nid);
1753 if (i && i->state == NID_NEW) { 1777 if (i && i->state == NID_NEW) {
1754 __del_from_free_nid_list(nm_i, i); 1778 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
1755 nm_i->fcnt--;
1756 need_free = true; 1779 need_free = true;
1757 } 1780 }
1758 spin_unlock(&nm_i->free_nid_list_lock); 1781 spin_unlock(&nm_i->nid_list_lock);
1759 1782
1760 if (need_free) 1783 if (need_free)
1761 kmem_cache_free(free_nid_slab, i); 1784 kmem_cache_free(free_nid_slab, i);
@@ -1778,14 +1801,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
1778 1801
1779 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1802 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1780 f2fs_bug_on(sbi, blk_addr == NEW_ADDR); 1803 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1781 if (blk_addr == NULL_ADDR) { 1804 if (blk_addr == NULL_ADDR)
1782 if (add_free_nid(sbi, start_nid, true) < 0) 1805 add_free_nid(sbi, start_nid, true);
1783 break;
1784 }
1785 } 1806 }
1786} 1807}
1787 1808
1788void build_free_nids(struct f2fs_sb_info *sbi) 1809static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1789{ 1810{
1790 struct f2fs_nm_info *nm_i = NM_I(sbi); 1811 struct f2fs_nm_info *nm_i = NM_I(sbi);
1791 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1812 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1794,7 +1815,10 @@ void build_free_nids(struct f2fs_sb_info *sbi)
1794 nid_t nid = nm_i->next_scan_nid; 1815 nid_t nid = nm_i->next_scan_nid;
1795 1816
1796 /* Enough entries */ 1817 /* Enough entries */
1797 if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) 1818 if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
1819 return;
1820
1821 if (!sync && !available_free_memory(sbi, FREE_NIDS))
1798 return; 1822 return;
1799 1823
1800 /* readahead nat pages to be scanned */ 1824 /* readahead nat pages to be scanned */
@@ -1830,7 +1854,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
1830 if (addr == NULL_ADDR) 1854 if (addr == NULL_ADDR)
1831 add_free_nid(sbi, nid, true); 1855 add_free_nid(sbi, nid, true);
1832 else 1856 else
1833 remove_free_nid(nm_i, nid); 1857 remove_free_nid(sbi, nid);
1834 } 1858 }
1835 up_read(&curseg->journal_rwsem); 1859 up_read(&curseg->journal_rwsem);
1836 up_read(&nm_i->nat_tree_lock); 1860 up_read(&nm_i->nat_tree_lock);
@@ -1839,6 +1863,13 @@ void build_free_nids(struct f2fs_sb_info *sbi)
1839 nm_i->ra_nid_pages, META_NAT, false); 1863 nm_i->ra_nid_pages, META_NAT, false);
1840} 1864}
1841 1865
1866void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
1867{
1868 mutex_lock(&NM_I(sbi)->build_lock);
1869 __build_free_nids(sbi, sync);
1870 mutex_unlock(&NM_I(sbi)->build_lock);
1871}
1872
1842/* 1873/*
1843 * If this function returns success, caller can obtain a new nid 1874 * If this function returns success, caller can obtain a new nid
1844 * from second parameter of this function. 1875 * from second parameter of this function.
@@ -1853,31 +1884,31 @@ retry:
1853 if (time_to_inject(sbi, FAULT_ALLOC_NID)) 1884 if (time_to_inject(sbi, FAULT_ALLOC_NID))
1854 return false; 1885 return false;
1855#endif 1886#endif
1856 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1887 spin_lock(&nm_i->nid_list_lock);
1857 return false;
1858 1888
1859 spin_lock(&nm_i->free_nid_list_lock); 1889 if (unlikely(nm_i->available_nids == 0)) {
1890 spin_unlock(&nm_i->nid_list_lock);
1891 return false;
1892 }
1860 1893
1861 /* We should not use stale free nids created by build_free_nids */ 1894 /* We should not use stale free nids created by build_free_nids */
1862 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1895 if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
1863 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1896 f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
1864 list_for_each_entry(i, &nm_i->free_nid_list, list) 1897 i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
1865 if (i->state == NID_NEW) 1898 struct free_nid, list);
1866 break;
1867
1868 f2fs_bug_on(sbi, i->state != NID_NEW);
1869 *nid = i->nid; 1899 *nid = i->nid;
1900
1901 __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
1870 i->state = NID_ALLOC; 1902 i->state = NID_ALLOC;
1871 nm_i->fcnt--; 1903 __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
1872 spin_unlock(&nm_i->free_nid_list_lock); 1904 nm_i->available_nids--;
1905 spin_unlock(&nm_i->nid_list_lock);
1873 return true; 1906 return true;
1874 } 1907 }
1875 spin_unlock(&nm_i->free_nid_list_lock); 1908 spin_unlock(&nm_i->nid_list_lock);
1876 1909
1877 /* Let's scan nat pages and its caches to get free nids */ 1910 /* Let's scan nat pages and its caches to get free nids */
1878 mutex_lock(&nm_i->build_lock); 1911 build_free_nids(sbi, true);
1879 build_free_nids(sbi);
1880 mutex_unlock(&nm_i->build_lock);
1881 goto retry; 1912 goto retry;
1882} 1913}
1883 1914
@@ -1889,11 +1920,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1889 struct f2fs_nm_info *nm_i = NM_I(sbi); 1920 struct f2fs_nm_info *nm_i = NM_I(sbi);
1890 struct free_nid *i; 1921 struct free_nid *i;
1891 1922
1892 spin_lock(&nm_i->free_nid_list_lock); 1923 spin_lock(&nm_i->nid_list_lock);
1893 i = __lookup_free_nid_list(nm_i, nid); 1924 i = __lookup_free_nid_list(nm_i, nid);
1894 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1925 f2fs_bug_on(sbi, !i);
1895 __del_from_free_nid_list(nm_i, i); 1926 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
1896 spin_unlock(&nm_i->free_nid_list_lock); 1927 spin_unlock(&nm_i->nid_list_lock);
1897 1928
1898 kmem_cache_free(free_nid_slab, i); 1929 kmem_cache_free(free_nid_slab, i);
1899} 1930}
@@ -1910,17 +1941,22 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1910 if (!nid) 1941 if (!nid)
1911 return; 1942 return;
1912 1943
1913 spin_lock(&nm_i->free_nid_list_lock); 1944 spin_lock(&nm_i->nid_list_lock);
1914 i = __lookup_free_nid_list(nm_i, nid); 1945 i = __lookup_free_nid_list(nm_i, nid);
1915 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); 1946 f2fs_bug_on(sbi, !i);
1947
1916 if (!available_free_memory(sbi, FREE_NIDS)) { 1948 if (!available_free_memory(sbi, FREE_NIDS)) {
1917 __del_from_free_nid_list(nm_i, i); 1949 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
1918 need_free = true; 1950 need_free = true;
1919 } else { 1951 } else {
1952 __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
1920 i->state = NID_NEW; 1953 i->state = NID_NEW;
1921 nm_i->fcnt++; 1954 __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
1922 } 1955 }
1923 spin_unlock(&nm_i->free_nid_list_lock); 1956
1957 nm_i->available_nids++;
1958
1959 spin_unlock(&nm_i->nid_list_lock);
1924 1960
1925 if (need_free) 1961 if (need_free)
1926 kmem_cache_free(free_nid_slab, i); 1962 kmem_cache_free(free_nid_slab, i);
@@ -1932,24 +1968,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
1932 struct free_nid *i, *next; 1968 struct free_nid *i, *next;
1933 int nr = nr_shrink; 1969 int nr = nr_shrink;
1934 1970
1935 if (nm_i->fcnt <= MAX_FREE_NIDS) 1971 if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
1936 return 0; 1972 return 0;
1937 1973
1938 if (!mutex_trylock(&nm_i->build_lock)) 1974 if (!mutex_trylock(&nm_i->build_lock))
1939 return 0; 1975 return 0;
1940 1976
1941 spin_lock(&nm_i->free_nid_list_lock); 1977 spin_lock(&nm_i->nid_list_lock);
1942 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { 1978 list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
1943 if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) 1979 list) {
1980 if (nr_shrink <= 0 ||
1981 nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
1944 break; 1982 break;
1945 if (i->state == NID_ALLOC) 1983
1946 continue; 1984 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
1947 __del_from_free_nid_list(nm_i, i);
1948 kmem_cache_free(free_nid_slab, i); 1985 kmem_cache_free(free_nid_slab, i);
1949 nm_i->fcnt--;
1950 nr_shrink--; 1986 nr_shrink--;
1951 } 1987 }
1952 spin_unlock(&nm_i->free_nid_list_lock); 1988 spin_unlock(&nm_i->nid_list_lock);
1953 mutex_unlock(&nm_i->build_lock); 1989 mutex_unlock(&nm_i->build_lock);
1954 1990
1955 return nr - nr_shrink; 1991 return nr - nr_shrink;
@@ -2005,7 +2041,7 @@ recover_xnid:
2005 if (unlikely(!inc_valid_node_count(sbi, inode))) 2041 if (unlikely(!inc_valid_node_count(sbi, inode)))
2006 f2fs_bug_on(sbi, 1); 2042 f2fs_bug_on(sbi, 1);
2007 2043
2008 remove_free_nid(NM_I(sbi), new_xnid); 2044 remove_free_nid(sbi, new_xnid);
2009 get_node_info(sbi, new_xnid, &ni); 2045 get_node_info(sbi, new_xnid, &ni);
2010 ni.ino = inode->i_ino; 2046 ni.ino = inode->i_ino;
2011 set_node_addr(sbi, &ni, NEW_ADDR, false); 2047 set_node_addr(sbi, &ni, NEW_ADDR, false);
@@ -2035,7 +2071,7 @@ retry:
2035 } 2071 }
2036 2072
2037 /* Should not use this inode from free nid list */ 2073 /* Should not use this inode from free nid list */
2038 remove_free_nid(NM_I(sbi), ino); 2074 remove_free_nid(sbi, ino);
2039 2075
2040 if (!PageUptodate(ipage)) 2076 if (!PageUptodate(ipage))
2041 SetPageUptodate(ipage); 2077 SetPageUptodate(ipage);
@@ -2069,7 +2105,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
2069 struct f2fs_node *rn; 2105 struct f2fs_node *rn;
2070 struct f2fs_summary *sum_entry; 2106 struct f2fs_summary *sum_entry;
2071 block_t addr; 2107 block_t addr;
2072 int bio_blocks = MAX_BIO_BLOCKS(sbi);
2073 int i, idx, last_offset, nrpages; 2108 int i, idx, last_offset, nrpages;
2074 2109
2075 /* scan the node segment */ 2110 /* scan the node segment */
@@ -2078,7 +2113,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
2078 sum_entry = &sum->entries[0]; 2113 sum_entry = &sum->entries[0];
2079 2114
2080 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 2115 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
2081 nrpages = min(last_offset - i, bio_blocks); 2116 nrpages = min(last_offset - i, BIO_MAX_PAGES);
2082 2117
2083 /* readahead node pages */ 2118 /* readahead node pages */
2084 ra_meta_pages(sbi, addr, nrpages, META_POR, true); 2119 ra_meta_pages(sbi, addr, nrpages, META_POR, true);
@@ -2120,6 +2155,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2120 ne = grab_nat_entry(nm_i, nid); 2155 ne = grab_nat_entry(nm_i, nid);
2121 node_info_from_raw_nat(&ne->ni, &raw_ne); 2156 node_info_from_raw_nat(&ne->ni, &raw_ne);
2122 } 2157 }
2158
2159 /*
2160 * if a free nat in the journal has not been used since the
2161 * last checkpoint, we should remove it from the available nids,
2162 * since we will add it back later.
2163 */
2164 if (!get_nat_flag(ne, IS_DIRTY) &&
2165 le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
2166 spin_lock(&nm_i->nid_list_lock);
2167 nm_i->available_nids--;
2168 spin_unlock(&nm_i->nid_list_lock);
2169 }
2170
2123 __set_nat_cache_dirty(nm_i, ne); 2171 __set_nat_cache_dirty(nm_i, ne);
2124 } 2172 }
2125 update_nats_in_cursum(journal, -i); 2173 update_nats_in_cursum(journal, -i);
@@ -2192,8 +2240,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2192 raw_nat_from_node_info(raw_ne, &ne->ni); 2240 raw_nat_from_node_info(raw_ne, &ne->ni);
2193 nat_reset_flag(ne); 2241 nat_reset_flag(ne);
2194 __clear_nat_cache_dirty(NM_I(sbi), ne); 2242 __clear_nat_cache_dirty(NM_I(sbi), ne);
2195 if (nat_get_blkaddr(ne) == NULL_ADDR) 2243 if (nat_get_blkaddr(ne) == NULL_ADDR) {
2196 add_free_nid(sbi, nid, false); 2244 add_free_nid(sbi, nid, false);
2245 spin_lock(&NM_I(sbi)->nid_list_lock);
2246 NM_I(sbi)->available_nids++;
2247 spin_unlock(&NM_I(sbi)->nid_list_lock);
2248 }
2197 } 2249 }
2198 2250
2199 if (to_journal) 2251 if (to_journal)
@@ -2268,21 +2320,24 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
2268 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 2320 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
2269 2321
2270 /* not used nids: 0, node, meta, (and root counted as valid node) */ 2322 /* not used nids: 0, node, meta, (and root counted as valid node) */
2271 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; 2323 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
2272 nm_i->fcnt = 0; 2324 F2FS_RESERVED_NODE_NUM;
2325 nm_i->nid_cnt[FREE_NID_LIST] = 0;
2326 nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
2273 nm_i->nat_cnt = 0; 2327 nm_i->nat_cnt = 0;
2274 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 2328 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
2275 nm_i->ra_nid_pages = DEF_RA_NID_PAGES; 2329 nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
2276 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; 2330 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
2277 2331
2278 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 2332 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
2279 INIT_LIST_HEAD(&nm_i->free_nid_list); 2333 INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
2334 INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
2280 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); 2335 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
2281 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); 2336 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
2282 INIT_LIST_HEAD(&nm_i->nat_entries); 2337 INIT_LIST_HEAD(&nm_i->nat_entries);
2283 2338
2284 mutex_init(&nm_i->build_lock); 2339 mutex_init(&nm_i->build_lock);
2285 spin_lock_init(&nm_i->free_nid_list_lock); 2340 spin_lock_init(&nm_i->nid_list_lock);
2286 init_rwsem(&nm_i->nat_tree_lock); 2341 init_rwsem(&nm_i->nat_tree_lock);
2287 2342
2288 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 2343 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
@@ -2310,7 +2365,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
2310 if (err) 2365 if (err)
2311 return err; 2366 return err;
2312 2367
2313 build_free_nids(sbi); 2368 build_free_nids(sbi, true);
2314 return 0; 2369 return 0;
2315} 2370}
2316 2371
@@ -2327,17 +2382,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2327 return; 2382 return;
2328 2383
2329 /* destroy free nid list */ 2384 /* destroy free nid list */
2330 spin_lock(&nm_i->free_nid_list_lock); 2385 spin_lock(&nm_i->nid_list_lock);
2331 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 2386 list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
2332 f2fs_bug_on(sbi, i->state == NID_ALLOC); 2387 list) {
2333 __del_from_free_nid_list(nm_i, i); 2388 __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
2334 nm_i->fcnt--; 2389 spin_unlock(&nm_i->nid_list_lock);
2335 spin_unlock(&nm_i->free_nid_list_lock);
2336 kmem_cache_free(free_nid_slab, i); 2390 kmem_cache_free(free_nid_slab, i);
2337 spin_lock(&nm_i->free_nid_list_lock); 2391 spin_lock(&nm_i->nid_list_lock);
2338 } 2392 }
2339 f2fs_bug_on(sbi, nm_i->fcnt); 2393 f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
2340 spin_unlock(&nm_i->free_nid_list_lock); 2394 f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
2395 f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
2396 spin_unlock(&nm_i->nid_list_lock);
2341 2397
2342 /* destroy nat cache */ 2398 /* destroy nat cache */
2343 down_write(&nm_i->nat_tree_lock); 2399 down_write(&nm_i->nat_tree_lock);
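The node.c rework above replaces the single free_nid_list (plus fcnt) with two lists indexed by FREE_NID_LIST/ALLOC_NID_LIST and per-list counters, so alloc_nid() takes the first free entry in O(1) instead of scanning for state NID_NEW, and available_nids is maintained under the same lock. A compact userspace sketch of the two-list scheme, using a toy singly-linked list rather than the kernel's list_head:

#include <stdio.h>
#include <stdlib.h>

enum nid_list { FREE_NID_LIST, ALLOC_NID_LIST, MAX_NID_LIST };

struct free_nid { unsigned nid; struct free_nid *next; };

static struct free_nid *lists[MAX_NID_LIST];
static int nid_cnt[MAX_NID_LIST];

static void push(enum nid_list l, struct free_nid *i)
{
    i->next = lists[l]; lists[l] = i; nid_cnt[l]++;
}

static struct free_nid *pop(enum nid_list l)
{
    struct free_nid *i = lists[l];

    if (i) { lists[l] = i->next; nid_cnt[l]--; }
    return i;
}

static int alloc_nid(unsigned *nid)
{
    struct free_nid *i = pop(FREE_NID_LIST);   /* O(1), no state scan */

    if (!i)
        return 0;
    *nid = i->nid;
    push(ALLOC_NID_LIST, i);                   /* tracked until done/failed */
    return 1;
}

int main(void)
{
    for (unsigned n = 4; n < 8; n++) {
        struct free_nid *i = malloc(sizeof(*i));
        i->nid = n; push(FREE_NID_LIST, i);
    }

    unsigned nid;
    if (alloc_nid(&nid))
        printf("nid=%u free=%d alloc=%d\n", nid,
               nid_cnt[FREE_NID_LIST], nid_cnt[ALLOC_NID_LIST]);
    return 0;
}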
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 868bec65e51c..e7997e240366 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -169,14 +169,15 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
169 struct f2fs_nm_info *nm_i = NM_I(sbi); 169 struct f2fs_nm_info *nm_i = NM_I(sbi);
170 struct free_nid *fnid; 170 struct free_nid *fnid;
171 171
172 spin_lock(&nm_i->free_nid_list_lock); 172 spin_lock(&nm_i->nid_list_lock);
173 if (nm_i->fcnt <= 0) { 173 if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
174 spin_unlock(&nm_i->free_nid_list_lock); 174 spin_unlock(&nm_i->nid_list_lock);
175 return; 175 return;
176 } 176 }
177 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); 177 fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
178 struct free_nid, list);
178 *nid = fnid->nid; 179 *nid = fnid->nid;
179 spin_unlock(&nm_i->free_nid_list_lock); 180 spin_unlock(&nm_i->nid_list_lock);
180} 181}
181 182
182/* 183/*
@@ -313,7 +314,7 @@ static inline bool is_recoverable_dnode(struct page *page)
313 ((unsigned char *)ckpt + crc_offset))); 314 ((unsigned char *)ckpt + crc_offset)));
314 cp_ver |= (crc << 32); 315 cp_ver |= (crc << 32);
315 } 316 }
316 return cpu_to_le64(cp_ver) == cpver_of_node(page); 317 return cp_ver == cpver_of_node(page);
317} 318}
318 319
319/* 320/*
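The is_recoverable_dnode() change above drops a cpu_to_le64() so both sides of the checkpoint-version compare are in CPU byte order, presumably because cpver_of_node() already converts the on-disk value; mixing orders only happened to work on little-endian hosts where the conversion is a no-op. A sketch of the pitfall using the GCC/Clang bswap builtin and invented values:

#include <stdint.h>
#include <stdio.h>

static uint64_t bswap64(uint64_t v) { return __builtin_bswap64(v); }

int main(void)
{
    uint64_t cp_ver = 0x1122334455667788ULL;  /* CPU byte order */
    uint64_t on_disk = cp_ver;                /* already converted by a
                                                 cpver_of_node()-style helper */

    /* wrong: byte-swapping one side only matches where the swap is a no-op */
    printf("mixed-order match: %d\n", bswap64(cp_ver) == on_disk);  /* 0 */
    /* right: compare both values in CPU order */
    printf("cpu-order match:   %d\n", cp_ver == on_disk);           /* 1 */
    return 0;
}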
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2fc84a991325..981a9584b62f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -180,13 +180,15 @@ static void recover_inode(struct inode *inode, struct page *page)
180 180
181 inode->i_mode = le16_to_cpu(raw->i_mode); 181 inode->i_mode = le16_to_cpu(raw->i_mode);
182 f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); 182 f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
183 inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); 183 inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
184 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); 184 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
185 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); 185 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
186 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 186 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
187 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); 187 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
188 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); 188 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
189 189
190 F2FS_I(inode)->i_advise = raw->i_advise;
191
190 if (file_enc_name(inode)) 192 if (file_enc_name(inode))
191 name = "<encrypted>"; 193 name = "<encrypted>";
192 else 194 else
@@ -196,32 +198,6 @@ static void recover_inode(struct inode *inode, struct page *page)
196 ino_of_node(page), name); 198 ino_of_node(page), name);
197} 199}
198 200
199static bool is_same_inode(struct inode *inode, struct page *ipage)
200{
201 struct f2fs_inode *ri = F2FS_INODE(ipage);
202 struct timespec disk;
203
204 if (!IS_INODE(ipage))
205 return true;
206
207 disk.tv_sec = le64_to_cpu(ri->i_ctime);
208 disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
209 if (timespec_compare(&inode->i_ctime, &disk) > 0)
210 return false;
211
212 disk.tv_sec = le64_to_cpu(ri->i_atime);
213 disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
214 if (timespec_compare(&inode->i_atime, &disk) > 0)
215 return false;
216
217 disk.tv_sec = le64_to_cpu(ri->i_mtime);
218 disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
219 if (timespec_compare(&inode->i_mtime, &disk) > 0)
220 return false;
221
222 return true;
223}
224
225static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 201static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
226{ 202{
227 struct curseg_info *curseg; 203 struct curseg_info *curseg;
@@ -248,10 +224,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
248 goto next; 224 goto next;
249 225
250 entry = get_fsync_inode(head, ino_of_node(page)); 226 entry = get_fsync_inode(head, ino_of_node(page));
251 if (entry) { 227 if (!entry) {
252 if (!is_same_inode(entry->inode, page))
253 goto next;
254 } else {
255 if (IS_INODE(page) && is_dent_dnode(page)) { 228 if (IS_INODE(page) && is_dent_dnode(page)) {
256 err = recover_inode_page(sbi, page); 229 err = recover_inode_page(sbi, page);
257 if (err) 230 if (err)
@@ -454,7 +427,8 @@ retry_dn:
454 continue; 427 continue;
455 } 428 }
456 429
457 if ((start + 1) << PAGE_SHIFT > i_size_read(inode)) 430 if (!file_keep_isize(inode) &&
431 (i_size_read(inode) <= (start << PAGE_SHIFT)))
458 f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); 432 f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
459 433
460 /* 434 /*
@@ -507,8 +481,10 @@ err:
507 f2fs_put_dnode(&dn); 481 f2fs_put_dnode(&dn);
508out: 482out:
509 f2fs_msg(sbi->sb, KERN_NOTICE, 483 f2fs_msg(sbi->sb, KERN_NOTICE,
510 "recover_data: ino = %lx, recovered = %d blocks, err = %d", 484 "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
511 inode->i_ino, recovered, err); 485 inode->i_ino,
486 file_keep_isize(inode) ? "keep" : "recover",
487 recovered, err);
512 return err; 488 return err;
513} 489}
514 490
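Notable in the recovery.c hunks above: recover_inode() previously restored atime from the raw inode's mtime fields, an apparent copy-paste slip now fixed, and it additionally restores i_advise; the is_same_inode() timestamp heuristic is removed outright. A model of the timestamp fix with invented values:

#include <stdint.h>
#include <stdio.h>

struct raw_inode { uint64_t i_atime, i_ctime, i_mtime; };
struct mem_inode { uint64_t atime, ctime, mtime; };

static void recover_timestamps(struct mem_inode *m, const struct raw_inode *r)
{
    m->atime = r->i_atime;   /* was: r->i_mtime, the copy-paste slip */
    m->ctime = r->i_ctime;
    m->mtime = r->i_mtime;
}

int main(void)
{
    struct raw_inode raw = { .i_atime = 100, .i_ctime = 200, .i_mtime = 300 };
    struct mem_inode m;

    recover_timestamps(&m, &raw);
    printf("atime=%llu mtime=%llu\n",
           (unsigned long long)m.atime, (unsigned long long)m.mtime);
    return 0;
}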
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f1b4a1775ebe..0738f48293cc 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -274,8 +274,10 @@ static int __commit_inmem_pages(struct inode *inode,
 
 		set_page_dirty(page);
 		f2fs_wait_on_page_writeback(page, DATA, true);
-		if (clear_page_dirty_for_io(page))
+		if (clear_page_dirty_for_io(page)) {
 			inode_dec_dirty_pages(inode);
+			remove_dirty_inode(inode);
+		}
 
 		fio.page = page;
 		err = do_write_data_page(&fio);
@@ -287,7 +289,6 @@ static int __commit_inmem_pages(struct inode *inode,
 		/* record old blkaddr for revoking */
 		cur->old_addr = fio.old_blkaddr;
 
-		clear_cold_data(page);
 		submit_bio = true;
 	}
 	unlock_page(page);
@@ -363,7 +364,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	 */
 	if (has_not_enough_free_secs(sbi, 0, 0)) {
 		mutex_lock(&sbi->gc_mutex);
-		f2fs_gc(sbi, false);
+		f2fs_gc(sbi, false, false);
 	}
 }
 
@@ -380,14 +381,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 	if (!available_free_memory(sbi, FREE_NIDS))
 		try_to_free_nids(sbi, MAX_FREE_NIDS);
 	else
-		build_free_nids(sbi);
+		build_free_nids(sbi, false);
+
+	if (!is_idle(sbi))
+		return;
 
 	/* checkpoint is the only way to shrink partial cached entries */
 	if (!available_free_memory(sbi, NAT_ENTRIES) ||
 			!available_free_memory(sbi, INO_ENTRIES) ||
 			excess_prefree_segs(sbi) ||
 			excess_dirty_nats(sbi) ||
-			(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
+			f2fs_time_over(sbi, CP_TIME)) {
 		if (test_opt(sbi, DATA_FLUSH)) {
 			struct blk_plug plug;
 
@@ -400,6 +404,33 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 		}
 	}
 }
 
+static int __submit_flush_wait(struct block_device *bdev)
+{
+	struct bio *bio = f2fs_bio_alloc(0);
+	int ret;
+
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+	bio->bi_bdev = bdev;
+	ret = submit_bio_wait(bio);
+	bio_put(bio);
+	return ret;
+}
+
+static int submit_flush_wait(struct f2fs_sb_info *sbi)
+{
+	int ret = __submit_flush_wait(sbi->sb->s_bdev);
+	int i;
+
+	if (sbi->s_ndevs && !ret) {
+		for (i = 1; i < sbi->s_ndevs; i++) {
+			ret = __submit_flush_wait(FDEV(i).bdev);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
 static int issue_flush_thread(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
@@ -410,25 +441,18 @@ repeat:
 		return 0;
 
 	if (!llist_empty(&fcc->issue_list)) {
-		struct bio *bio;
 		struct flush_cmd *cmd, *next;
 		int ret;
 
-		bio = f2fs_bio_alloc(0);
-
 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 
-		bio->bi_bdev = sbi->sb->s_bdev;
-		bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-		ret = submit_bio_wait(bio);
+		ret = submit_flush_wait(sbi);
 
 		llist_for_each_entry_safe(cmd, next,
 					  fcc->dispatch_list, llnode) {
 			cmd->ret = ret;
 			complete(&cmd->wait);
 		}
-		bio_put(bio);
 		fcc->dispatch_list = NULL;
 	}
 
@@ -449,15 +473,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 		return 0;
 
 	if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
-		struct bio *bio = f2fs_bio_alloc(0);
 		int ret;
 
 		atomic_inc(&fcc->submit_flush);
-		bio->bi_bdev = sbi->sb->s_bdev;
-		bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-		ret = submit_bio_wait(bio);
+		ret = submit_flush_wait(sbi);
 		atomic_dec(&fcc->submit_flush);
-		bio_put(bio);
 		return ret;
 	}
 
@@ -469,8 +489,13 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
 	if (!fcc->dispatch_list)
 		wake_up(&fcc->flush_wait_queue);
 
-	wait_for_completion(&cmd.wait);
-	atomic_dec(&fcc->submit_flush);
+	if (fcc->f2fs_issue_flush) {
+		wait_for_completion(&cmd.wait);
+		atomic_dec(&fcc->submit_flush);
+	} else {
+		llist_del_all(&fcc->issue_list);
+		atomic_set(&fcc->submit_flush, 0);
+	}
 
 	return cmd.ret;
 }
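The FLUSH_MERGE path above lets many fsync callers share a single cache flush: each caller queues a completion on a lock-free list and the issue_flush_thread drains the whole list with one submit_flush_wait(). A userspace analogue of that batching pattern, using a mutex/condvar in place of llist/completion (all names here are illustrative, not kernel APIs); build with -pthread:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static unsigned long pending_gen, completed_gen;
static int flush_in_progress;

static void issue_flush(unsigned long batch)
{
	/* stand-in for submit_flush_wait(): one barrier per batch */
	printf("flush covering waiters up to gen %lu\n", batch);
}

static void merged_flush(void)
{
	pthread_mutex_lock(&lock);
	unsigned long my_gen = ++pending_gen;

	while (completed_gen < my_gen) {
		if (flush_in_progress) {
			/* a flush is in flight: it (or the next one)
			 * will cover us, so just wait */
			pthread_cond_wait(&done, &lock);
			continue;
		}
		/* become the leader for everything queued so far */
		unsigned long batch = pending_gen;
		flush_in_progress = 1;
		pthread_mutex_unlock(&lock);
		issue_flush(batch);
		pthread_mutex_lock(&lock);
		completed_gen = batch;
		flush_in_progress = 0;
		pthread_cond_broadcast(&done);
	}
	pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
	(void)arg;
	merged_flush();
	return NULL;
}

int main(void)
{
	pthread_t t[8];
	for (int i = 0; i < 8; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 8; i++)
		pthread_join(t[i], NULL);
	return 0;
}

Whoever arrives while a flush is busy simply waits; the next leader's single barrier then satisfies the whole accumulated window, which is the point of the merge.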
@@ -481,6 +506,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	struct flush_cmd_control *fcc;
 	int err = 0;
 
+	if (SM_I(sbi)->cmd_control_info) {
+		fcc = SM_I(sbi)->cmd_control_info;
+		goto init_thread;
+	}
+
 	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
 	if (!fcc)
 		return -ENOMEM;
@@ -488,6 +518,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	init_waitqueue_head(&fcc->flush_wait_queue);
 	init_llist_head(&fcc->issue_list);
 	SM_I(sbi)->cmd_control_info = fcc;
+init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(fcc->f2fs_issue_flush)) {
@@ -500,14 +531,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	return err;
 }
 
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
 {
 	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
 
-	if (fcc && fcc->f2fs_issue_flush)
-		kthread_stop(fcc->f2fs_issue_flush);
-	kfree(fcc);
-	SM_I(sbi)->cmd_control_info = NULL;
+	if (fcc && fcc->f2fs_issue_flush) {
+		struct task_struct *flush_thread = fcc->f2fs_issue_flush;
+
+		fcc->f2fs_issue_flush = NULL;
+		kthread_stop(flush_thread);
+	}
+	if (free) {
+		kfree(fcc);
+		SM_I(sbi)->cmd_control_info = NULL;
+	}
 }
 
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -633,15 +670,23 @@ static void f2fs_submit_bio_wait_endio(struct bio *bio)
 }
 
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
 	struct bio *bio = NULL;
 	int err;
 
-	err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-			&bio);
+	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+
+	if (sbi->s_ndevs) {
+		int devi = f2fs_target_device_index(sbi, blkstart);
+
+		blkstart -= FDEV(devi).start_blk;
+	}
+	err = __blkdev_issue_discard(bdev,
+				SECTOR_FROM_BLOCK(blkstart),
+				SECTOR_FROM_BLOCK(blklen),
+				GFP_NOFS, 0, &bio);
 	if (!err && bio) {
 		struct bio_entry *be = __add_bio_entry(sbi, bio);
 
@@ -654,24 +699,101 @@ int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
 	return err;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+	sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
+	sector_t sector;
+	int devi = 0;
+
+	if (sbi->s_ndevs) {
+		devi = f2fs_target_device_index(sbi, blkstart);
+		blkstart -= FDEV(devi).start_blk;
+	}
+	sector = SECTOR_FROM_BLOCK(blkstart);
+
+	if (sector & (bdev_zone_size(bdev) - 1) ||
+	    nr_sects != bdev_zone_size(bdev)) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"(%d) %s: Unaligned discard attempted (block %x + %x)",
+			devi, sbi->s_ndevs ? FDEV(devi).path: "",
+			blkstart, blklen);
+		return -EIO;
+	}
+
+	/*
+	 * We need to know the type of the zone: for conventional zones,
+	 * use regular discard if the drive supports it. For sequential
+	 * zones, reset the zone write pointer.
+	 */
+	switch (get_blkz_type(sbi, bdev, blkstart)) {
+
+	case BLK_ZONE_TYPE_CONVENTIONAL:
+		if (!blk_queue_discard(bdev_get_queue(bdev)))
+			return 0;
+		return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
+	case BLK_ZONE_TYPE_SEQWRITE_REQ:
+	case BLK_ZONE_TYPE_SEQWRITE_PREF:
+		trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
+		return blkdev_reset_zones(bdev, sector,
+					  nr_sects, GFP_NOFS);
+	default:
+		/* Unknown zone type: broken device ? */
+		return -EIO;
+	}
+}
+#endif
+
+static int __issue_discard_async(struct f2fs_sb_info *sbi,
+		struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+	if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
+	    bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
+#endif
+	return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
+}
+
 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
 				block_t blkstart, block_t blklen)
 {
-	sector_t start = SECTOR_FROM_BLOCK(blkstart);
-	sector_t len = SECTOR_FROM_BLOCK(blklen);
+	sector_t start = blkstart, len = 0;
+	struct block_device *bdev;
 	struct seg_entry *se;
 	unsigned int offset;
 	block_t i;
+	int err = 0;
+
+	bdev = f2fs_target_device(sbi, blkstart, NULL);
+
+	for (i = blkstart; i < blkstart + blklen; i++, len++) {
+		if (i != start) {
+			struct block_device *bdev2 =
+				f2fs_target_device(sbi, i, NULL);
+
+			if (bdev2 != bdev) {
+				err = __issue_discard_async(sbi, bdev,
+						start, len);
+				if (err)
+					return err;
+				bdev = bdev2;
+				start = i;
+				len = 0;
+			}
+		}
 
-	for (i = blkstart; i < blkstart + blklen; i++) {
 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
 
 		if (!f2fs_test_and_set_bit(offset, se->discard_map))
 			sbi->discard_blks--;
 	}
-	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
-	return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
+
+	if (len)
+		err = __issue_discard_async(sbi, bdev, start, len);
+	return err;
 }
 
 static void __add_discard_entry(struct f2fs_sb_info *sbi,
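The reworked f2fs_issue_discard above walks the block range and cuts it at device boundaries before handing each piece to __issue_discard_async. The same splitting can be sketched standalone: given the per-device start/end block table that f2fs_scan_devices builds, carve [blkstart, blkstart+blklen) into per-device extents (the device table values below are made up for the sketch):

#include <stdint.h>
#include <stdio.h>

struct dev_extent { uint32_t start_blk, end_blk; };	/* inclusive */

/* two hypothetical devices, back to back in the logical block space */
static const struct dev_extent devs[] = {
	{ .start_blk = 0,    .end_blk = 4095 },
	{ .start_blk = 4096, .end_blk = 8191 },
};

static int target_device(uint32_t blk)
{
	for (unsigned i = 0; i < sizeof(devs) / sizeof(devs[0]); i++)
		if (blk >= devs[i].start_blk && blk <= devs[i].end_blk)
			return (int)i;
	return -1;
}

/* emit one (device, start, len) piece per device crossed */
static void split_discard(uint32_t blkstart, uint32_t blklen)
{
	uint32_t start = blkstart, len = 0;
	int dev = target_device(blkstart);

	for (uint32_t i = blkstart; i < blkstart + blklen; i++, len++) {
		int dev2 = target_device(i);
		if (dev2 != dev) {
			printf("dev %d: discard %u + %u\n", dev, start, len);
			dev = dev2;
			start = i;
			len = 0;
		}
	}
	if (len)
		printf("dev %d: discard %u + %u\n", dev, start, len);
}

int main(void)
{
	split_discard(4000, 200);	/* straddles the device boundary */
	return 0;
}

Running it prints a 96-block piece for device 0 and a 104-block piece for device 1, mirroring how the kernel loop flushes a pending extent whenever the target device changes.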
@@ -1296,25 +1418,21 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	stat_inc_seg_type(sbi, curseg);
 }
 
-static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
-{
-	struct curseg_info *curseg = CURSEG_I(sbi, type);
-	unsigned int old_segno;
-
-	old_segno = curseg->segno;
-	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
-	locate_dirty_segment(sbi, old_segno);
-}
-
 void allocate_new_segments(struct f2fs_sb_info *sbi)
 {
+	struct curseg_info *curseg;
+	unsigned int old_segno;
 	int i;
 
 	if (test_opt(sbi, LFS))
 		return;
 
-	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
-		__allocate_new_segments(sbi, i);
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+		curseg = CURSEG_I(sbi, i);
+		old_segno = curseg->segno;
+		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
+		locate_dirty_segment(sbi, old_segno);
+	}
 }
 
 static const struct segment_allocation default_salloc_ops = {
@@ -1448,21 +1566,11 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		struct f2fs_summary *sum, int type)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
-	struct curseg_info *curseg;
-	bool direct_io = (type == CURSEG_DIRECT_IO);
-
-	type = direct_io ? CURSEG_WARM_DATA : type;
-
-	curseg = CURSEG_I(sbi, type);
+	struct curseg_info *curseg = CURSEG_I(sbi, type);
 
 	mutex_lock(&curseg->curseg_mutex);
 	mutex_lock(&sit_i->sentry_lock);
 
-	/* direct_io'ed data is aligned to the segment for better performance */
-	if (direct_io && curseg->next_blkoff &&
-			!has_not_enough_free_secs(sbi, 0, 0))
-		__allocate_new_segments(sbi, type);
-
 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
 	/*
@@ -2166,7 +2274,6 @@ out:
 static int build_sit_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
-	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct sit_info *sit_i;
 	unsigned int sit_segs, start;
 	char *src_bitmap, *dst_bitmap;
@@ -2233,7 +2340,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 
 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
-	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
+	sit_i->written_valid_blocks = 0;
 	sit_i->sit_bitmap = dst_bitmap;
 	sit_i->bitmap_size = bitmap_size;
 	sit_i->dirty_sentries = 0;
@@ -2315,10 +2422,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
 	unsigned int i, start, end;
 	unsigned int readed, start_blk = 0;
-	int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
 
 	do {
-		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
+		readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+							META_SIT, true);
 
 		start = start_blk * sit_i->sents_per_block;
 		end = (start_blk + readed) * sit_i->sents_per_block;
@@ -2387,6 +2494,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
 		struct seg_entry *sentry = get_seg_entry(sbi, start);
 		if (!sentry->valid_blocks)
 			__set_free(sbi, start);
+		else
+			SIT_I(sbi)->written_valid_blocks +=
+						sentry->valid_blocks;
 	}
 
 	/* set use the current segments */
@@ -2645,7 +2755,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
 
 	if (!sm_info)
 		return;
-	destroy_flush_cmd_control(sbi);
+	destroy_flush_cmd_control(sbi, true);
 	destroy_dirty_segmap(sbi);
 	destroy_curseg(sbi);
 	destroy_free_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index fecb856ad874..9d44ce83acb2 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -18,6 +18,8 @@
 #define DEF_RECLAIM_PREFREE_SEGMENTS	5	/* 5% over total segments */
 #define DEF_MAX_RECLAIM_PREFREE_SEGMENTS	4096	/* 8GB in maximum */
 
+#define F2FS_MIN_SEGMENTS	9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+
 /* L: Logical segment # in volume, R: Relative segment # in main area */
 #define GET_L2R_SEGNO(free_i, segno)	(segno - free_i->start_segno)
 #define GET_R2L_SEGNO(free_i, segno)	(segno + free_i->start_segno)
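The new F2FS_MIN_SEGMENTS floor follows directly from the comment's breakdown: one superblock segment, two segments each for the checkpoint, SIT and NAT areas (each keeps two copies), plus one SSA and one main segment. A compile-time restatement of that arithmetic (the per-area names are invented for the sketch):

#include <assert.h>

enum {
	SB_SEGS   = 1,	/* superblock area */
	CP_SEGS   = 2,	/* two checkpoint packs */
	SIT_SEGS  = 2,	/* two SIT copies */
	NAT_SEGS  = 2,	/* two NAT copies */
	SSA_SEGS  = 1,
	MAIN_SEGS = 1,
};

static_assert(SB_SEGS + CP_SEGS + SIT_SEGS + NAT_SEGS +
	      SSA_SEGS + MAIN_SEGS == 9,
	      "matches F2FS_MIN_SEGMENTS");

int main(void) { return 0; }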
@@ -102,8 +104,6 @@
 	(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
 #define SECTOR_TO_BLOCK(sectors)					\
 	(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
-#define MAX_BIO_BLOCKS(sbi)						\
-	((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
 
 /*
  * indicate a block allocation direction: RIGHT and LEFT.
@@ -471,11 +471,12 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
 {
 	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
 	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
 
 	if (test_opt(sbi, LFS))
 		return false;
 
-	return free_sections(sbi) <= (node_secs + 2 * dent_secs +
+	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 						reserved_sections(sbi) + 1);
 }
 
@@ -484,14 +485,14 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
 {
 	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
 	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
-
-	node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
+	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
 
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 		return false;
 
 	return (free_sections(sbi) + freed) <=
-		(node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
+		(node_secs + 2 * dent_secs + imeta_secs +
 		reserved_sections(sbi) + needed);
 }
 
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
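Both predicates above now charge dirty inode metadata (imeta) against the free-section headroom. Plugging sample numbers into the new formula shows the effect; a toy evaluation with hand-picked counts:

#include <stdbool.h>
#include <stdio.h>

/* new formula from has_not_enough_free_secs(), freed/needed folded in */
static bool not_enough(int free_secs, int freed, int node_secs,
		       int dent_secs, int imeta_secs, int reserved,
		       int needed)
{
	return (free_secs + freed) <=
		(node_secs + 2 * dent_secs + imeta_secs + reserved + needed);
}

int main(void)
{
	/* 2 node + 2*1 dent + 3 imeta + 4 reserved = 11 >= 10 free: short */
	printf("%d\n", not_enough(10, 0, 2, 1, 3, 4, 0));
	/* without the imeta term the same state would have looked fine */
	printf("%d\n", not_enough(10, 0, 2, 1, 0, 4, 0));
	return 0;
}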
@@ -695,13 +696,6 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
 	return false;
 }
 
-static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
-{
-	struct block_device *bdev = sbi->sb->s_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-	return SECTOR_TO_BLOCK(queue_max_sectors(q));
-}
-
 /*
  * It is very important to gather dirty pages and write at once, so that we can
  * submit a big bio without interfering other data writes.
@@ -719,7 +713,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
 	else if (type == NODE)
 		return 8 * sbi->blocks_per_seg;
 	else if (type == META)
-		return 8 * MAX_BIO_BLOCKS(sbi);
+		return 8 * BIO_MAX_PAGES;
 	else
 		return 0;
 }
@@ -736,11 +730,9 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
 		return 0;
 
 	nr_to_write = wbc->nr_to_write;
-
+	desired = BIO_MAX_PAGES;
 	if (type == NODE)
-		desired = 2 * max_hw_blocks(sbi);
-	else
-		desired = MAX_BIO_BLOCKS(sbi);
+		desired <<= 1;
 
 	wbc->nr_to_write = desired;
 	return desired - nr_to_write;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 46c915425923..5c60fc28ec75 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -21,14 +21,16 @@ static unsigned int shrinker_run_no;
 
 static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
 {
-	return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
+	long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
+
+	return count > 0 ? count : 0;
 }
 
 static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
 {
-	if (NM_I(sbi)->fcnt > MAX_FREE_NIDS)
-		return NM_I(sbi)->fcnt - MAX_FREE_NIDS;
-	return 0;
+	long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
+
+	return count > 0 ? count : 0;
 }
 
 static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
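Both shrinker counters above switch to a signed intermediate before clamping. With plain unsigned arithmetic, a momentarily larger subtrahend wraps around to a huge object count and the shrinker stampedes the caches. A two-line demonstration of the failure mode (assuming the usual two's-complement conversion on a 64-bit host):

#include <stdio.h>

int main(void)
{
	unsigned long nat_cnt = 5, dirty_nat_cnt = 7;

	/* old style: wraps to ULONG_MAX - 1 */
	printf("unsigned: %lu\n", nat_cnt - dirty_nat_cnt);

	/* new style: signed difference, clamped at zero */
	long count = (long)(nat_cnt - dirty_nat_cnt);
	printf("clamped:  %lu\n", (unsigned long)(count > 0 ? count : 0));
	return 0;
}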
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2cac6bb86080..702638e21c76 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -412,14 +412,20 @@ static int parse_options(struct super_block *sb, char *options)
 			q = bdev_get_queue(sb->s_bdev);
 			if (blk_queue_discard(q)) {
 				set_opt(sbi, DISCARD);
-			} else {
+			} else if (!f2fs_sb_mounted_blkzoned(sb)) {
 				f2fs_msg(sb, KERN_WARNING,
 					"mounting with \"discard\" option, but "
 					"the device does not support discard");
 			}
 			break;
 		case Opt_nodiscard:
+			if (f2fs_sb_mounted_blkzoned(sb)) {
+				f2fs_msg(sb, KERN_WARNING,
+					"discard is required for zoned block devices");
+				return -EINVAL;
+			}
 			clear_opt(sbi, DISCARD);
+			break;
 		case Opt_noheap:
 			set_opt(sbi, NOHEAP);
 			break;
@@ -512,6 +518,13 @@ static int parse_options(struct super_block *sb, char *options)
 				return -ENOMEM;
 			if (strlen(name) == 8 &&
 					!strncmp(name, "adaptive", 8)) {
+				if (f2fs_sb_mounted_blkzoned(sb)) {
+					f2fs_msg(sb, KERN_WARNING,
+						"adaptive mode is not allowed with "
+						"zoned block device feature");
+					kfree(name);
+					return -EINVAL;
+				}
 				set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
 			} else if (strlen(name) == 3 &&
 					!strncmp(name, "lfs", 3)) {
@@ -558,13 +571,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 
 	init_once((void *) fi);
 
-	if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
-		kmem_cache_free(f2fs_inode_cachep, fi);
-		return NULL;
-	}
-
 	/* Initialize f2fs-specific inode info */
 	fi->vfs_inode.i_version = 1;
+	atomic_set(&fi->dirty_pages, 0);
 	fi->i_current_depth = 1;
 	fi->i_advise = 0;
 	init_rwsem(&fi->i_sem);
@@ -620,24 +629,25 @@ static int f2fs_drop_inode(struct inode *inode)
 	return generic_drop_inode(inode);
 }
 
-int f2fs_inode_dirtied(struct inode *inode)
+int f2fs_inode_dirtied(struct inode *inode, bool sync)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	int ret = 0;
 
 	spin_lock(&sbi->inode_lock[DIRTY_META]);
 	if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
-		spin_unlock(&sbi->inode_lock[DIRTY_META]);
-		return 1;
+		ret = 1;
+	} else {
+		set_inode_flag(inode, FI_DIRTY_INODE);
+		stat_inc_dirty_inode(sbi, DIRTY_META);
 	}
-
-	set_inode_flag(inode, FI_DIRTY_INODE);
-	list_add_tail(&F2FS_I(inode)->gdirty_list,
+	if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
+		list_add_tail(&F2FS_I(inode)->gdirty_list,
 				&sbi->inode_list[DIRTY_META]);
 		inc_page_count(sbi, F2FS_DIRTY_IMETA);
-	stat_inc_dirty_inode(sbi, DIRTY_META);
+	}
 	spin_unlock(&sbi->inode_lock[DIRTY_META]);
-
-	return 0;
+	return ret;
 }
 
 void f2fs_inode_synced(struct inode *inode)
@@ -649,10 +659,12 @@ void f2fs_inode_synced(struct inode *inode)
 		spin_unlock(&sbi->inode_lock[DIRTY_META]);
 		return;
 	}
-	list_del_init(&F2FS_I(inode)->gdirty_list);
+	if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
+		list_del_init(&F2FS_I(inode)->gdirty_list);
+		dec_page_count(sbi, F2FS_DIRTY_IMETA);
+	}
 	clear_inode_flag(inode, FI_DIRTY_INODE);
 	clear_inode_flag(inode, FI_AUTO_RECOVER);
-	dec_page_count(sbi, F2FS_DIRTY_IMETA);
 	stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
 	spin_unlock(&sbi->inode_lock[DIRTY_META]);
 }
@@ -676,7 +688,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
 	if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
 		clear_inode_flag(inode, FI_AUTO_RECOVER);
 
-	f2fs_inode_dirtied(inode);
+	f2fs_inode_dirtied(inode, false);
 }
 
 static void f2fs_i_callback(struct rcu_head *head)
@@ -687,20 +699,28 @@ static void f2fs_i_callback(struct rcu_head *head)
 
 static void f2fs_destroy_inode(struct inode *inode)
 {
-	percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
 	call_rcu(&inode->i_rcu, f2fs_i_callback);
 }
 
 static void destroy_percpu_info(struct f2fs_sb_info *sbi)
 {
-	int i;
-
-	for (i = 0; i < NR_COUNT_TYPE; i++)
-		percpu_counter_destroy(&sbi->nr_pages[i]);
 	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
 }
 
+static void destroy_device_list(struct f2fs_sb_info *sbi)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
+#ifdef CONFIG_BLK_DEV_ZONED
+		kfree(FDEV(i).blkz_type);
+#endif
+	}
+	kfree(sbi->devs);
+}
+
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -738,7 +758,6 @@ static void f2fs_put_super(struct super_block *sb)
 	 * In addition, EIO will skip do checkpoint, we need this as well.
 	 */
 	release_ino_entry(sbi, true);
-	release_discard_addrs(sbi);
 
 	f2fs_leave_shrinker(sbi);
 	mutex_unlock(&sbi->umount_mutex);
@@ -762,6 +781,8 @@ static void f2fs_put_super(struct super_block *sb)
 	crypto_free_shash(sbi->s_chksum_driver);
 	kfree(sbi->raw_super);
 
+	destroy_device_list(sbi);
+
 	destroy_percpu_info(sbi);
 	kfree(sbi);
 }
@@ -789,13 +810,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 static int f2fs_freeze(struct super_block *sb)
 {
-	int err;
-
 	if (f2fs_readonly(sb))
 		return 0;
 
-	err = f2fs_sync_fs(sb, 1);
-	return err;
+	/* IO error happened before */
+	if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
+		return -EIO;
+
+	/* must be clean, since sync_filesystem() was already called */
+	if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
+		return -EINVAL;
+	return 0;
 }
 
 static int f2fs_unfreeze(struct super_block *sb)
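f2fs_freeze is reached through the VFS freeze path, where sync_filesystem() has already run; the new body therefore only refuses a dirty or errored filesystem instead of issuing another sync. From userspace the whole sequence is driven by the FIFREEZE/FITHAW ioctls (the same thing fsfreeze(8) does); a minimal driver, needing CAP_SYS_ADMIN to succeed:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* kernel syncs the fs, then calls the fs's ->freeze_fs */
	if (ioctl(fd, FIFREEZE, 0) < 0)
		perror("FIFREEZE");
	else if (ioctl(fd, FITHAW, 0) < 0)
		perror("FITHAW");

	close(fd);
	return 0;
}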
@@ -822,7 +847,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
 
 	buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
-	buf->f_ffree = buf->f_files - valid_inode_count(sbi);
+	buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
+							buf->f_bavail);
 
 	buf->f_namelen = F2FS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
@@ -974,7 +1000,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, EXTENT_CACHE);
 	sbi->sb->s_flags |= MS_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
-	if (f2fs_sb_mounted_hmsmr(sbi->sb)) {
+	if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
 		set_opt_mode(sbi, F2FS_MOUNT_LFS);
 		set_opt(sbi, DISCARD);
 	} else {
@@ -1076,8 +1102,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * or if flush_merge is not passed in mount option.
 	 */
 	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
-		destroy_flush_cmd_control(sbi);
-	} else if (!SM_I(sbi)->cmd_control_info) {
+		clear_opt(sbi, FLUSH_MERGE);
+		destroy_flush_cmd_control(sbi, false);
+	} else {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			goto restore_gc;
@@ -1426,6 +1453,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	unsigned int total, fsmeta;
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned int ovp_segments, reserved_segments;
 
 	total = le32_to_cpu(raw_super->segment_count);
 	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1437,6 +1465,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	if (unlikely(fsmeta >= total))
 		return 1;
 
+	ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+	reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+
+	if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+			ovp_segments == 0 || reserved_segments == 0)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"Wrong layout: check mkfs.f2fs version");
+		return 1;
+	}
+
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
 		return 1;
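The added check rejects images whose checkpoint advertises an impossible layout before any of those fields feed later arithmetic. The predicate reduces to a few comparisons; a standalone restatement with hand-picked field values:

#include <stdbool.h>
#include <stdio.h>

#define F2FS_MIN_SEGMENTS 9

static bool sane_layout(unsigned int total, unsigned int fsmeta,
			unsigned int ovp, unsigned int rsvd)
{
	if (fsmeta >= total)
		return false;
	if (fsmeta < F2FS_MIN_SEGMENTS || ovp == 0 || rsvd == 0)
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", sane_layout(512, 24, 20, 12));	/* plausible image */
	printf("%d\n", sane_layout(512, 24, 0, 12));	/* zero overprov   */
	return 0;
}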
@@ -1447,6 +1485,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 static void init_sb_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = sbi->raw_super;
+	int i;
 
 	sbi->log_sectors_per_block =
 		le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1471,6 +1510,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
 	clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
+	for (i = 0; i < NR_COUNT_TYPE; i++)
+		atomic_set(&sbi->nr_pages[i], 0);
+
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
 	mutex_init(&sbi->wio_mutex[NODE]);
@@ -1486,13 +1528,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 
 static int init_percpu_info(struct f2fs_sb_info *sbi)
 {
-	int i, err;
-
-	for (i = 0; i < NR_COUNT_TYPE; i++) {
-		err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
-		if (err)
-			return err;
-	}
+	int err;
 
 	err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
 	if (err)
@@ -1502,6 +1538,71 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 		GFP_KERNEL);
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
+{
+	struct block_device *bdev = FDEV(devi).bdev;
+	sector_t nr_sectors = bdev->bd_part->nr_sects;
+	sector_t sector = 0;
+	struct blk_zone *zones;
+	unsigned int i, nr_zones;
+	unsigned int n = 0;
+	int err = -EIO;
+
+	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
+		return 0;
+
+	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
+				SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+		return -EINVAL;
+	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
+				__ilog2_u32(sbi->blocks_per_blkz))
+		return -EINVAL;
+	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
+	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
+					sbi->log_blocks_per_blkz;
+	if (nr_sectors & (bdev_zone_size(bdev) - 1))
+		FDEV(devi).nr_blkz++;
+
+	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+	if (!FDEV(devi).blkz_type)
+		return -ENOMEM;
+
+#define F2FS_REPORT_NR_ZONES   4096
+
+	zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
+			GFP_KERNEL);
+	if (!zones)
+		return -ENOMEM;
+
+	/* Get block zones type */
+	while (zones && sector < nr_sectors) {
+
+		nr_zones = F2FS_REPORT_NR_ZONES;
+		err = blkdev_report_zones(bdev, sector,
+					  zones, &nr_zones,
+					  GFP_KERNEL);
+		if (err)
+			break;
+		if (!nr_zones) {
+			err = -EIO;
+			break;
+		}
+
+		for (i = 0; i < nr_zones; i++) {
+			FDEV(devi).blkz_type[n] = zones[i].type;
+			sector += zones[i].len;
+			n++;
+		}
+	}
+
+	kfree(zones);
+
+	return err;
+}
+#endif
+
 /*
  * Read f2fs raw super block.
  * Because we have two copies of super block, so read both of them
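init_blkz_info sizes its per-zone type array with a shift plus a remainder test, i.e. a ceiling division of the device size by the zone size (the kernel does it in sectors; the sketch below works in blocks, numbers invented):

#include <stdint.h>
#include <stdio.h>

/* nr_blkz = ceil(dev_blocks / 2^log_blocks_per_zone): shift down,
 * then add one if anything was cut off, as in init_blkz_info(). */
static uint64_t nr_zones(uint64_t dev_blocks, unsigned log_blocks_per_zone)
{
	uint64_t n = dev_blocks >> log_blocks_per_zone;

	if (dev_blocks & ((1ULL << log_blocks_per_zone) - 1))
		n++;
	return n;
}

int main(void)
{
	/* 100000 blocks with 65536-block (2^16) zones: 2, last one partial */
	printf("%llu\n", (unsigned long long)nr_zones(100000, 16));
	return 0;
}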
@@ -1594,6 +1695,77 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 	return err;
 }
 
+static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; i++) {
+		if (!RDEV(i).path[0])
+			return 0;
+
+		if (i == 0) {
+			sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
+						MAX_DEVICES, GFP_KERNEL);
+			if (!sbi->devs)
+				return -ENOMEM;
+		}
+
+		memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+		FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
+		if (i == 0) {
+			FDEV(i).start_blk = 0;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1 +
+				le32_to_cpu(raw_super->segment0_blkaddr);
+		} else {
+			FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1;
+		}
+
+		FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+					sbi->sb->s_mode, sbi->sb->s_type);
+		if (IS_ERR(FDEV(i).bdev))
+			return PTR_ERR(FDEV(i).bdev);
+
+		/* to release errored devices */
+		sbi->s_ndevs = i + 1;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
+				!f2fs_sb_mounted_blkzoned(sbi->sb)) {
+			f2fs_msg(sbi->sb, KERN_ERR,
+				"Zoned block device feature not enabled\n");
+			return -EINVAL;
+		}
+		if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+			if (init_blkz_info(sbi, i)) {
+				f2fs_msg(sbi->sb, KERN_ERR,
+					"Failed to initialize F2FS blkzone information");
+				return -EINVAL;
+			}
+			f2fs_msg(sbi->sb, KERN_INFO,
+				"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+				i, FDEV(i).path,
+				FDEV(i).total_segments,
+				FDEV(i).start_blk, FDEV(i).end_blk,
+				bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+				"Host-aware" : "Host-managed");
+			continue;
+		}
+#endif
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Mount Device [%2d]: %20s, %8u, %8x - %8x",
+			i, FDEV(i).path,
+			FDEV(i).total_segments,
+			FDEV(i).start_blk, FDEV(i).end_blk);
+	}
+	return 0;
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct f2fs_sb_info *sbi;
@@ -1641,6 +1813,18 @@ try_onemore:
 	sb->s_fs_info = sbi;
 	sbi->raw_super = raw_super;
 
+	/*
+	 * The BLKZONED feature indicates that the drive was formatted with
+	 * zone alignment optimization. This is optional for host-aware
+	 * devices, but mandatory for host-managed zoned block devices.
+	 */
+#ifndef CONFIG_BLK_DEV_ZONED
+	if (f2fs_sb_mounted_blkzoned(sb)) {
+		f2fs_msg(sb, KERN_ERR,
+			 "Zoned block device support is not enabled\n");
+		goto free_sb_buf;
+	}
+#endif
 	default_options(sbi);
 	/* parse mount options */
 	options = kstrdup((const char *)data, GFP_KERNEL);
@@ -1710,6 +1894,13 @@ try_onemore:
 		goto free_meta_inode;
 	}
 
+	/* Initialize device list */
+	err = f2fs_scan_devices(sbi);
+	if (err) {
+		f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+		goto free_devices;
+	}
+
 	sbi->total_valid_node_count =
 				le32_to_cpu(sbi->ckpt->valid_node_count);
 	percpu_counter_set(&sbi->total_valid_inode_count,
@@ -1893,12 +2084,21 @@ free_node_inode:
 	mutex_lock(&sbi->umount_mutex);
 	release_ino_entry(sbi, true);
 	f2fs_leave_shrinker(sbi);
+	/*
+	 * Some dirty meta pages can be produced by recover_orphan_inodes()
+	 * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
+	 * followed by write_checkpoint() through f2fs_write_node_pages(), which
+	 * falls into an infinite loop in sync_meta_pages().
+	 */
+	truncate_inode_pages_final(META_MAPPING(sbi));
 	iput(sbi->node_inode);
 	mutex_unlock(&sbi->umount_mutex);
 free_nm:
 	destroy_node_manager(sbi);
free_sm:
 	destroy_segment_manager(sbi);
+free_devices:
+	destroy_device_list(sbi);
 	kfree(sbi->ckpt);
free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
@@ -2044,3 +2244,4 @@ module_exit(exit_f2fs_fs)
 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
 MODULE_DESCRIPTION("Flash Friendly File System");
 MODULE_LICENSE("GPL");
+
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 3e1c0280f866..c47ce2f330a1 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
 		return -EINVAL;
 
 	F2FS_I(inode)->i_advise |= *(char *)value;
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 	return 0;
 }
 
@@ -554,7 +554,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
 	if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
 			!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
 		f2fs_set_encrypted_inode(inode);
-	f2fs_mark_inode_dirty_sync(inode);
+	f2fs_mark_inode_dirty_sync(inode, true);
 	if (!error && S_ISDIR(inode->i_mode))
 		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
 exit:
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 422630b8e588..cea41a124a80 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -52,10 +52,17 @@
 
 #define VERSION_LEN	256
 #define MAX_VOLUME_NAME		512
+#define MAX_PATH_LEN		64
+#define MAX_DEVICES		8
 
 /*
  * For superblock
  */
+struct f2fs_device {
+	__u8 path[MAX_PATH_LEN];
+	__le32 total_segments;
+} __packed;
+
 struct f2fs_super_block {
 	__le32 magic;			/* Magic Number */
 	__le16 major_ver;		/* Major Version */
@@ -94,7 +101,8 @@ struct f2fs_super_block {
 	__le32 feature;			/* defined features */
 	__u8 encryption_level;		/* versioning level for encryption */
 	__u8 encrypt_pw_salt[16];	/* Salt used for string2key algorithm */
-	__u8 reserved[871];		/* valid reserved region */
+	struct f2fs_device devs[MAX_DEVICES];	/* device list */
+	__u8 reserved[327];		/* valid reserved region */
 } __packed;
 
 /*
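The shrinking reserved[] is pure bookkeeping: each f2fs_device is a 64-byte path plus a __le32, 68 bytes packed, and eight of them consume 544 of the previous 871 reserved bytes, leaving 327. A compile-time restatement of that accounting, using userspace stand-ins for the on-disk types:

#include <assert.h>
#include <stdint.h>

#define MAX_PATH_LEN	64
#define MAX_DEVICES	8

struct f2fs_device {
	uint8_t  path[MAX_PATH_LEN];
	uint32_t total_segments;	/* __le32 on disk */
} __attribute__((packed));

static_assert(sizeof(struct f2fs_device) == 68, "68 bytes per device");
static_assert(871 - MAX_DEVICES * sizeof(struct f2fs_device) == 327,
	      "devs[8] eats 544 bytes of the old 871-byte reserved area");

int main(void) { return 0; }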
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 5da2c829a718..01b3c9869a0d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1111,6 +1111,27 @@ TRACE_EVENT(f2fs_issue_discard,
 		(unsigned long long)__entry->blklen)
 );
 
+TRACE_EVENT(f2fs_issue_reset_zone,
+
+	TP_PROTO(struct super_block *sb, block_t blkstart),
+
+	TP_ARGS(sb, blkstart),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(block_t, blkstart)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->blkstart = blkstart;
+	),
+
+	TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
+		show_dev(__entry),
+		(unsigned long long)__entry->blkstart)
+);
+
 TRACE_EVENT(f2fs_issue_flush,
 
 	TP_PROTO(struct super_block *sb, unsigned int nobarrier,