aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-04-07 13:55:36 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-07 13:55:36 -0400
commit 3021112598d2b722eee54d8a662fea2089abbdbc (patch)
tree 640c5f7b1d8ece378e3e67bd3c401c80b40ecea8
parent 0af9fb63915cf5ebb47b5c9ff16526b47545baf5 (diff)
parent 48b230a583965d33c32b4e3c29a1e5e15d7e55de (diff)
Merge tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "This patch-set includes the following major enhancement patches.
   - introduce large directory support
   - introduce f2fs_issue_flush to merge redundant flush commands
   - merge write IOs as much as possible aligned to the segment
   - add sysfs entries to tune the f2fs configuration
   - use radix_tree for the free_nid_list to reduce in-memory operations
   - remove costly bit operations in f2fs_find_entry
   - enhance the readahead flow for CP/NAT/SIT/SSA blocks

  The other bug fixes are as follows:
   - recover xattr node blocks correctly after sudden-power-cut
   - fix to calculate the maximum number of node ids
   - enhance to handle many error cases

  And, there are a bunch of cleanups"

* tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (62 commits)
  f2fs: fix wrong statistics of inline data
  f2fs: check the acl's validity before setting
  f2fs: introduce f2fs_issue_flush to avoid redundant flush issue
  f2fs: fix to cover io->bio with io_rwsem
  f2fs: fix error path when fail to read inline data
  f2fs: use list_for_each_entry{_safe} for simplyfying code
  f2fs: avoid free slab cache under spinlock
  f2fs: avoid unneeded lookup when xattr name length is too long
  f2fs: avoid unnecessary bio submit when wait page writeback
  f2fs: return -EIO when node id is not matched
  f2fs: avoid RECLAIM_FS-ON-W warning
  f2fs: skip unnecessary node writes during fsync
  f2fs: introduce fi->i_sem to protect fi's info
  f2fs: change reclaim rate in percentage
  f2fs: add missing documentation for dir_level
  f2fs: remove unnecessary threshold
  f2fs: throttle the memory footprint with a sysfs entry
  f2fs: avoid to drop nat entries due to the negative nr_shrink
  f2fs: call f2fs_wait_on_page_writeback instead of native function
  f2fs: introduce nr_pages_to_write for segment alignment
  ...
-rw-r--r-- Documentation/ABI/testing/sysfs-fs-f2fs | 12
-rw-r--r-- Documentation/filesystems/f2fs.txt | 29
-rw-r--r-- fs/f2fs/acl.c | 8
-rw-r--r-- fs/f2fs/checkpoint.c | 208
-rw-r--r-- fs/f2fs/data.c | 106
-rw-r--r-- fs/f2fs/debug.c | 12
-rw-r--r-- fs/f2fs/dir.c | 85
-rw-r--r-- fs/f2fs/f2fs.h | 105
-rw-r--r-- fs/f2fs/file.c | 31
-rw-r--r-- fs/f2fs/gc.c | 16
-rw-r--r-- fs/f2fs/inline.c | 4
-rw-r--r-- fs/f2fs/inode.c | 27
-rw-r--r-- fs/f2fs/namei.c | 9
-rw-r--r-- fs/f2fs/node.c | 334
-rw-r--r-- fs/f2fs/node.h | 25
-rw-r--r-- fs/f2fs/recovery.c | 37
-rw-r--r-- fs/f2fs/segment.c | 222
-rw-r--r-- fs/f2fs/segment.h | 75
-rw-r--r-- fs/f2fs/super.c | 97
-rw-r--r-- fs/f2fs/xattr.c | 7
-rw-r--r-- include/linux/f2fs_fs.h | 2
21 files changed, 939 insertions, 512 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 32b0809203dd..62dd72522d6e 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -55,3 +55,15 @@ Date: January 2014
55Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> 55Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
56Description: 56Description:
57 Controls the number of trials to find a victim segment. 57 Controls the number of trials to find a victim segment.
58
59What: /sys/fs/f2fs/<disk>/dir_level
60Date: March 2014
61Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
62Description:
63 Controls the directory level for large directory.
64
65What: /sys/fs/f2fs/<disk>/ram_thresh
66Date: March 2014
67Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
68Description:
69 Controls the memory footprint used by f2fs.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index b8d284975f0f..25311e113e75 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs
122inline_xattr Enable the inline xattrs feature. 122inline_xattr Enable the inline xattrs feature.
123inline_data Enable the inline data feature: New created small(<~3.4k) 123inline_data Enable the inline data feature: New created small(<~3.4k)
124 files can be written into inode block. 124 files can be written into inode block.
125flush_merge Merge concurrent cache_flush commands as much as possible
126 to eliminate redundant command issues. If the underlying
127 device handles the cache_flush command relatively slowly,
128 recommend to enable this option.
125 129
126================================================================================ 130================================================================================
127DEBUGFS ENTRIES 131DEBUGFS ENTRIES
@@ -169,9 +173,11 @@ Files in /sys/fs/f2fs/<devname>
169 173
170 reclaim_segments This parameter controls the number of prefree 174 reclaim_segments This parameter controls the number of prefree
171 segments to be reclaimed. If the number of prefree 175 segments to be reclaimed. If the number of prefree
172 segments is larger than this number, f2fs tries to 176 segments is larger than the number of segments
173 conduct checkpoint to reclaim the prefree segments 177 in the proportion to the percentage over total
174 to free segments. By default, 100 segments, 200MB. 178 volume size, f2fs tries to conduct checkpoint to
179 reclaim the prefree segments to free segments.
180 By default, 5% over total # of segments.
175 181
176 max_small_discards This parameter controls the number of discard 182 max_small_discards This parameter controls the number of discard
177 commands that consist small blocks less than 2MB. 183 commands that consist small blocks less than 2MB.
@@ -195,6 +201,17 @@ Files in /sys/fs/f2fs/<devname>
195 cleaning operations. The default value is 4096 201 cleaning operations. The default value is 4096
196 which covers 8GB block address range. 202 which covers 8GB block address range.
197 203
204 dir_level This parameter controls the directory level to
205 support large directory. If a directory has a
206 number of files, it can reduce the file lookup
207 latency by increasing this dir_level value.
208 Otherwise, it needs to decrease this value to
209 reduce the space overhead. The default value is 0.
210
211 ram_thresh This parameter controls the memory footprint used
212 by free nids and cached nat entries. By default,
213 10 is set, which indicates 10 MB / 1 GB RAM.
214
198================================================================================ 215================================================================================
199USAGE 216USAGE
200================================================================================ 217================================================================================
@@ -444,9 +461,11 @@ The number of blocks and buckets are determined by,
444 # of blocks in level #n = | 461 # of blocks in level #n = |
445 `- 4, Otherwise 462 `- 4, Otherwise
446 463
447 ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, 464 ,- 2^ (n + dir_level),
465 | if n < MAX_DIR_HASH_DEPTH / 2,
448 # of buckets in level #n = | 466 # of buckets in level #n = |
449 `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise 467 `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
468 Otherwise
450 469
451When F2FS finds a file name in a directory, at first a hash value of the file 470When F2FS finds a file name in a directory, at first a hash value of the file
452name is calculated. Then, F2FS scans the hash table in level #0 to find the 471name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4cb8c4b..e93e4ec7d165 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
174 174
175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0); 175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
176 if (retval > 0) { 176 if (retval > 0) {
177 value = kmalloc(retval, GFP_KERNEL); 177 value = kmalloc(retval, GFP_F2FS_ZERO);
178 if (!value) 178 if (!value)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 retval = f2fs_getxattr(inode, name_index, "", value, retval); 180 retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
206 switch (type) { 212 switch (type) {
207 case ACL_TYPE_ACCESS: 213 case ACL_TYPE_ACCESS:
208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 293d0486a40f..4aa521aa9bc3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
33 struct address_space *mapping = META_MAPPING(sbi); 33 struct address_space *mapping = META_MAPPING(sbi);
34 struct page *page = NULL; 34 struct page *page = NULL;
35repeat: 35repeat:
36 page = grab_cache_page(mapping, index); 36 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
37 if (!page) { 37 if (!page) {
38 cond_resched(); 38 cond_resched();
39 goto repeat; 39 goto repeat;
40 } 40 }
41 41
42 /* We wait writeback only inside grab_meta_page() */
43 wait_on_page_writeback(page);
44 SetPageUptodate(page); 42 SetPageUptodate(page);
45 return page; 43 return page;
46} 44}
@@ -75,23 +73,102 @@ out:
75 return page; 73 return page;
76} 74}
77 75
76inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
77{
78 switch (type) {
79 case META_NAT:
80 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
81 case META_SIT:
82 return SIT_BLK_CNT(sbi);
83 case META_SSA:
84 case META_CP:
85 return 0;
86 default:
87 BUG();
88 }
89}
90
91/*
92 * Readahead CP/NAT/SIT/SSA pages
93 */
94int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
95{
96 block_t prev_blk_addr = 0;
97 struct page *page;
98 int blkno = start;
99 int max_blks = get_max_meta_blks(sbi, type);
100
101 struct f2fs_io_info fio = {
102 .type = META,
103 .rw = READ_SYNC | REQ_META | REQ_PRIO
104 };
105
106 for (; nrpages-- > 0; blkno++) {
107 block_t blk_addr;
108
109 switch (type) {
110 case META_NAT:
111 /* get nat block addr */
112 if (unlikely(blkno >= max_blks))
113 blkno = 0;
114 blk_addr = current_nat_addr(sbi,
115 blkno * NAT_ENTRY_PER_BLOCK);
116 break;
117 case META_SIT:
118 /* get sit block addr */
119 if (unlikely(blkno >= max_blks))
120 goto out;
121 blk_addr = current_sit_addr(sbi,
122 blkno * SIT_ENTRY_PER_BLOCK);
123 if (blkno != start && prev_blk_addr + 1 != blk_addr)
124 goto out;
125 prev_blk_addr = blk_addr;
126 break;
127 case META_SSA:
128 case META_CP:
129 /* get ssa/cp block addr */
130 blk_addr = blkno;
131 break;
132 default:
133 BUG();
134 }
135
136 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
137 if (!page)
138 continue;
139 if (PageUptodate(page)) {
140 mark_page_accessed(page);
141 f2fs_put_page(page, 1);
142 continue;
143 }
144
145 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
146 mark_page_accessed(page);
147 f2fs_put_page(page, 0);
148 }
149out:
150 f2fs_submit_merged_bio(sbi, META, READ);
151 return blkno - start;
152}
153
78static int f2fs_write_meta_page(struct page *page, 154static int f2fs_write_meta_page(struct page *page,
79 struct writeback_control *wbc) 155 struct writeback_control *wbc)
80{ 156{
81 struct inode *inode = page->mapping->host; 157 struct inode *inode = page->mapping->host;
82 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 158 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
83 159
84 /* Should not write any meta pages, if any IO error was occurred */ 160 if (unlikely(sbi->por_doing))
85 if (unlikely(sbi->por_doing ||
86 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
87 goto redirty_out; 161 goto redirty_out;
88
89 if (wbc->for_reclaim) 162 if (wbc->for_reclaim)
90 goto redirty_out; 163 goto redirty_out;
91 164
92 wait_on_page_writeback(page); 165 /* Should not write any meta pages, if any IO error was occurred */
166 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
167 goto no_write;
93 168
169 f2fs_wait_on_page_writeback(page, META);
94 write_meta_page(sbi, page); 170 write_meta_page(sbi, page);
171no_write:
95 dec_page_count(sbi, F2FS_DIRTY_META); 172 dec_page_count(sbi, F2FS_DIRTY_META);
96 unlock_page(page); 173 unlock_page(page);
97 return 0; 174 return 0;
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
99redirty_out: 176redirty_out:
100 dec_page_count(sbi, F2FS_DIRTY_META); 177 dec_page_count(sbi, F2FS_DIRTY_META);
101 wbc->pages_skipped++; 178 wbc->pages_skipped++;
179 account_page_redirty(page);
102 set_page_dirty(page); 180 set_page_dirty(page);
103 return AOP_WRITEPAGE_ACTIVATE; 181 return AOP_WRITEPAGE_ACTIVATE;
104} 182}
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
107 struct writeback_control *wbc) 185 struct writeback_control *wbc)
108{ 186{
109 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 187 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
110 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 188 long diff, written;
111 long written;
112
113 if (wbc->for_kupdate)
114 return 0;
115 189
116 /* collect a number of dirty meta pages and write together */ 190 /* collect a number of dirty meta pages and write together */
117 if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) 191 if (wbc->for_kupdate ||
118 return 0; 192 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
193 goto skip_write;
119 194
120 /* if mounting is failed, skip writing node pages */ 195 /* if mounting is failed, skip writing node pages */
121 mutex_lock(&sbi->cp_mutex); 196 mutex_lock(&sbi->cp_mutex);
122 written = sync_meta_pages(sbi, META, nrpages); 197 diff = nr_pages_to_write(sbi, META, wbc);
198 written = sync_meta_pages(sbi, META, wbc->nr_to_write);
123 mutex_unlock(&sbi->cp_mutex); 199 mutex_unlock(&sbi->cp_mutex);
124 wbc->nr_to_write -= written; 200 wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
201 return 0;
202
203skip_write:
204 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
125 return 0; 205 return 0;
126} 206}
127 207
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
148 228
149 for (i = 0; i < nr_pages; i++) { 229 for (i = 0; i < nr_pages; i++) {
150 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
231
151 lock_page(page); 232 lock_page(page);
152 f2fs_bug_on(page->mapping != mapping); 233
153 f2fs_bug_on(!PageDirty(page)); 234 if (unlikely(page->mapping != mapping)) {
154 clear_page_dirty_for_io(page); 235continue_unlock:
236 unlock_page(page);
237 continue;
238 }
239 if (!PageDirty(page)) {
240 /* someone wrote it for us */
241 goto continue_unlock;
242 }
243
244 if (!clear_page_dirty_for_io(page))
245 goto continue_unlock;
246
155 if (f2fs_write_meta_page(page, &wbc)) { 247 if (f2fs_write_meta_page(page, &wbc)) {
156 unlock_page(page); 248 unlock_page(page);
157 break; 249 break;
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
216 308
217void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 309void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
218{ 310{
219 struct list_head *head, *this; 311 struct list_head *head;
220 struct orphan_inode_entry *new = NULL, *orphan = NULL; 312 struct orphan_inode_entry *new, *orphan;
221 313
222 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 314 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
223 new->ino = ino; 315 new->ino = ino;
224 316
225 spin_lock(&sbi->orphan_inode_lock); 317 spin_lock(&sbi->orphan_inode_lock);
226 head = &sbi->orphan_inode_list; 318 head = &sbi->orphan_inode_list;
227 list_for_each(this, head) { 319 list_for_each_entry(orphan, head, list) {
228 orphan = list_entry(this, struct orphan_inode_entry, list);
229 if (orphan->ino == ino) { 320 if (orphan->ino == ino) {
230 spin_unlock(&sbi->orphan_inode_lock); 321 spin_unlock(&sbi->orphan_inode_lock);
231 kmem_cache_free(orphan_entry_slab, new); 322 kmem_cache_free(orphan_entry_slab, new);
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
234 325
235 if (orphan->ino > ino) 326 if (orphan->ino > ino)
236 break; 327 break;
237 orphan = NULL;
238 } 328 }
239 329
240 /* add new_oentry into list which is sorted by inode number */ 330 /* add new orphan entry into list which is sorted by inode number */
241 if (orphan) 331 list_add_tail(&new->list, &orphan->list);
242 list_add(&new->list, this->prev);
243 else
244 list_add_tail(&new->list, head);
245 spin_unlock(&sbi->orphan_inode_lock); 332 spin_unlock(&sbi->orphan_inode_lock);
246} 333}
247 334
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
255 list_for_each_entry(orphan, head, list) { 342 list_for_each_entry(orphan, head, list) {
256 if (orphan->ino == ino) { 343 if (orphan->ino == ino) {
257 list_del(&orphan->list); 344 list_del(&orphan->list);
258 kmem_cache_free(orphan_entry_slab, orphan);
259 f2fs_bug_on(sbi->n_orphans == 0); 345 f2fs_bug_on(sbi->n_orphans == 0);
260 sbi->n_orphans--; 346 sbi->n_orphans--;
261 break; 347 spin_unlock(&sbi->orphan_inode_lock);
348 kmem_cache_free(orphan_entry_slab, orphan);
349 return;
262 } 350 }
263 } 351 }
264 spin_unlock(&sbi->orphan_inode_lock); 352 spin_unlock(&sbi->orphan_inode_lock);
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
285 start_blk = __start_cp_addr(sbi) + 1; 373 start_blk = __start_cp_addr(sbi) + 1;
286 orphan_blkaddr = __start_sum_addr(sbi) - 1; 374 orphan_blkaddr = __start_sum_addr(sbi) - 1;
287 375
376 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
377
288 for (i = 0; i < orphan_blkaddr; i++) { 378 for (i = 0; i < orphan_blkaddr; i++) {
289 struct page *page = get_meta_page(sbi, start_blk + i); 379 struct page *page = get_meta_page(sbi, start_blk + i);
290 struct f2fs_orphan_block *orphan_blk; 380 struct f2fs_orphan_block *orphan_blk;
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
466{ 556{
467 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
468 struct list_head *head = &sbi->dir_inode_list; 558 struct list_head *head = &sbi->dir_inode_list;
469 struct list_head *this; 559 struct dir_inode_entry *entry;
470 560
471 list_for_each(this, head) { 561 list_for_each_entry(entry, head, list)
472 struct dir_inode_entry *entry;
473 entry = list_entry(this, struct dir_inode_entry, list);
474 if (unlikely(entry->inode == inode)) 562 if (unlikely(entry->inode == inode))
475 return -EEXIST; 563 return -EEXIST;
476 } 564
477 list_add_tail(&new->list, head); 565 list_add_tail(&new->list, head);
478 stat_inc_dirty_dir(sbi); 566 stat_inc_dirty_dir(sbi);
479 return 0; 567 return 0;
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
483{ 571{
484 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 572 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
485 struct dir_inode_entry *new; 573 struct dir_inode_entry *new;
574 int ret = 0;
486 575
487 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
488 return; 577 return;
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
492 INIT_LIST_HEAD(&new->list); 581 INIT_LIST_HEAD(&new->list);
493 582
494 spin_lock(&sbi->dir_inode_lock); 583 spin_lock(&sbi->dir_inode_lock);
495 if (__add_dirty_inode(inode, new)) 584 ret = __add_dirty_inode(inode, new);
496 kmem_cache_free(inode_entry_slab, new);
497
498 inc_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_inc_dirty_dents(inode); 585 inode_inc_dirty_dents(inode);
500 SetPagePrivate(page); 586 SetPagePrivate(page);
501 spin_unlock(&sbi->dir_inode_lock); 587 spin_unlock(&sbi->dir_inode_lock);
588
589 if (ret)
590 kmem_cache_free(inode_entry_slab, new);
502} 591}
503 592
504void add_dirty_dir_inode(struct inode *inode) 593void add_dirty_dir_inode(struct inode *inode)
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
506 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 595 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 struct dir_inode_entry *new = 596 struct dir_inode_entry *new =
508 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 597 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
598 int ret = 0;
509 599
510 new->inode = inode; 600 new->inode = inode;
511 INIT_LIST_HEAD(&new->list); 601 INIT_LIST_HEAD(&new->list);
512 602
513 spin_lock(&sbi->dir_inode_lock); 603 spin_lock(&sbi->dir_inode_lock);
514 if (__add_dirty_inode(inode, new)) 604 ret = __add_dirty_inode(inode, new);
515 kmem_cache_free(inode_entry_slab, new);
516 spin_unlock(&sbi->dir_inode_lock); 605 spin_unlock(&sbi->dir_inode_lock);
606
607 if (ret)
608 kmem_cache_free(inode_entry_slab, new);
517} 609}
518 610
519void remove_dirty_dir_inode(struct inode *inode) 611void remove_dirty_dir_inode(struct inode *inode)
520{ 612{
521 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 613 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
522 614 struct list_head *head;
523 struct list_head *this, *head; 615 struct dir_inode_entry *entry;
524 616
525 if (!S_ISDIR(inode->i_mode)) 617 if (!S_ISDIR(inode->i_mode))
526 return; 618 return;
527 619
528 spin_lock(&sbi->dir_inode_lock); 620 spin_lock(&sbi->dir_inode_lock);
529 if (atomic_read(&F2FS_I(inode)->dirty_dents)) { 621 if (get_dirty_dents(inode)) {
530 spin_unlock(&sbi->dir_inode_lock); 622 spin_unlock(&sbi->dir_inode_lock);
531 return; 623 return;
532 } 624 }
533 625
534 head = &sbi->dir_inode_list; 626 head = &sbi->dir_inode_list;
535 list_for_each(this, head) { 627 list_for_each_entry(entry, head, list) {
536 struct dir_inode_entry *entry;
537 entry = list_entry(this, struct dir_inode_entry, list);
538 if (entry->inode == inode) { 628 if (entry->inode == inode) {
539 list_del(&entry->list); 629 list_del(&entry->list);
540 kmem_cache_free(inode_entry_slab, entry);
541 stat_dec_dirty_dir(sbi); 630 stat_dec_dirty_dir(sbi);
542 break; 631 spin_unlock(&sbi->dir_inode_lock);
632 kmem_cache_free(inode_entry_slab, entry);
633 goto done;
543 } 634 }
544 } 635 }
545 spin_unlock(&sbi->dir_inode_lock); 636 spin_unlock(&sbi->dir_inode_lock);
546 637
638done:
547 /* Only from the recovery routine */ 639 /* Only from the recovery routine */
548 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 640 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
549 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 641 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
554struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) 646struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
555{ 647{
556 648
557 struct list_head *this, *head; 649 struct list_head *head;
558 struct inode *inode = NULL; 650 struct inode *inode = NULL;
651 struct dir_inode_entry *entry;
559 652
560 spin_lock(&sbi->dir_inode_lock); 653 spin_lock(&sbi->dir_inode_lock);
561 654
562 head = &sbi->dir_inode_list; 655 head = &sbi->dir_inode_list;
563 list_for_each(this, head) { 656 list_for_each_entry(entry, head, list) {
564 struct dir_inode_entry *entry;
565 entry = list_entry(this, struct dir_inode_entry, list);
566 if (entry->inode->i_ino == ino) { 657 if (entry->inode->i_ino == ino) {
567 inode = entry->inode; 658 inode = entry->inode;
568 break; 659 break;
@@ -589,7 +680,7 @@ retry:
589 inode = igrab(entry->inode); 680 inode = igrab(entry->inode);
590 spin_unlock(&sbi->dir_inode_lock); 681 spin_unlock(&sbi->dir_inode_lock);
591 if (inode) { 682 if (inode) {
592 filemap_flush(inode->i_mapping); 683 filemap_fdatawrite(inode->i_mapping);
593 iput(inode); 684 iput(inode);
594 } else { 685 } else {
595 /* 686 /*
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
824 unblock_operations(sbi); 915 unblock_operations(sbi);
825 mutex_unlock(&sbi->cp_mutex); 916 mutex_unlock(&sbi->cp_mutex);
826 917
918 stat_inc_cp_count(sbi->stat_info);
827 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 919 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
828} 920}
829 921
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
845int __init create_checkpoint_caches(void) 937int __init create_checkpoint_caches(void)
846{ 938{
847 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 939 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
848 sizeof(struct orphan_inode_entry), NULL); 940 sizeof(struct orphan_inode_entry));
849 if (!orphan_entry_slab) 941 if (!orphan_entry_slab)
850 return -ENOMEM; 942 return -ENOMEM;
851 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 943 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
852 sizeof(struct dir_inode_entry), NULL); 944 sizeof(struct dir_inode_entry));
853 if (!inode_entry_slab) { 945 if (!inode_entry_slab) {
854 kmem_cache_destroy(orphan_entry_slab); 946 kmem_cache_destroy(orphan_entry_slab);
855 return -ENOMEM; 947 return -ENOMEM;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2261ccdd0b5f..45abd60e2bff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
45 45
46static void f2fs_write_end_io(struct bio *bio, int err) 46static void f2fs_write_end_io(struct bio *bio, int err)
47{ 47{
48 struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb); 48 struct f2fs_sb_info *sbi = bio->bi_private;
49 struct bio_vec *bvec; 49 struct bio_vec *bvec;
50 int i; 50 int i;
51 51
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 SetPageError(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 58 f2fs_stop_checkpoint(sbi);
59 sbi->sb->s_flags |= MS_RDONLY;
60 } 59 }
61 end_page_writeback(page); 60 end_page_writeback(page);
62 dec_page_count(sbi, F2FS_WRITEBACK); 61 dec_page_count(sbi, F2FS_WRITEBACK);
63 } 62 }
64 63
65 if (bio->bi_private) 64 if (sbi->wait_io) {
66 complete(bio->bi_private); 65 complete(sbi->wait_io);
66 sbi->wait_io = NULL;
67 }
67 68
68 if (!get_pages(sbi, F2FS_WRITEBACK) && 69 if (!get_pages(sbi, F2FS_WRITEBACK) &&
69 !list_empty(&sbi->cp_wait.task_list)) 70 !list_empty(&sbi->cp_wait.task_list))
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
86 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi;
89 91
90 return bio; 92 return bio;
91} 93}
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
113 */ 115 */
114 if (fio->type == META_FLUSH) { 116 if (fio->type == META_FLUSH) {
115 DECLARE_COMPLETION_ONSTACK(wait); 117 DECLARE_COMPLETION_ONSTACK(wait);
116 io->bio->bi_private = &wait; 118 io->sbi->wait_io = &wait;
117 submit_bio(rw, io->bio); 119 submit_bio(rw, io->bio);
118 wait_for_completion(&wait); 120 wait_for_completion(&wait);
119 } else { 121 } else {
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
132 134
133 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 135 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
134 136
135 mutex_lock(&io->io_mutex); 137 down_write(&io->io_rwsem);
136 138
137 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
138 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
140 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
141 } 143 }
142 __submit_merged_bio(io); 144 __submit_merged_bio(io);
143 mutex_unlock(&io->io_mutex); 145 up_write(&io->io_rwsem);
144} 146}
145 147
146/* 148/*
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
178 180
179 verify_block_addr(sbi, blk_addr); 181 verify_block_addr(sbi, blk_addr);
180 182
181 mutex_lock(&io->io_mutex); 183 down_write(&io->io_rwsem);
182 184
183 if (!is_read) 185 if (!is_read)
184 inc_page_count(sbi, F2FS_WRITEBACK); 186 inc_page_count(sbi, F2FS_WRITEBACK);
@@ -202,7 +204,7 @@ alloc_new:
202 204
203 io->last_block_in_bio = blk_addr; 205 io->last_block_in_bio = blk_addr;
204 206
205 mutex_unlock(&io->io_mutex); 207 up_write(&io->io_rwsem);
206 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 208 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
207} 209}
208 210
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
797 */ 799 */
798 offset = i_size & (PAGE_CACHE_SIZE - 1); 800 offset = i_size & (PAGE_CACHE_SIZE - 1);
799 if ((page->index >= end_index + 1) || !offset) { 801 if ((page->index >= end_index + 1) || !offset) {
800 if (S_ISDIR(inode->i_mode)) { 802 inode_dec_dirty_dents(inode);
801 dec_page_count(sbi, F2FS_DIRTY_DENTS);
802 inode_dec_dirty_dents(inode);
803 }
804 goto out; 803 goto out;
805 } 804 }
806 805
807 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 806 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
808write: 807write:
809 if (unlikely(sbi->por_doing)) { 808 if (unlikely(sbi->por_doing))
810 err = AOP_WRITEPAGE_ACTIVATE;
811 goto redirty_out; 809 goto redirty_out;
812 }
813 810
814 /* Dentry blocks are controlled by checkpoint */ 811 /* Dentry blocks are controlled by checkpoint */
815 if (S_ISDIR(inode->i_mode)) { 812 if (S_ISDIR(inode->i_mode)) {
816 dec_page_count(sbi, F2FS_DIRTY_DENTS);
817 inode_dec_dirty_dents(inode); 813 inode_dec_dirty_dents(inode);
818 err = do_write_data_page(page, &fio); 814 err = do_write_data_page(page, &fio);
819 } else { 815 goto done;
820 f2fs_lock_op(sbi); 816 }
821
822 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
823 err = f2fs_write_inline_data(inode, page, offset);
824 f2fs_unlock_op(sbi);
825 goto out;
826 } else {
827 err = do_write_data_page(page, &fio);
828 }
829 817
830 f2fs_unlock_op(sbi); 818 if (!wbc->for_reclaim)
831 need_balance_fs = true; 819 need_balance_fs = true;
832 } 820 else if (has_not_enough_free_secs(sbi, 0))
833 if (err == -ENOENT)
834 goto out;
835 else if (err)
836 goto redirty_out; 821 goto redirty_out;
837 822
838 if (wbc->for_reclaim) { 823 f2fs_lock_op(sbi);
839 f2fs_submit_merged_bio(sbi, DATA, WRITE); 824 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
840 need_balance_fs = false; 825 err = f2fs_write_inline_data(inode, page, offset);
841 } 826 else
827 err = do_write_data_page(page, &fio);
828 f2fs_unlock_op(sbi);
829done:
830 if (err && err != -ENOENT)
831 goto redirty_out;
842 832
843 clear_cold_data(page); 833 clear_cold_data(page);
844out: 834out:
@@ -849,12 +839,11 @@ out:
849 839
850redirty_out: 840redirty_out:
851 wbc->pages_skipped++; 841 wbc->pages_skipped++;
842 account_page_redirty(page);
852 set_page_dirty(page); 843 set_page_dirty(page);
853 return err; 844 return AOP_WRITEPAGE_ACTIVATE;
854} 845}
855 846
856#define MAX_DESIRED_PAGES_WP 4096
857
858static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, 847static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
859 void *data) 848 void *data)
860{ 849{
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
871 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 860 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
872 bool locked = false; 861 bool locked = false;
873 int ret; 862 int ret;
874 long excess_nrtw = 0, desired_nrtw; 863 long diff;
875 864
876 /* deal with chardevs and other special file */ 865 /* deal with chardevs and other special file */
877 if (!mapping->a_ops->writepage) 866 if (!mapping->a_ops->writepage)
878 return 0; 867 return 0;
879 868
880 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 869 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
881 desired_nrtw = MAX_DESIRED_PAGES_WP; 870 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
882 excess_nrtw = desired_nrtw - wbc->nr_to_write; 871 goto skip_write;
883 wbc->nr_to_write = desired_nrtw; 872
884 } 873 diff = nr_pages_to_write(sbi, DATA, wbc);
885 874
886 if (!S_ISDIR(inode->i_mode)) { 875 if (!S_ISDIR(inode->i_mode)) {
887 mutex_lock(&sbi->writepages); 876 mutex_lock(&sbi->writepages);
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
895 884
896 remove_dirty_dir_inode(inode); 885 remove_dirty_dir_inode(inode);
897 886
898 wbc->nr_to_write -= excess_nrtw; 887 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
899 return ret; 888 return ret;
889
890skip_write:
891 wbc->pages_skipped += get_dirty_dents(inode);
892 return 0;
900} 893}
901 894
902static int f2fs_write_begin(struct file *file, struct address_space *mapping, 895static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -949,13 +942,19 @@ inline_data:
949 if (dn.data_blkaddr == NEW_ADDR) { 942 if (dn.data_blkaddr == NEW_ADDR) {
950 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 943 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
951 } else { 944 } else {
952 if (f2fs_has_inline_data(inode)) 945 if (f2fs_has_inline_data(inode)) {
953 err = f2fs_read_inline_data(inode, page); 946 err = f2fs_read_inline_data(inode, page);
954 else 947 if (err) {
948 page_cache_release(page);
949 return err;
950 }
951 } else {
955 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 952 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
956 READ_SYNC); 953 READ_SYNC);
957 if (err) 954 if (err)
958 return err; 955 return err;
956 }
957
959 lock_page(page); 958 lock_page(page);
960 if (unlikely(!PageUptodate(page))) { 959 if (unlikely(!PageUptodate(page))) {
961 f2fs_put_page(page, 1); 960 f2fs_put_page(page, 1);
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1031 unsigned int length) 1030 unsigned int length)
1032{ 1031{
1033 struct inode *inode = page->mapping->host; 1032 struct inode *inode = page->mapping->host;
1034 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1033 if (PageDirty(page))
1035 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
1036 dec_page_count(sbi, F2FS_DIRTY_DENTS);
1037 inode_dec_dirty_dents(inode); 1034 inode_dec_dirty_dents(inode);
1038 }
1039 ClearPagePrivate(page); 1035 ClearPagePrivate(page);
1040} 1036}
1041 1037
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3de9d20d0c14..b52c12cf5873 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
86{ 86{
87 struct f2fs_stat_info *si = F2FS_STAT(sbi); 87 struct f2fs_stat_info *si = F2FS_STAT(sbi);
88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; 88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
89 struct sit_info *sit_i = SIT_I(sbi);
90 unsigned int segno, vblocks; 89 unsigned int segno, vblocks;
91 int ndirty = 0; 90 int ndirty = 0;
92 91
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
94 total_vblocks = 0; 93 total_vblocks = 0;
95 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
96 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
97 mutex_lock(&sit_i->sentry_lock);
98 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
99 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
100 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
105 ndirty++; 103 ndirty++;
106 } 104 }
107 } 105 }
108 mutex_unlock(&sit_i->sentry_lock);
109 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
110 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
111 if (si->dirty_count) 108 if (si->dirty_count)
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
236 si->dirty_count); 233 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", 234 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs); 235 si->prefree_count, si->free_segs, si->free_secs);
236 seq_printf(s, "CP calls: %d\n", si->cp_count);
239 seq_printf(s, "GC calls: %d (BG: %d)\n", 237 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc); 238 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs); 239 seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
252 si->ndirty_dent, si->ndirty_dirs); 250 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta: %4d in %4d\n", 251 seq_printf(s, " - meta: %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages); 252 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs: %5d > %lu\n", 253 seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
256 si->nats, NM_WOUT_THRESHOLD); 254 si->nats, si->sits);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", 255 seq_printf(s, " - free_nids: %9d\n",
258 si->sits, si->fnids); 256 si->fnids);
259 seq_puts(s, "\nDistribution of User Blocks:"); 257 seq_puts(s, "\nDistribution of User Blocks:");
260 seq_puts(s, " [ valid | invalid | free ]\n"); 258 seq_puts(s, " [ valid | invalid | free ]\n");
261 seq_puts(s, " ["); 259 seq_puts(s, " [");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2b7c255bcbdf..972fd0ef230f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
21 >> PAGE_CACHE_SHIFT; 21 >> PAGE_CACHE_SHIFT;
22} 22}
23 23
24static unsigned int dir_buckets(unsigned int level) 24static unsigned int dir_buckets(unsigned int level, int dir_level)
25{ 25{
26 if (level < MAX_DIR_HASH_DEPTH / 2) 26 if (level < MAX_DIR_HASH_DEPTH / 2)
27 return 1 << level; 27 return 1 << (level + dir_level);
28 else 28 else
29 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); 29 return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
30} 30}
31 31
32static unsigned int bucket_blocks(unsigned int level) 32static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 66}
67 67
68static unsigned long dir_block_index(unsigned int level, unsigned int idx) 68static unsigned long dir_block_index(unsigned int level,
69 int dir_level, unsigned int idx)
69{ 70{
70 unsigned long i; 71 unsigned long i;
71 unsigned long bidx = 0; 72 unsigned long bidx = 0;
72 73
73 for (i = 0; i < level; i++) 74 for (i = 0; i < level; i++)
74 bidx += dir_buckets(i) * bucket_blocks(i); 75 bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
75 bidx += idx * bucket_blocks(level); 76 bidx += idx * bucket_blocks(level);
76 return bidx; 77 return bidx;
77} 78}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
94{ 95{
95 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
96 unsigned long bit_pos, end_pos, next_pos; 97 unsigned long bit_pos = 0;
97 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); 98 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
98 int slots; 99 const void *dentry_bits = &dentry_blk->dentry_bitmap;
100 int max_len = 0;
99 101
100 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
101 NR_DENTRY_IN_BLOCK, 0);
102 while (bit_pos < NR_DENTRY_IN_BLOCK) { 102 while (bit_pos < NR_DENTRY_IN_BLOCK) {
103 if (!test_bit_le(bit_pos, dentry_bits)) {
104 if (bit_pos == 0)
105 max_len = 1;
106 else if (!test_bit_le(bit_pos - 1, dentry_bits))
107 max_len++;
108 bit_pos++;
109 continue;
110 }
103 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
104 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
105
106 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name, namelen, namehash, de)) {
107 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
108 name, namelen)) { 114 name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
110 goto found; 116 goto found;
111 } 117 }
112 } 118 }
113 next_pos = bit_pos + slots; 119 if (max_len > *max_slots) {
114 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 120 *max_slots = max_len;
115 NR_DENTRY_IN_BLOCK, next_pos); 121 max_len = 0;
116 if (bit_pos >= NR_DENTRY_IN_BLOCK) 122 }
117 end_pos = NR_DENTRY_IN_BLOCK; 123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
118 else
119 end_pos = bit_pos;
120 if (*max_slots < end_pos - next_pos)
121 *max_slots = end_pos - next_pos;
122 } 124 }
123 125
124 de = NULL; 126 de = NULL;
125 kunmap(dentry_page); 127 kunmap(dentry_page);
126found: 128found:
129 if (max_len > *max_slots)
130 *max_slots = max_len;
127 return de; 131 return de;
128} 132}
129 133
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
141 145
142 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 146 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
143 147
144 nbucket = dir_buckets(level); 148 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
145 nblock = bucket_blocks(level); 149 nblock = bucket_blocks(level);
146 150
147 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); 151 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
152 le32_to_cpu(namehash) % nbucket);
148 end_block = bidx + nblock; 153 end_block = bidx + nblock;
149 154
150 for (; bidx < end_block; bidx++) { 155 for (; bidx < end_block; bidx++) {
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
248 struct page *page, struct inode *inode) 253 struct page *page, struct inode *inode)
249{ 254{
250 lock_page(page); 255 lock_page(page);
251 wait_on_page_writeback(page); 256 f2fs_wait_on_page_writeback(page, DATA);
252 de->ino = cpu_to_le32(inode->i_ino); 257 de->ino = cpu_to_le32(inode->i_ino);
253 set_de_type(de, inode); 258 set_de_type(de, inode);
254 kunmap(page); 259 kunmap(page);
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
347 err = f2fs_init_security(inode, dir, name, page); 352 err = f2fs_init_security(inode, dir, name, page);
348 if (err) 353 if (err)
349 goto put_error; 354 goto put_error;
350
351 wait_on_page_writeback(page);
352 } else { 355 } else {
353 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 356 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
354 if (IS_ERR(page)) 357 if (IS_ERR(page))
355 return page; 358 return page;
356 359
357 wait_on_page_writeback(page);
358 set_cold_node(inode, page); 360 set_cold_node(inode, page);
359 } 361 }
360 362
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
372 374
373put_error: 375put_error:
374 f2fs_put_page(page, 1); 376 f2fs_put_page(page, 1);
377 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
378 truncate_inode_pages(&inode->i_data, 0);
379 truncate_blocks(inode, 0);
380 remove_dirty_dir_inode(inode);
375error: 381error:
376 remove_inode_page(inode); 382 remove_inode_page(inode);
377 return ERR_PTR(err); 383 return ERR_PTR(err);
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
395 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 401 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
396 } 402 }
397 403
398 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
399 update_inode_page(dir);
400
401 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 404 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
402 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 405 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
403} 406}
@@ -464,10 +467,11 @@ start:
464 if (level == current_depth) 467 if (level == current_depth)
465 ++current_depth; 468 ++current_depth;
466 469
467 nbucket = dir_buckets(level); 470 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
468 nblock = bucket_blocks(level); 471 nblock = bucket_blocks(level);
469 472
470 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); 473 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
474 (le32_to_cpu(dentry_hash) % nbucket));
471 475
472 for (block = bidx; block <= (bidx + nblock - 1); block++) { 476 for (block = bidx; block <= (bidx + nblock - 1); block++) {
473 dentry_page = get_new_data_page(dir, NULL, block, true); 477 dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -487,8 +491,9 @@ start:
487 ++level; 491 ++level;
488 goto start; 492 goto start;
489add_dentry: 493add_dentry:
490 wait_on_page_writeback(dentry_page); 494 f2fs_wait_on_page_writeback(dentry_page, DATA);
491 495
496 down_write(&F2FS_I(inode)->i_sem);
492 page = init_inode_metadata(inode, dir, name); 497 page = init_inode_metadata(inode, dir, name);
493 if (IS_ERR(page)) { 498 if (IS_ERR(page)) {
494 err = PTR_ERR(page); 499 err = PTR_ERR(page);
@@ -511,7 +516,12 @@ add_dentry:
511 516
512 update_parent_metadata(dir, inode, current_depth); 517 update_parent_metadata(dir, inode, current_depth);
513fail: 518fail:
514 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 519 up_write(&F2FS_I(inode)->i_sem);
520
521 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
522 update_inode_page(dir);
523 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
524 }
515 kunmap(dentry_page); 525 kunmap(dentry_page);
516 f2fs_put_page(dentry_page, 1); 526 f2fs_put_page(dentry_page, 1);
517 return err; 527 return err;
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
528 unsigned int bit_pos; 538 unsigned int bit_pos;
529 struct address_space *mapping = page->mapping; 539 struct address_space *mapping = page->mapping;
530 struct inode *dir = mapping->host; 540 struct inode *dir = mapping->host;
531 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
532 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 541 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
533 void *kaddr = page_address(page); 542 void *kaddr = page_address(page);
534 int i; 543 int i;
535 544
536 lock_page(page); 545 lock_page(page);
537 wait_on_page_writeback(page); 546 f2fs_wait_on_page_writeback(page, DATA);
538 547
539 dentry_blk = (struct f2fs_dentry_block *)kaddr; 548 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 549 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
551 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 560 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
552 561
553 if (inode) { 562 if (inode) {
563 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
564
565 down_write(&F2FS_I(inode)->i_sem);
566
554 if (S_ISDIR(inode->i_mode)) { 567 if (S_ISDIR(inode->i_mode)) {
555 drop_nlink(dir); 568 drop_nlink(dir);
556 update_inode_page(dir); 569 update_inode_page(dir);
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
561 drop_nlink(inode); 574 drop_nlink(inode);
562 i_size_write(inode, 0); 575 i_size_write(inode, 0);
563 } 576 }
577 up_write(&F2FS_I(inode)->i_sem);
564 update_inode_page(inode); 578 update_inode_page(inode);
565 579
566 if (inode->i_nlink == 0) 580 if (inode->i_nlink == 0)
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
573 truncate_hole(dir, page->index, page->index + 1); 587 truncate_hole(dir, page->index, page->index + 1);
574 clear_page_dirty_for_io(page); 588 clear_page_dirty_for_io(page);
575 ClearPageUptodate(page); 589 ClearPageUptodate(page);
576 dec_page_count(sbi, F2FS_DIRTY_DENTS);
577 inode_dec_dirty_dents(dir); 590 inode_dec_dirty_dents(dir);
578 } 591 }
579 f2fs_put_page(page, 1); 592 f2fs_put_page(page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fc3c558cb4f3..2ecac8312359 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -40,6 +40,7 @@
40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
43 44
44#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
45#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -88,6 +89,16 @@ enum {
88 SIT_BITMAP 89 SIT_BITMAP
89}; 90};
90 91
92/*
93 * For CP/NAT/SIT/SSA readahead
94 */
95enum {
96 META_CP,
97 META_NAT,
98 META_SIT,
99 META_SSA
100};
101
91/* for the list of orphan inodes */ 102/* for the list of orphan inodes */
92struct orphan_inode_entry { 103struct orphan_inode_entry {
93 struct list_head list; /* list head */ 104 struct list_head list; /* list head */
@@ -187,16 +198,20 @@ struct extent_info {
187#define FADVISE_COLD_BIT 0x01 198#define FADVISE_COLD_BIT 0x01
188#define FADVISE_LOST_PINO_BIT 0x02 199#define FADVISE_LOST_PINO_BIT 0x02
189 200
201#define DEF_DIR_LEVEL 0
202
190struct f2fs_inode_info { 203struct f2fs_inode_info {
191 struct inode vfs_inode; /* serve a vfs inode */ 204 struct inode vfs_inode; /* serve a vfs inode */
192 unsigned long i_flags; /* keep an inode flags for ioctl */ 205 unsigned long i_flags; /* keep an inode flags for ioctl */
193 unsigned char i_advise; /* use to give file attribute hints */ 206 unsigned char i_advise; /* use to give file attribute hints */
207 unsigned char i_dir_level; /* use for dentry level for large dir */
194 unsigned int i_current_depth; /* use only in directory structure */ 208 unsigned int i_current_depth; /* use only in directory structure */
195 unsigned int i_pino; /* parent inode number */ 209 unsigned int i_pino; /* parent inode number */
196 umode_t i_acl_mode; /* keep file acl mode temporarily */ 210 umode_t i_acl_mode; /* keep file acl mode temporarily */
197 211
198 /* Use below internally in f2fs*/ 212 /* Use below internally in f2fs*/
199 unsigned long flags; /* use to pass per-file flags */ 213 unsigned long flags; /* use to pass per-file flags */
214 struct rw_semaphore i_sem; /* protect fi info */
200 atomic_t dirty_dents; /* # of dirty dentry pages */ 215 atomic_t dirty_dents; /* # of dirty dentry pages */
201 f2fs_hash_t chash; /* hash value of given file name */ 216 f2fs_hash_t chash; /* hash value of given file name */
202 unsigned int clevel; /* maximum level of given file name */ 217 unsigned int clevel; /* maximum level of given file name */
@@ -229,6 +244,7 @@ struct f2fs_nm_info {
229 block_t nat_blkaddr; /* base disk address of NAT */ 244 block_t nat_blkaddr; /* base disk address of NAT */
230 nid_t max_nid; /* maximum possible node ids */ 245 nid_t max_nid; /* maximum possible node ids */
231 nid_t next_scan_nid; /* the next nid to be scanned */ 246 nid_t next_scan_nid; /* the next nid to be scanned */
247 unsigned int ram_thresh; /* control the memory footprint */
232 248
233 /* NAT cache management */ 249 /* NAT cache management */
234 struct radix_tree_root nat_root;/* root of the nat entry cache */ 250 struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -238,6 +254,7 @@ struct f2fs_nm_info {
238 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 254 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
239 255
240 /* free node ids management */ 256 /* free node ids management */
257 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
241 struct list_head free_nid_list; /* a list for free nids */ 258 struct list_head free_nid_list; /* a list for free nids */
242 spinlock_t free_nid_list_lock; /* protect free nid list */ 259 spinlock_t free_nid_list_lock; /* protect free nid list */
243 unsigned int fcnt; /* the number of free node id */ 260 unsigned int fcnt; /* the number of free node id */
@@ -300,6 +317,12 @@ enum {
300 NO_CHECK_TYPE 317 NO_CHECK_TYPE
301}; 318};
302 319
320struct flush_cmd {
321 struct flush_cmd *next;
322 struct completion wait;
323 int ret;
324};
325
303struct f2fs_sm_info { 326struct f2fs_sm_info {
304 struct sit_info *sit_info; /* whole segment information */ 327 struct sit_info *sit_info; /* whole segment information */
305 struct free_segmap_info *free_info; /* free segment information */ 328 struct free_segmap_info *free_info; /* free segment information */
@@ -328,6 +351,14 @@ struct f2fs_sm_info {
328 351
329 unsigned int ipu_policy; /* in-place-update policy */ 352 unsigned int ipu_policy; /* in-place-update policy */
330 unsigned int min_ipu_util; /* in-place-update threshold */ 353 unsigned int min_ipu_util; /* in-place-update threshold */
354
355 /* for flush command control */
356 struct task_struct *f2fs_issue_flush; /* flush thread */
357 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
358 struct flush_cmd *issue_list; /* list for command issue */
359 struct flush_cmd *dispatch_list; /* list for command dispatch */
360 spinlock_t issue_lock; /* for issue list lock */
361 struct flush_cmd *issue_tail; /* list tail of issue list */
331}; 362};
332 363
333/* 364/*
@@ -378,7 +409,7 @@ struct f2fs_bio_info {
378 struct bio *bio; /* bios to merge */ 409 struct bio *bio; /* bios to merge */
379 sector_t last_block_in_bio; /* last block number */ 410 sector_t last_block_in_bio; /* last block number */
380 struct f2fs_io_info fio; /* store buffered io info. */ 411 struct f2fs_io_info fio; /* store buffered io info. */
381 struct mutex io_mutex; /* mutex for bio */ 412 struct rw_semaphore io_rwsem; /* blocking op for bio */
382}; 413};
383 414
384struct f2fs_sb_info { 415struct f2fs_sb_info {
@@ -398,6 +429,7 @@ struct f2fs_sb_info {
398 /* for bio operations */ 429 /* for bio operations */
399 struct f2fs_bio_info read_io; /* for read bios */ 430 struct f2fs_bio_info read_io; /* for read bios */
400 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 431 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
432 struct completion *wait_io; /* for completion bios */
401 433
402 /* for checkpoint */ 434 /* for checkpoint */
403 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 435 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -407,7 +439,6 @@ struct f2fs_sb_info {
407 struct mutex node_write; /* locking node writes */ 439 struct mutex node_write; /* locking node writes */
408 struct mutex writepages; /* mutex for writepages() */ 440 struct mutex writepages; /* mutex for writepages() */
409 bool por_doing; /* recovery is doing or not */ 441 bool por_doing; /* recovery is doing or not */
410 bool on_build_free_nids; /* build_free_nids is doing */
411 wait_queue_head_t cp_wait; 442 wait_queue_head_t cp_wait;
412 443
413 /* for orphan inode management */ 444 /* for orphan inode management */
@@ -436,6 +467,7 @@ struct f2fs_sb_info {
436 unsigned int total_valid_node_count; /* valid node block count */ 467 unsigned int total_valid_node_count; /* valid node block count */
437 unsigned int total_valid_inode_count; /* valid inode count */ 468 unsigned int total_valid_inode_count; /* valid inode count */
438 int active_logs; /* # of active logs */ 469 int active_logs; /* # of active logs */
470 int dir_level; /* directory level */
439 471
440 block_t user_block_count; /* # of user blocks */ 472 block_t user_block_count; /* # of user blocks */
441 block_t total_valid_block_count; /* # of valid blocks */ 473 block_t total_valid_block_count; /* # of valid blocks */
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
622 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; 654 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
623} 655}
624 656
657static inline bool f2fs_has_xattr_block(unsigned int ofs)
658{
659 return ofs == XATTR_NODE_OFFSET;
660}
661
625static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 662static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
626 struct inode *inode, blkcnt_t count) 663 struct inode *inode, blkcnt_t count)
627{ 664{
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
661 698
662static inline void inode_inc_dirty_dents(struct inode *inode) 699static inline void inode_inc_dirty_dents(struct inode *inode)
663{ 700{
701 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
664 atomic_inc(&F2FS_I(inode)->dirty_dents); 702 atomic_inc(&F2FS_I(inode)->dirty_dents);
665} 703}
666 704
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
671 709
672static inline void inode_dec_dirty_dents(struct inode *inode) 710static inline void inode_dec_dirty_dents(struct inode *inode)
673{ 711{
712 if (!S_ISDIR(inode->i_mode))
713 return;
714
715 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
674 atomic_dec(&F2FS_I(inode)->dirty_dents); 716 atomic_dec(&F2FS_I(inode)->dirty_dents);
675} 717}
676 718
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
679 return atomic_read(&sbi->nr_pages[count_type]); 721 return atomic_read(&sbi->nr_pages[count_type]);
680} 722}
681 723
724static inline int get_dirty_dents(struct inode *inode)
725{
726 return atomic_read(&F2FS_I(inode)->dirty_dents);
727}
728
682static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 729static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
683{ 730{
684 unsigned int pages_per_sec = sbi->segs_per_sec * 731 unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
689 736
690static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 737static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
691{ 738{
692 block_t ret; 739 return sbi->total_valid_block_count;
693 spin_lock(&sbi->stat_lock);
694 ret = sbi->total_valid_block_count;
695 spin_unlock(&sbi->stat_lock);
696 return ret;
697} 740}
698 741
699static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) 742static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
789 832
790static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) 833static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
791{ 834{
792 unsigned int ret; 835 return sbi->total_valid_node_count;
793 spin_lock(&sbi->stat_lock);
794 ret = sbi->total_valid_node_count;
795 spin_unlock(&sbi->stat_lock);
796 return ret;
797} 836}
798 837
799static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 838static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
814 853
815static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 854static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
816{ 855{
817 unsigned int ret; 856 return sbi->total_valid_inode_count;
818 spin_lock(&sbi->stat_lock);
819 ret = sbi->total_valid_inode_count;
820 spin_unlock(&sbi->stat_lock);
821 return ret;
822} 857}
823 858
824static inline void f2fs_put_page(struct page *page, int unlock) 859static inline void f2fs_put_page(struct page *page, int unlock)
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
844} 879}
845 880
846static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, 881static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
847 size_t size, void (*ctor)(void *)) 882 size_t size)
848{ 883{
849 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); 884 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
850} 885}
851 886
852static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, 887static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
983 ri->i_inline |= F2FS_INLINE_DATA; 1018 ri->i_inline |= F2FS_INLINE_DATA;
984} 1019}
985 1020
1021static inline int f2fs_has_inline_xattr(struct inode *inode)
1022{
1023 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
1024}
1025
986static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) 1026static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
987{ 1027{
988 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 1028 if (f2fs_has_inline_xattr(&fi->vfs_inode))
989 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; 1029 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
990 return DEF_ADDRS_PER_INODE; 1030 return DEF_ADDRS_PER_INODE;
991} 1031}
992 1032
993static inline void *inline_xattr_addr(struct page *page) 1033static inline void *inline_xattr_addr(struct page *page)
994{ 1034{
995 struct f2fs_inode *ri; 1035 struct f2fs_inode *ri = F2FS_INODE(page);
996 ri = (struct f2fs_inode *)page_address(page);
997 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1036 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
998 F2FS_INLINE_XATTR_ADDRS]); 1037 F2FS_INLINE_XATTR_ADDRS]);
999} 1038}
1000 1039
1001static inline int inline_xattr_size(struct inode *inode) 1040static inline int inline_xattr_size(struct inode *inode)
1002{ 1041{
1003 if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) 1042 if (f2fs_has_inline_xattr(inode))
1004 return F2FS_INLINE_XATTR_ADDRS << 2; 1043 return F2FS_INLINE_XATTR_ADDRS << 2;
1005 else 1044 else
1006 return 0; 1045 return 0;
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1013 1052
1014static inline void *inline_data_addr(struct page *page) 1053static inline void *inline_data_addr(struct page *page)
1015{ 1054{
1016 struct f2fs_inode *ri; 1055 struct f2fs_inode *ri = F2FS_INODE(page);
1017 ri = (struct f2fs_inode *)page_address(page);
1018 return (void *)&(ri->i_addr[1]); 1056 return (void *)&(ri->i_addr[1]);
1019} 1057}
1020 1058
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
1023 return sb->s_flags & MS_RDONLY; 1061 return sb->s_flags & MS_RDONLY;
1024} 1062}
1025 1063
1064static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1065{
1066 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1067 sbi->sb->s_flags |= MS_RDONLY;
1068}
1069
1026#define get_inode_mode(i) \ 1070#define get_inode_mode(i) \
1027 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1071 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1028 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1072 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
1048struct inode *f2fs_iget(struct super_block *, unsigned long); 1092struct inode *f2fs_iget(struct super_block *, unsigned long);
1049int try_to_free_nats(struct f2fs_sb_info *, int); 1093int try_to_free_nats(struct f2fs_sb_info *, int);
1050void update_inode(struct inode *, struct page *); 1094void update_inode(struct inode *, struct page *);
1051int update_inode_page(struct inode *); 1095void update_inode_page(struct inode *);
1052int f2fs_write_inode(struct inode *, struct writeback_control *); 1096int f2fs_write_inode(struct inode *, struct writeback_control *);
1053void f2fs_evict_inode(struct inode *); 1097void f2fs_evict_inode(struct inode *);
1054 1098
@@ -1097,6 +1141,7 @@ struct dnode_of_data;
1097struct node_info; 1141struct node_info;
1098 1142
1099int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1143int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1144bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
1100void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1145void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1101int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1146int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1102int truncate_inode_blocks(struct inode *, pgoff_t); 1147int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1115void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1160void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1116void recover_node_page(struct f2fs_sb_info *, struct page *, 1161void recover_node_page(struct f2fs_sb_info *, struct page *,
1117 struct f2fs_summary *, struct node_info *, block_t); 1162 struct f2fs_summary *, struct node_info *, block_t);
1163bool recover_xattr_data(struct inode *, struct page *, block_t);
1118int recover_inode_page(struct f2fs_sb_info *, struct page *); 1164int recover_inode_page(struct f2fs_sb_info *, struct page *);
1119int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1165int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1120 struct f2fs_summary_block *); 1166 struct f2fs_summary_block *);
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
1129 */ 1175 */
1130void f2fs_balance_fs(struct f2fs_sb_info *); 1176void f2fs_balance_fs(struct f2fs_sb_info *);
1131void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1177void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1178int f2fs_issue_flush(struct f2fs_sb_info *);
1132void invalidate_blocks(struct f2fs_sb_info *, block_t); 1179void invalidate_blocks(struct f2fs_sb_info *, block_t);
1180void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1133void clear_prefree_segments(struct f2fs_sb_info *); 1181void clear_prefree_segments(struct f2fs_sb_info *);
1134int npages_for_summary_flush(struct f2fs_sb_info *); 1182int npages_for_summary_flush(struct f2fs_sb_info *);
1135void allocate_new_segments(struct f2fs_sb_info *); 1183void allocate_new_segments(struct f2fs_sb_info *);
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
1162 */ 1210 */
1163struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1211struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1164struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1212struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1213int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1165long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1214long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1166int acquire_orphan_inode(struct f2fs_sb_info *); 1215int acquire_orphan_inode(struct f2fs_sb_info *);
1167void release_orphan_inode(struct f2fs_sb_info *); 1216void release_orphan_inode(struct f2fs_sb_info *);
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
1231 int util_free, util_valid, util_invalid; 1280 int util_free, util_valid, util_invalid;
1232 int rsvd_segs, overp_segs; 1281 int rsvd_segs, overp_segs;
1233 int dirty_count, node_pages, meta_pages; 1282 int dirty_count, node_pages, meta_pages;
1234 int prefree_count, call_count; 1283 int prefree_count, call_count, cp_count;
1235 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1284 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1236 int tot_blks, data_blks, node_blks; 1285 int tot_blks, data_blks, node_blks;
1237 int curseg[NR_CURSEG_TYPE]; 1286 int curseg[NR_CURSEG_TYPE];
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1248 return (struct f2fs_stat_info *)sbi->stat_info; 1297 return (struct f2fs_stat_info *)sbi->stat_info;
1249} 1298}
1250 1299
1300#define stat_inc_cp_count(si) ((si)->cp_count++)
1251#define stat_inc_call_count(si) ((si)->call_count++) 1301#define stat_inc_call_count(si) ((si)->call_count++)
1252#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1302#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1253#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1303#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
1302void __init f2fs_create_root_stats(void); 1352void __init f2fs_create_root_stats(void);
1303void f2fs_destroy_root_stats(void); 1353void f2fs_destroy_root_stats(void);
1304#else 1354#else
1355#define stat_inc_cp_count(si)
1305#define stat_inc_call_count(si) 1356#define stat_inc_call_count(si)
1306#define stat_inc_bggc_count(si) 1357#define stat_inc_bggc_count(si)
1307#define stat_inc_dirty_dir(sbi) 1358#define stat_inc_dirty_dir(sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0dfcef53a6ed..302d552afea5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
76 trace_f2fs_vm_page_mkwrite(page, DATA); 76 trace_f2fs_vm_page_mkwrite(page, DATA);
77mapped: 77mapped:
78 /* fill the page */ 78 /* fill the page */
79 wait_on_page_writeback(page); 79 f2fs_wait_on_page_writeback(page, DATA);
80out: 80out:
81 sb_end_pagefault(inode->i_sb); 81 sb_end_pagefault(inode->i_sb);
82 return block_page_mkwrite_return(err); 82 return block_page_mkwrite_return(err);
@@ -111,11 +111,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
111int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 111int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
112{ 112{
113 struct inode *inode = file->f_mapping->host; 113 struct inode *inode = file->f_mapping->host;
114 struct f2fs_inode_info *fi = F2FS_I(inode);
114 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 115 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
115 int ret = 0; 116 int ret = 0;
116 bool need_cp = false; 117 bool need_cp = false;
117 struct writeback_control wbc = { 118 struct writeback_control wbc = {
118 .sync_mode = WB_SYNC_NONE, 119 .sync_mode = WB_SYNC_ALL,
119 .nr_to_write = LONG_MAX, 120 .nr_to_write = LONG_MAX,
120 .for_reclaim = 0, 121 .for_reclaim = 0,
121 }; 122 };
@@ -133,7 +134,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
133 /* guarantee free sections for fsync */ 134 /* guarantee free sections for fsync */
134 f2fs_balance_fs(sbi); 135 f2fs_balance_fs(sbi);
135 136
136 mutex_lock(&inode->i_mutex); 137 down_read(&fi->i_sem);
137 138
138 /* 139 /*
139 * Both of fdatasync() and fsync() are able to be recovered from 140 * Both of fdatasync() and fsync() are able to be recovered from
@@ -150,25 +151,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
150 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) 151 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
151 need_cp = true; 152 need_cp = true;
152 153
154 up_read(&fi->i_sem);
155
153 if (need_cp) { 156 if (need_cp) {
154 nid_t pino; 157 nid_t pino;
155 158
156 F2FS_I(inode)->xattr_ver = 0;
157
158 /* all the dirty node pages should be flushed for POR */ 159 /* all the dirty node pages should be flushed for POR */
159 ret = f2fs_sync_fs(inode->i_sb, 1); 160 ret = f2fs_sync_fs(inode->i_sb, 1);
161
162 down_write(&fi->i_sem);
163 F2FS_I(inode)->xattr_ver = 0;
160 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 164 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
161 get_parent_ino(inode, &pino)) { 165 get_parent_ino(inode, &pino)) {
162 F2FS_I(inode)->i_pino = pino; 166 F2FS_I(inode)->i_pino = pino;
163 file_got_pino(inode); 167 file_got_pino(inode);
168 up_write(&fi->i_sem);
164 mark_inode_dirty_sync(inode); 169 mark_inode_dirty_sync(inode);
165 ret = f2fs_write_inode(inode, NULL); 170 ret = f2fs_write_inode(inode, NULL);
166 if (ret) 171 if (ret)
167 goto out; 172 goto out;
173 } else {
174 up_write(&fi->i_sem);
168 } 175 }
169 } else { 176 } else {
170 /* if there is no written node page, write its inode page */ 177 /* if there is no written node page, write its inode page */
171 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 178 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
179 if (fsync_mark_done(sbi, inode->i_ino))
180 goto out;
172 mark_inode_dirty_sync(inode); 181 mark_inode_dirty_sync(inode);
173 ret = f2fs_write_inode(inode, NULL); 182 ret = f2fs_write_inode(inode, NULL);
174 if (ret) 183 if (ret)
@@ -177,10 +186,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
177 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 186 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
178 if (ret) 187 if (ret)
179 goto out; 188 goto out;
180 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 189 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
181 } 190 }
182out: 191out:
183 mutex_unlock(&inode->i_mutex);
184 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 192 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
185 return ret; 193 return ret;
186} 194}
@@ -245,7 +253,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
245 f2fs_put_page(page, 1); 253 f2fs_put_page(page, 1);
246 return; 254 return;
247 } 255 }
248 wait_on_page_writeback(page); 256 f2fs_wait_on_page_writeback(page, DATA);
249 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 257 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
250 set_page_dirty(page); 258 set_page_dirty(page);
251 f2fs_put_page(page, 1); 259 f2fs_put_page(page, 1);
@@ -422,7 +430,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
422 f2fs_unlock_op(sbi); 430 f2fs_unlock_op(sbi);
423 431
424 if (!IS_ERR(page)) { 432 if (!IS_ERR(page)) {
425 wait_on_page_writeback(page); 433 f2fs_wait_on_page_writeback(page, DATA);
426 zero_user(page, start, len); 434 zero_user(page, start, len);
427 set_page_dirty(page); 435 set_page_dirty(page);
428 f2fs_put_page(page, 1); 436 f2fs_put_page(page, 1);
@@ -560,6 +568,8 @@ static long f2fs_fallocate(struct file *file, int mode,
560 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 568 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
561 return -EOPNOTSUPP; 569 return -EOPNOTSUPP;
562 570
571 mutex_lock(&inode->i_mutex);
572
563 if (mode & FALLOC_FL_PUNCH_HOLE) 573 if (mode & FALLOC_FL_PUNCH_HOLE)
564 ret = punch_hole(inode, offset, len); 574 ret = punch_hole(inode, offset, len);
565 else 575 else
@@ -569,6 +579,9 @@ static long f2fs_fallocate(struct file *file, int mode,
569 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 579 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
570 mark_inode_dirty(inode); 580 mark_inode_dirty(inode);
571 } 581 }
582
583 mutex_unlock(&inode->i_mutex);
584
572 trace_f2fs_fallocate(inode, mode, offset, len, ret); 585 trace_f2fs_fallocate(inode, mode, offset, len, ret);
573 return ret; 586 return ret;
574} 587}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ea0371e854b4..b90dbe55403a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 set_page_dirty(page); 531 set_page_dirty(page);
532 set_cold_data(page); 532 set_cold_data(page);
533 } else { 533 } else {
534 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
535
536 f2fs_wait_on_page_writeback(page, DATA); 534 f2fs_wait_on_page_writeback(page, DATA);
537 535
538 if (clear_page_dirty_for_io(page) && 536 if (clear_page_dirty_for_io(page))
539 S_ISDIR(inode->i_mode)) {
540 dec_page_count(sbi, F2FS_DIRTY_DENTS);
541 inode_dec_dirty_dents(inode); 537 inode_dec_dirty_dents(inode);
542 }
543 set_cold_data(page); 538 set_cold_data(page);
544 do_write_data_page(page, &fio); 539 do_write_data_page(page, &fio);
545 clear_cold_data(page); 540 clear_cold_data(page);
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
701gc_more: 696gc_more:
702 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 697 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
703 goto stop; 698 goto stop;
699 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
700 goto stop;
704 701
705 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 702 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
706 gc_type = FG_GC; 703 gc_type = FG_GC;
@@ -711,6 +708,11 @@ gc_more:
711 goto stop; 708 goto stop;
712 ret = 0; 709 ret = 0;
713 710
711 /* readahead multi ssa blocks those have contiguous address */
712 if (sbi->segs_per_sec > 1)
713 ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
714 META_SSA);
715
714 for (i = 0; i < sbi->segs_per_sec; i++) 716 for (i = 0; i < sbi->segs_per_sec; i++)
715 do_garbage_collect(sbi, segno + i, &ilist, gc_type); 717 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
716 718
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
740int __init create_gc_caches(void) 742int __init create_gc_caches(void)
741{ 743{
742 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 744 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
743 sizeof(struct inode_entry), NULL); 745 sizeof(struct inode_entry));
744 if (!winode_slab) 746 if (!winode_slab)
745 return -ENOMEM; 747 return -ENOMEM;
746 return 0; 748 return 0;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 31ee5b164ff9..383db1fabcf4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
45 } 45 }
46 46
47 ipage = get_node_page(sbi, inode->i_ino); 47 ipage = get_node_page(sbi, inode->i_ino);
48 if (IS_ERR(ipage)) 48 if (IS_ERR(ipage)) {
49 unlock_page(page);
49 return PTR_ERR(ipage); 50 return PTR_ERR(ipage);
51 }
50 52
51 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 53 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
52 54
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 28cea76d78c6..ee829d360468 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
107 fi->flags = 0; 107 fi->flags = 0;
108 fi->i_advise = ri->i_advise; 108 fi->i_advise = ri->i_advise;
109 fi->i_pino = le32_to_cpu(ri->i_pino); 109 fi->i_pino = le32_to_cpu(ri->i_pino);
110 fi->i_dir_level = ri->i_dir_level;
110 111
111 get_extent_info(&fi->ext, ri->i_ext); 112 get_extent_info(&fi->ext, ri->i_ext);
112 get_inline_info(fi, ri); 113 get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
204 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 205 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
205 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 206 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
206 ri->i_generation = cpu_to_le32(inode->i_generation); 207 ri->i_generation = cpu_to_le32(inode->i_generation);
208 ri->i_dir_level = F2FS_I(inode)->i_dir_level;
207 209
208 __set_inode_rdev(inode, ri); 210 __set_inode_rdev(inode, ri);
209 set_cold_node(inode, node_page); 211 set_cold_node(inode, node_page);
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
212 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 214 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
213} 215}
214 216
215int update_inode_page(struct inode *inode) 217void update_inode_page(struct inode *inode)
216{ 218{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 219 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct page *node_page; 220 struct page *node_page;
219 221retry:
220 node_page = get_node_page(sbi, inode->i_ino); 222 node_page = get_node_page(sbi, inode->i_ino);
221 if (IS_ERR(node_page)) 223 if (IS_ERR(node_page)) {
222 return PTR_ERR(node_page); 224 int err = PTR_ERR(node_page);
223 225 if (err == -ENOMEM) {
226 cond_resched();
227 goto retry;
228 } else if (err != -ENOENT) {
229 f2fs_stop_checkpoint(sbi);
230 }
231 return;
232 }
224 update_inode(inode, node_page); 233 update_inode(inode, node_page);
225 f2fs_put_page(node_page, 1); 234 f2fs_put_page(node_page, 1);
226 return 0;
227} 235}
228 236
229int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 237int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
230{ 238{
231 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
232 int ret;
233 240
234 if (inode->i_ino == F2FS_NODE_INO(sbi) || 241 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
235 inode->i_ino == F2FS_META_INO(sbi)) 242 inode->i_ino == F2FS_META_INO(sbi))
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
243 * during the urgent cleaning time when runing out of free sections. 250 * during the urgent cleaning time when runing out of free sections.
244 */ 251 */
245 f2fs_lock_op(sbi); 252 f2fs_lock_op(sbi);
246 ret = update_inode_page(inode); 253 update_inode_page(inode);
247 f2fs_unlock_op(sbi); 254 f2fs_unlock_op(sbi);
248 255
249 if (wbc) 256 if (wbc)
250 f2fs_balance_fs(sbi); 257 f2fs_balance_fs(sbi);
251 258
252 return ret; 259 return 0;
253} 260}
254 261
255/* 262/*
@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
266 inode->i_ino == F2FS_META_INO(sbi)) 273 inode->i_ino == F2FS_META_INO(sbi))
267 goto no_delete; 274 goto no_delete;
268 275
269 f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); 276 f2fs_bug_on(get_dirty_dents(inode));
270 remove_dirty_dir_inode(inode); 277 remove_dirty_dir_inode(inode);
271 278
272 if (inode->i_nlink || is_bad_inode(inode)) 279 if (inode->i_nlink || is_bad_inode(inode))
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459e97bf..a9409d19dfd4 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 inode = f2fs_iget(dir->i_sb, ino); 207 inode = f2fs_iget(dir->i_sb, ino);
208 if (IS_ERR(inode)) 208 if (IS_ERR(inode))
209 return ERR_CAST(inode); 209 return ERR_CAST(inode);
210
211 stat_inc_inline_inode(inode);
210 } 212 }
211 213
212 return d_splice_alias(inode, dentry); 214 return d_splice_alias(inode, dentry);
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
424 } 426 }
425 427
426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 428 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
429 down_write(&F2FS_I(old_inode)->i_sem);
427 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 430 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
431 up_write(&F2FS_I(old_inode)->i_sem);
428 432
429 new_inode->i_ctime = CURRENT_TIME; 433 new_inode->i_ctime = CURRENT_TIME;
434 down_write(&F2FS_I(new_inode)->i_sem);
430 if (old_dir_entry) 435 if (old_dir_entry)
431 drop_nlink(new_inode); 436 drop_nlink(new_inode);
432 drop_nlink(new_inode); 437 drop_nlink(new_inode);
438 up_write(&F2FS_I(new_inode)->i_sem);
439
433 mark_inode_dirty(new_inode); 440 mark_inode_dirty(new_inode);
434 441
435 if (!new_inode->i_nlink) 442 if (!new_inode->i_nlink)
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
459 if (old_dir != new_dir) { 466 if (old_dir != new_dir) {
460 f2fs_set_link(old_inode, old_dir_entry, 467 f2fs_set_link(old_inode, old_dir_entry,
461 old_dir_page, new_dir); 468 old_dir_page, new_dir);
469 down_write(&F2FS_I(old_inode)->i_sem);
462 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 470 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
471 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 472 update_inode_page(old_inode);
464 } else { 473 } else {
465 kunmap(old_dir_page); 474 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b0649b76eb4f..a161e955c4c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,27 @@
21#include "segment.h" 21#include "segment.h"
22#include <trace/events/f2fs.h> 22#include <trace/events/f2fs.h>
23 23
24#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
25
24static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
25static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
26 28
29static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
30{
31 struct sysinfo val;
32 unsigned long mem_size = 0;
33
34 si_meminfo(&val);
35 if (type == FREE_NIDS)
36 mem_size = nm_i->fcnt * sizeof(struct free_nid);
37 else if (type == NAT_ENTRIES)
38 mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
39 mem_size >>= 12;
40
41 /* give 50:50 memory for free nids and nat caches respectively */
42 return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
43}
44
27static void clear_node_page_dirty(struct page *page) 45static void clear_node_page_dirty(struct page *page)
28{ 46{
29 struct address_space *mapping = page->mapping; 47 struct address_space *mapping = page->mapping;
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
82 return dst_page; 100 return dst_page;
83} 101}
84 102
85/*
86 * Readahead NAT pages
87 */
88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
89{
90 struct address_space *mapping = META_MAPPING(sbi);
91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct page *page;
93 pgoff_t index;
94 int i;
95 struct f2fs_io_info fio = {
96 .type = META,
97 .rw = READ_SYNC | REQ_META | REQ_PRIO
98 };
99
100
101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
102 if (unlikely(nid >= nm_i->max_nid))
103 nid = 0;
104 index = current_nat_addr(sbi, nid);
105
106 page = grab_cache_page(mapping, index);
107 if (!page)
108 continue;
109 if (PageUptodate(page)) {
110 mark_page_accessed(page);
111 f2fs_put_page(page, 1);
112 continue;
113 }
114 f2fs_submit_page_mbio(sbi, page, index, &fio);
115 mark_page_accessed(page);
116 f2fs_put_page(page, 0);
117 }
118 f2fs_submit_merged_bio(sbi, META, READ);
119}
120
121static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 103static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
122{ 104{
123 return radix_tree_lookup(&nm_i->nat_root, n); 105 return radix_tree_lookup(&nm_i->nat_root, n);
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
151 return is_cp; 133 return is_cp;
152} 134}
153 135
136bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
137{
138 struct f2fs_nm_info *nm_i = NM_I(sbi);
139 struct nat_entry *e;
140 bool fsync_done = false;
141
142 read_lock(&nm_i->nat_tree_lock);
143 e = __lookup_nat_cache(nm_i, nid);
144 if (e)
145 fsync_done = e->fsync_done;
146 read_unlock(&nm_i->nat_tree_lock);
147 return fsync_done;
148}
149
154static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 150static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
155{ 151{
156 struct nat_entry *new; 152 struct nat_entry *new;
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
164 } 160 }
165 memset(new, 0, sizeof(struct nat_entry)); 161 memset(new, 0, sizeof(struct nat_entry));
166 nat_set_nid(new, nid); 162 nat_set_nid(new, nid);
163 new->checkpointed = true;
167 list_add_tail(&new->list, &nm_i->nat_entries); 164 list_add_tail(&new->list, &nm_i->nat_entries);
168 nm_i->nat_cnt++; 165 nm_i->nat_cnt++;
169 return new; 166 return new;
@@ -185,13 +182,12 @@ retry:
185 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); 182 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
186 nat_set_ino(e, le32_to_cpu(ne->ino)); 183 nat_set_ino(e, le32_to_cpu(ne->ino));
187 nat_set_version(e, ne->version); 184 nat_set_version(e, ne->version);
188 e->checkpointed = true;
189 } 185 }
190 write_unlock(&nm_i->nat_tree_lock); 186 write_unlock(&nm_i->nat_tree_lock);
191} 187}
192 188
193static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 189static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
194 block_t new_blkaddr) 190 block_t new_blkaddr, bool fsync_done)
195{ 191{
196 struct f2fs_nm_info *nm_i = NM_I(sbi); 192 struct f2fs_nm_info *nm_i = NM_I(sbi);
197 struct nat_entry *e; 193 struct nat_entry *e;
@@ -205,7 +201,6 @@ retry:
205 goto retry; 201 goto retry;
206 } 202 }
207 e->ni = *ni; 203 e->ni = *ni;
208 e->checkpointed = true;
209 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 204 f2fs_bug_on(ni->blk_addr == NEW_ADDR);
210 } else if (new_blkaddr == NEW_ADDR) { 205 } else if (new_blkaddr == NEW_ADDR) {
211 /* 206 /*
@@ -217,9 +212,6 @@ retry:
217 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 212 f2fs_bug_on(ni->blk_addr != NULL_ADDR);
218 } 213 }
219 214
220 if (new_blkaddr == NEW_ADDR)
221 e->checkpointed = false;
222
223 /* sanity check */ 215 /* sanity check */
224 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 216 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
225 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 217 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
@@ -239,6 +231,11 @@ retry:
239 /* change address */ 231 /* change address */
240 nat_set_blkaddr(e, new_blkaddr); 232 nat_set_blkaddr(e, new_blkaddr);
241 __set_nat_cache_dirty(nm_i, e); 233 __set_nat_cache_dirty(nm_i, e);
234
235 /* update fsync_mark if its inode nat entry is still alive */
236 e = __lookup_nat_cache(nm_i, ni->ino);
237 if (e)
238 e->fsync_done = fsync_done;
242 write_unlock(&nm_i->nat_tree_lock); 239 write_unlock(&nm_i->nat_tree_lock);
243} 240}
244 241
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
246{ 243{
247 struct f2fs_nm_info *nm_i = NM_I(sbi); 244 struct f2fs_nm_info *nm_i = NM_I(sbi);
248 245
249 if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) 246 if (available_free_memory(nm_i, NAT_ENTRIES))
250 return 0; 247 return 0;
251 248
252 write_lock(&nm_i->nat_tree_lock); 249 write_lock(&nm_i->nat_tree_lock);
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
505 /* Deallocate node address */ 502 /* Deallocate node address */
506 invalidate_blocks(sbi, ni.blk_addr); 503 invalidate_blocks(sbi, ni.blk_addr);
507 dec_valid_node_count(sbi, dn->inode); 504 dec_valid_node_count(sbi, dn->inode);
508 set_node_addr(sbi, &ni, NULL_ADDR); 505 set_node_addr(sbi, &ni, NULL_ADDR, false);
509 506
510 if (dn->nid == dn->inode->i_ino) { 507 if (dn->nid == dn->inode->i_ino) {
511 remove_orphan_inode(sbi, dn->nid); 508 remove_orphan_inode(sbi, dn->nid);
@@ -763,7 +760,7 @@ skip_partial:
763 f2fs_put_page(page, 1); 760 f2fs_put_page(page, 1);
764 goto restart; 761 goto restart;
765 } 762 }
766 wait_on_page_writeback(page); 763 f2fs_wait_on_page_writeback(page, NODE);
767 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 764 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
768 set_page_dirty(page); 765 set_page_dirty(page);
769 unlock_page(page); 766 unlock_page(page);
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
852 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 849 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
853 return ERR_PTR(-EPERM); 850 return ERR_PTR(-EPERM);
854 851
855 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 852 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
853 dn->nid, AOP_FLAG_NOFS);
856 if (!page) 854 if (!page)
857 return ERR_PTR(-ENOMEM); 855 return ERR_PTR(-ENOMEM);
858 856
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
867 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 865 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
868 new_ni = old_ni; 866 new_ni = old_ni;
869 new_ni.ino = dn->inode->i_ino; 867 new_ni.ino = dn->inode->i_ino;
870 set_node_addr(sbi, &new_ni, NEW_ADDR); 868 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
871 869
872 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 870 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
873 set_cold_node(dn->inode, page); 871 set_cold_node(dn->inode, page);
874 SetPageUptodate(page); 872 SetPageUptodate(page);
875 set_page_dirty(page); 873 set_page_dirty(page);
876 874
877 if (ofs == XATTR_NODE_OFFSET) 875 if (f2fs_has_xattr_block(ofs))
878 F2FS_I(dn->inode)->i_xattr_nid = dn->nid; 876 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
879 877
880 dn->node_page = page; 878 dn->node_page = page;
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
948 struct page *page; 946 struct page *page;
949 int err; 947 int err;
950repeat: 948repeat:
951 page = grab_cache_page(NODE_MAPPING(sbi), nid); 949 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
950 nid, AOP_FLAG_NOFS);
952 if (!page) 951 if (!page)
953 return ERR_PTR(-ENOMEM); 952 return ERR_PTR(-ENOMEM);
954 953
@@ -959,7 +958,7 @@ repeat:
959 goto got_it; 958 goto got_it;
960 959
961 lock_page(page); 960 lock_page(page);
962 if (unlikely(!PageUptodate(page))) { 961 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
963 f2fs_put_page(page, 1); 962 f2fs_put_page(page, 1);
964 return ERR_PTR(-EIO); 963 return ERR_PTR(-EIO);
965 } 964 }
@@ -968,7 +967,6 @@ repeat:
968 goto repeat; 967 goto repeat;
969 } 968 }
970got_it: 969got_it:
971 f2fs_bug_on(nid != nid_of_node(page));
972 mark_page_accessed(page); 970 mark_page_accessed(page);
973 return page; 971 return page;
974} 972}
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1168 continue; 1166 continue;
1169 1167
1170 if (ino && ino_of_node(page) == ino) { 1168 if (ino && ino_of_node(page) == ino) {
1171 wait_on_page_writeback(page); 1169 f2fs_wait_on_page_writeback(page, NODE);
1172 if (TestClearPageError(page)) 1170 if (TestClearPageError(page))
1173 ret = -EIO; 1171 ret = -EIO;
1174 } 1172 }
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
1201 if (unlikely(sbi->por_doing)) 1199 if (unlikely(sbi->por_doing))
1202 goto redirty_out; 1200 goto redirty_out;
1203 1201
1204 wait_on_page_writeback(page); 1202 f2fs_wait_on_page_writeback(page, NODE);
1205 1203
1206 /* get old block addr of this node page */ 1204 /* get old block addr of this node page */
1207 nid = nid_of_node(page); 1205 nid = nid_of_node(page);
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
1222 mutex_lock(&sbi->node_write); 1220 mutex_lock(&sbi->node_write);
1223 set_page_writeback(page); 1221 set_page_writeback(page);
1224 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1222 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1225 set_node_addr(sbi, &ni, new_addr); 1223 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1226 dec_page_count(sbi, F2FS_DIRTY_NODES); 1224 dec_page_count(sbi, F2FS_DIRTY_NODES);
1227 mutex_unlock(&sbi->node_write); 1225 mutex_unlock(&sbi->node_write);
1228 unlock_page(page); 1226 unlock_page(page);
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
1231redirty_out: 1229redirty_out:
1232 dec_page_count(sbi, F2FS_DIRTY_NODES); 1230 dec_page_count(sbi, F2FS_DIRTY_NODES);
1233 wbc->pages_skipped++; 1231 wbc->pages_skipped++;
1232 account_page_redirty(page);
1234 set_page_dirty(page); 1233 set_page_dirty(page);
1235 return AOP_WRITEPAGE_ACTIVATE; 1234 return AOP_WRITEPAGE_ACTIVATE;
1236} 1235}
1237 1236
1238/*
1239 * It is very important to gather dirty pages and write at once, so that we can
1240 * submit a big bio without interfering other data writes.
1241 * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
1242 */
1243#define COLLECT_DIRTY_NODES 1536
1244static int f2fs_write_node_pages(struct address_space *mapping, 1237static int f2fs_write_node_pages(struct address_space *mapping,
1245 struct writeback_control *wbc) 1238 struct writeback_control *wbc)
1246{ 1239{
1247 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1240 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1248 long nr_to_write = wbc->nr_to_write; 1241 long diff;
1249 1242
1250 /* balancing f2fs's metadata in background */ 1243 /* balancing f2fs's metadata in background */
1251 f2fs_balance_fs_bg(sbi); 1244 f2fs_balance_fs_bg(sbi);
1252 1245
1253 /* collect a number of dirty node pages and write together */ 1246 /* collect a number of dirty node pages and write together */
1254 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) 1247 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1255 return 0; 1248 goto skip_write;
1256 1249
1257 /* if mounting is failed, skip writing node pages */ 1250 diff = nr_pages_to_write(sbi, NODE, wbc);
1258 wbc->nr_to_write = 3 * max_hw_blocks(sbi);
1259 wbc->sync_mode = WB_SYNC_NONE; 1251 wbc->sync_mode = WB_SYNC_NONE;
1260 sync_node_pages(sbi, 0, wbc); 1252 sync_node_pages(sbi, 0, wbc);
1261 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - 1253 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1262 wbc->nr_to_write); 1254 return 0;
1255
1256skip_write:
1257 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1263 return 0; 1258 return 0;
1264} 1259}
1265 1260
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
1307 .releasepage = f2fs_release_node_page, 1302 .releasepage = f2fs_release_node_page,
1308}; 1303};
1309 1304
1310static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) 1305static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1306 nid_t n)
1311{ 1307{
1312 struct list_head *this; 1308 return radix_tree_lookup(&nm_i->free_nid_root, n);
1313 struct free_nid *i;
1314 list_for_each(this, head) {
1315 i = list_entry(this, struct free_nid, list);
1316 if (i->nid == n)
1317 return i;
1318 }
1319 return NULL;
1320} 1309}
1321 1310
1322static void __del_from_free_nid_list(struct free_nid *i) 1311static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1312 struct free_nid *i)
1323{ 1313{
1324 list_del(&i->list); 1314 list_del(&i->list);
1325 kmem_cache_free(free_nid_slab, i); 1315 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1326} 1316}
1327 1317
1328static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) 1318static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1331 struct nat_entry *ne; 1321 struct nat_entry *ne;
1332 bool allocated = false; 1322 bool allocated = false;
1333 1323
1334 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) 1324 if (!available_free_memory(nm_i, FREE_NIDS))
1335 return -1; 1325 return -1;
1336 1326
1337 /* 0 nid should not be used */ 1327 /* 0 nid should not be used */
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1342 /* do not add allocated nids */ 1332 /* do not add allocated nids */
1343 read_lock(&nm_i->nat_tree_lock); 1333 read_lock(&nm_i->nat_tree_lock);
1344 ne = __lookup_nat_cache(nm_i, nid); 1334 ne = __lookup_nat_cache(nm_i, nid);
1345 if (ne && nat_get_blkaddr(ne) != NULL_ADDR) 1335 if (ne &&
1336 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1346 allocated = true; 1337 allocated = true;
1347 read_unlock(&nm_i->nat_tree_lock); 1338 read_unlock(&nm_i->nat_tree_lock);
1348 if (allocated) 1339 if (allocated)
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1354 i->state = NID_NEW; 1345 i->state = NID_NEW;
1355 1346
1356 spin_lock(&nm_i->free_nid_list_lock); 1347 spin_lock(&nm_i->free_nid_list_lock);
1357 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { 1348 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1358 spin_unlock(&nm_i->free_nid_list_lock); 1349 spin_unlock(&nm_i->free_nid_list_lock);
1359 kmem_cache_free(free_nid_slab, i); 1350 kmem_cache_free(free_nid_slab, i);
1360 return 0; 1351 return 0;
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1368static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1359static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1369{ 1360{
1370 struct free_nid *i; 1361 struct free_nid *i;
1362 bool need_free = false;
1363
1371 spin_lock(&nm_i->free_nid_list_lock); 1364 spin_lock(&nm_i->free_nid_list_lock);
1372 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1365 i = __lookup_free_nid_list(nm_i, nid);
1373 if (i && i->state == NID_NEW) { 1366 if (i && i->state == NID_NEW) {
1374 __del_from_free_nid_list(i); 1367 __del_from_free_nid_list(nm_i, i);
1375 nm_i->fcnt--; 1368 nm_i->fcnt--;
1369 need_free = true;
1376 } 1370 }
1377 spin_unlock(&nm_i->free_nid_list_lock); 1371 spin_unlock(&nm_i->free_nid_list_lock);
1372
1373 if (need_free)
1374 kmem_cache_free(free_nid_slab, i);
1378} 1375}
1379 1376
1380static void scan_nat_page(struct f2fs_nm_info *nm_i, 1377static void scan_nat_page(struct f2fs_nm_info *nm_i,
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1413 return; 1410 return;
1414 1411
1415 /* readahead nat pages to be scanned */ 1412 /* readahead nat pages to be scanned */
1416 ra_nat_pages(sbi, nid); 1413 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1417 1414
1418 while (1) { 1415 while (1) {
1419 struct page *page = get_current_nat_page(sbi, nid); 1416 struct page *page = get_current_nat_page(sbi, nid);
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1454{ 1451{
1455 struct f2fs_nm_info *nm_i = NM_I(sbi); 1452 struct f2fs_nm_info *nm_i = NM_I(sbi);
1456 struct free_nid *i = NULL; 1453 struct free_nid *i = NULL;
1457 struct list_head *this;
1458retry: 1454retry:
1459 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) 1455 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1460 return false; 1456 return false;
@@ -1462,13 +1458,11 @@ retry:
1462 spin_lock(&nm_i->free_nid_list_lock); 1458 spin_lock(&nm_i->free_nid_list_lock);
1463 1459
1464 /* We should not use stale free nids created by build_free_nids */ 1460 /* We should not use stale free nids created by build_free_nids */
1465 if (nm_i->fcnt && !sbi->on_build_free_nids) { 1461 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1466 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1462 f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1467 list_for_each(this, &nm_i->free_nid_list) { 1463 list_for_each_entry(i, &nm_i->free_nid_list, list)
1468 i = list_entry(this, struct free_nid, list);
1469 if (i->state == NID_NEW) 1464 if (i->state == NID_NEW)
1470 break; 1465 break;
1471 }
1472 1466
1473 f2fs_bug_on(i->state != NID_NEW); 1467 f2fs_bug_on(i->state != NID_NEW);
1474 *nid = i->nid; 1468 *nid = i->nid;
@@ -1481,9 +1475,7 @@ retry:
1481 1475
1482 /* Let's scan nat pages and its caches to get free nids */ 1476 /* Let's scan nat pages and its caches to get free nids */
1483 mutex_lock(&nm_i->build_lock); 1477 mutex_lock(&nm_i->build_lock);
1484 sbi->on_build_free_nids = true;
1485 build_free_nids(sbi); 1478 build_free_nids(sbi);
1486 sbi->on_build_free_nids = false;
1487 mutex_unlock(&nm_i->build_lock); 1479 mutex_unlock(&nm_i->build_lock);
1488 goto retry; 1480 goto retry;
1489} 1481}
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1497 struct free_nid *i; 1489 struct free_nid *i;
1498 1490
1499 spin_lock(&nm_i->free_nid_list_lock); 1491 spin_lock(&nm_i->free_nid_list_lock);
1500 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1492 i = __lookup_free_nid_list(nm_i, nid);
1501 f2fs_bug_on(!i || i->state != NID_ALLOC); 1493 f2fs_bug_on(!i || i->state != NID_ALLOC);
1502 __del_from_free_nid_list(i); 1494 __del_from_free_nid_list(nm_i, i);
1503 spin_unlock(&nm_i->free_nid_list_lock); 1495 spin_unlock(&nm_i->free_nid_list_lock);
1496
1497 kmem_cache_free(free_nid_slab, i);
1504} 1498}
1505 1499
1506/* 1500/*
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1510{ 1504{
1511 struct f2fs_nm_info *nm_i = NM_I(sbi); 1505 struct f2fs_nm_info *nm_i = NM_I(sbi);
1512 struct free_nid *i; 1506 struct free_nid *i;
1507 bool need_free = false;
1513 1508
1514 if (!nid) 1509 if (!nid)
1515 return; 1510 return;
1516 1511
1517 spin_lock(&nm_i->free_nid_list_lock); 1512 spin_lock(&nm_i->free_nid_list_lock);
1518 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1513 i = __lookup_free_nid_list(nm_i, nid);
1519 f2fs_bug_on(!i || i->state != NID_ALLOC); 1514 f2fs_bug_on(!i || i->state != NID_ALLOC);
1520 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { 1515 if (!available_free_memory(nm_i, FREE_NIDS)) {
1521 __del_from_free_nid_list(i); 1516 __del_from_free_nid_list(nm_i, i);
1517 need_free = true;
1522 } else { 1518 } else {
1523 i->state = NID_NEW; 1519 i->state = NID_NEW;
1524 nm_i->fcnt++; 1520 nm_i->fcnt++;
1525 } 1521 }
1526 spin_unlock(&nm_i->free_nid_list_lock); 1522 spin_unlock(&nm_i->free_nid_list_lock);
1523
1524 if (need_free)
1525 kmem_cache_free(free_nid_slab, i);
1527} 1526}
1528 1527
1529void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, 1528void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1531 block_t new_blkaddr) 1530 block_t new_blkaddr)
1532{ 1531{
1533 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); 1532 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1534 set_node_addr(sbi, ni, new_blkaddr); 1533 set_node_addr(sbi, ni, new_blkaddr, false);
1535 clear_node_page_dirty(page); 1534 clear_node_page_dirty(page);
1536} 1535}
1537 1536
1537void recover_inline_xattr(struct inode *inode, struct page *page)
1538{
1539 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1540 void *src_addr, *dst_addr;
1541 size_t inline_size;
1542 struct page *ipage;
1543 struct f2fs_inode *ri;
1544
1545 if (!f2fs_has_inline_xattr(inode))
1546 return;
1547
1548 if (!IS_INODE(page))
1549 return;
1550
1551 ri = F2FS_INODE(page);
1552 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1553 return;
1554
1555 ipage = get_node_page(sbi, inode->i_ino);
1556 f2fs_bug_on(IS_ERR(ipage));
1557
1558 dst_addr = inline_xattr_addr(ipage);
1559 src_addr = inline_xattr_addr(page);
1560 inline_size = inline_xattr_size(inode);
1561
1562 memcpy(dst_addr, src_addr, inline_size);
1563
1564 update_inode(inode, ipage);
1565 f2fs_put_page(ipage, 1);
1566}
1567
1568bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1571 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1572 nid_t new_xnid = nid_of_node(page);
1573 struct node_info ni;
1574
1575 recover_inline_xattr(inode, page);
1576
1577 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1578 return false;
1579
1580 /* 1: invalidate the previous xattr nid */
1581 if (!prev_xnid)
1582 goto recover_xnid;
1583
1584 /* Deallocate node address */
1585 get_node_info(sbi, prev_xnid, &ni);
1586 f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1587 invalidate_blocks(sbi, ni.blk_addr);
1588 dec_valid_node_count(sbi, inode);
1589 set_node_addr(sbi, &ni, NULL_ADDR, false);
1590
1591recover_xnid:
1592 /* 2: allocate new xattr nid */
1593 if (unlikely(!inc_valid_node_count(sbi, inode)))
1594 f2fs_bug_on(1);
1595
1596 remove_free_nid(NM_I(sbi), new_xnid);
1597 get_node_info(sbi, new_xnid, &ni);
1598 ni.ino = inode->i_ino;
1599 set_node_addr(sbi, &ni, NEW_ADDR, false);
1600 F2FS_I(inode)->i_xattr_nid = new_xnid;
1601
1602 /* 3: update xattr blkaddr */
1603 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1604 set_node_addr(sbi, &ni, blkaddr, false);
1605
1606 update_inode_page(inode);
1607 return true;
1608}
1609
1538int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1610int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1539{ 1611{
1540 struct f2fs_inode *src, *dst; 1612 struct f2fs_inode *src, *dst;
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1567 1639
1568 if (unlikely(!inc_valid_node_count(sbi, NULL))) 1640 if (unlikely(!inc_valid_node_count(sbi, NULL)))
1569 WARN_ON(1); 1641 WARN_ON(1);
1570 set_node_addr(sbi, &new_ni, NEW_ADDR); 1642 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1571 inc_valid_inode_count(sbi); 1643 inc_valid_inode_count(sbi);
1572 f2fs_put_page(ipage, 1); 1644 f2fs_put_page(ipage, 1);
1573 return 0; 1645 return 0;
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1590 for (; page_idx < start + nrpages; page_idx++) { 1662 for (; page_idx < start + nrpages; page_idx++) {
1591 /* alloc temporal page for read node summary info*/ 1663 /* alloc temporal page for read node summary info*/
1592 page = alloc_page(GFP_F2FS_ZERO); 1664 page = alloc_page(GFP_F2FS_ZERO);
1593 if (!page) { 1665 if (!page)
1594 struct page *tmp; 1666 break;
1595 list_for_each_entry_safe(page, tmp, pages, lru) {
1596 list_del(&page->lru);
1597 unlock_page(page);
1598 __free_pages(page, 0);
1599 }
1600 return -ENOMEM;
1601 }
1602 1667
1603 lock_page(page); 1668 lock_page(page);
1604 page->index = page_idx; 1669 page->index = page_idx;
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1609 f2fs_submit_page_mbio(sbi, page, page->index, &fio); 1674 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1610 1675
1611 f2fs_submit_merged_bio(sbi, META, READ); 1676 f2fs_submit_merged_bio(sbi, META, READ);
1612 return 0; 1677
1678 return page_idx - start;
1613} 1679}
1614 1680
1615int restore_node_summary(struct f2fs_sb_info *sbi, 1681int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1628 addr = START_BLOCK(sbi, segno); 1694 addr = START_BLOCK(sbi, segno);
1629 sum_entry = &sum->entries[0]; 1695 sum_entry = &sum->entries[0];
1630 1696
1631 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 1697 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1632 nrpages = min(last_offset - i, bio_blocks); 1698 nrpages = min(last_offset - i, bio_blocks);
1633 1699
1634 /* read ahead node pages */ 1700 /* read ahead node pages */
1635 err = ra_sum_pages(sbi, &page_list, addr, nrpages); 1701 nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
1636 if (err) 1702 if (!nrpages)
1637 return err; 1703 return -ENOMEM;
1638 1704
1639 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1705 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1706 if (err)
1707 goto skip;
1640 1708
1641 lock_page(page); 1709 lock_page(page);
1642 if (unlikely(!PageUptodate(page))) { 1710 if (unlikely(!PageUptodate(page))) {
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1648 sum_entry->ofs_in_node = 0; 1716 sum_entry->ofs_in_node = 0;
1649 sum_entry++; 1717 sum_entry++;
1650 } 1718 }
1651
1652 list_del(&page->lru);
1653 unlock_page(page); 1719 unlock_page(page);
1720skip:
1721 list_del(&page->lru);
1654 __free_pages(page, 0); 1722 __free_pages(page, 0);
1655 } 1723 }
1656 } 1724 }
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1709 struct f2fs_nm_info *nm_i = NM_I(sbi); 1777 struct f2fs_nm_info *nm_i = NM_I(sbi);
1710 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1778 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1711 struct f2fs_summary_block *sum = curseg->sum_blk; 1779 struct f2fs_summary_block *sum = curseg->sum_blk;
1712 struct list_head *cur, *n; 1780 struct nat_entry *ne, *cur;
1713 struct page *page = NULL; 1781 struct page *page = NULL;
1714 struct f2fs_nat_block *nat_blk = NULL; 1782 struct f2fs_nat_block *nat_blk = NULL;
1715 nid_t start_nid = 0, end_nid = 0; 1783 nid_t start_nid = 0, end_nid = 0;
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1721 mutex_lock(&curseg->curseg_mutex); 1789 mutex_lock(&curseg->curseg_mutex);
1722 1790
1723 /* 1) flush dirty nat caches */ 1791 /* 1) flush dirty nat caches */
1724 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { 1792 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1725 struct nat_entry *ne;
1726 nid_t nid; 1793 nid_t nid;
1727 struct f2fs_nat_entry raw_ne; 1794 struct f2fs_nat_entry raw_ne;
1728 int offset = -1; 1795 int offset = -1;
1729 block_t new_blkaddr; 1796 block_t new_blkaddr;
1730 1797
1731 ne = list_entry(cur, struct nat_entry, list);
1732 nid = nat_get_nid(ne);
1733
1734 if (nat_get_blkaddr(ne) == NEW_ADDR) 1798 if (nat_get_blkaddr(ne) == NEW_ADDR)
1735 continue; 1799 continue;
1800
1801 nid = nat_get_nid(ne);
1802
1736 if (flushed) 1803 if (flushed)
1737 goto to_nat_page; 1804 goto to_nat_page;
1738 1805
@@ -1783,16 +1850,12 @@ flush_now:
1783 } else { 1850 } else {
1784 write_lock(&nm_i->nat_tree_lock); 1851 write_lock(&nm_i->nat_tree_lock);
1785 __clear_nat_cache_dirty(nm_i, ne); 1852 __clear_nat_cache_dirty(nm_i, ne);
1786 ne->checkpointed = true;
1787 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1788 } 1854 }
1789 } 1855 }
1790 if (!flushed) 1856 if (!flushed)
1791 mutex_unlock(&curseg->curseg_mutex); 1857 mutex_unlock(&curseg->curseg_mutex);
1792 f2fs_put_page(page, 1); 1858 f2fs_put_page(page, 1);
1793
1794 /* 2) shrink nat caches if necessary */
1795 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1796} 1859}
1797 1860
1798static int init_node_manager(struct f2fs_sb_info *sbi) 1861static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1807 /* segment_count_nat includes pair segment so divide to 2. */ 1870 /* segment_count_nat includes pair segment so divide to 2. */
1808 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 1871 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1809 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 1872 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1810 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1873
1874 /* not used nids: 0, node, meta, (and root counted as valid node) */
1875 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
1811 nm_i->fcnt = 0; 1876 nm_i->fcnt = 0;
1812 nm_i->nat_cnt = 0; 1877 nm_i->nat_cnt = 0;
1878 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1813 1879
1880 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1814 INIT_LIST_HEAD(&nm_i->free_nid_list); 1881 INIT_LIST_HEAD(&nm_i->free_nid_list);
1815 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1882 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1816 INIT_LIST_HEAD(&nm_i->nat_entries); 1883 INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1864 spin_lock(&nm_i->free_nid_list_lock); 1931 spin_lock(&nm_i->free_nid_list_lock);
1865 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 1932 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1866 f2fs_bug_on(i->state == NID_ALLOC); 1933 f2fs_bug_on(i->state == NID_ALLOC);
1867 __del_from_free_nid_list(i); 1934 __del_from_free_nid_list(nm_i, i);
1868 nm_i->fcnt--; 1935 nm_i->fcnt--;
1936 spin_unlock(&nm_i->free_nid_list_lock);
1937 kmem_cache_free(free_nid_slab, i);
1938 spin_lock(&nm_i->free_nid_list_lock);
1869 } 1939 }
1870 f2fs_bug_on(nm_i->fcnt); 1940 f2fs_bug_on(nm_i->fcnt);
1871 spin_unlock(&nm_i->free_nid_list_lock); 1941 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1875 while ((found = __gang_lookup_nat_cache(nm_i, 1945 while ((found = __gang_lookup_nat_cache(nm_i,
1876 nid, NATVEC_SIZE, natvec))) { 1946 nid, NATVEC_SIZE, natvec))) {
1877 unsigned idx; 1947 unsigned idx;
1878 for (idx = 0; idx < found; idx++) { 1948 nid = nat_get_nid(natvec[found - 1]) + 1;
1879 struct nat_entry *e = natvec[idx]; 1949 for (idx = 0; idx < found; idx++)
1880 nid = nat_get_nid(e) + 1; 1950 __del_from_nat_cache(nm_i, natvec[idx]);
1881 __del_from_nat_cache(nm_i, e);
1882 }
1883 } 1951 }
1884 f2fs_bug_on(nm_i->nat_cnt); 1952 f2fs_bug_on(nm_i->nat_cnt);
1885 write_unlock(&nm_i->nat_tree_lock); 1953 write_unlock(&nm_i->nat_tree_lock);
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1892int __init create_node_manager_caches(void) 1960int __init create_node_manager_caches(void)
1893{ 1961{
1894 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1962 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1895 sizeof(struct nat_entry), NULL); 1963 sizeof(struct nat_entry));
1896 if (!nat_entry_slab) 1964 if (!nat_entry_slab)
1897 return -ENOMEM; 1965 return -ENOMEM;
1898 1966
1899 free_nid_slab = f2fs_kmem_cache_create("free_nid", 1967 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1900 sizeof(struct free_nid), NULL); 1968 sizeof(struct free_nid));
1901 if (!free_nid_slab) { 1969 if (!free_nid_slab) {
1902 kmem_cache_destroy(nat_entry_slab); 1970 kmem_cache_destroy(nat_entry_slab);
1903 return -ENOMEM; 1971 return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c4c79885c993..5decc1a375f0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
17/* # of pages to perform readahead before building free nids */ 17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4 18#define FREE_NID_PAGES 4
19 19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node during getting data blocks */ 20/* maximum readahead size for node during getting data blocks */
24#define MAX_RA_NODE 128 21#define MAX_RA_NODE 128
25 22
26/* maximum cached nat entries to manage memory footprint */ 23/* control the memory footprint threshold (10MB per 1GB ram) */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) 24#define DEF_RAM_THRESHOLD 10
28 25
29/* vector size for gang look-up from nat cache that consists of radix tree */ 26/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64 27#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
45struct nat_entry { 42struct nat_entry {
46 struct list_head list; /* for clean or dirty nat list */ 43 struct list_head list; /* for clean or dirty nat list */
47 bool checkpointed; /* whether it is checkpointed or not */ 44 bool checkpointed; /* whether it is checkpointed or not */
45 bool fsync_done; /* whether the latest node has fsync mark */
48 struct node_info ni; /* in-memory node information */ 46 struct node_info ni; /* in-memory node information */
49}; 47};
50 48
@@ -58,9 +56,15 @@ struct nat_entry {
58#define nat_set_version(nat, v) (nat->ni.version = v) 56#define nat_set_version(nat, v) (nat->ni.version = v)
59 57
60#define __set_nat_cache_dirty(nm_i, ne) \ 58#define __set_nat_cache_dirty(nm_i, ne) \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); 59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0);
62#define __clear_nat_cache_dirty(nm_i, ne) \ 63#define __clear_nat_cache_dirty(nm_i, ne) \
63 list_move_tail(&ne->list, &nm_i->nat_entries); 64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0);
64#define inc_node_version(version) (++version) 68#define inc_node_version(version) (++version)
65 69
66static inline void node_info_from_raw_nat(struct node_info *ni, 70static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
71 ni->version = raw_ne->version; 75 ni->version = raw_ne->version;
72} 76}
73 77
78enum nid_type {
79 FREE_NIDS, /* indicates the free nid list */
80 NAT_ENTRIES /* indicates the cached nat entry */
81};
82
74/* 83/*
75 * For free nid mangement 84 * For free nid mangement
76 */ 85 */
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
236{ 245{
237 unsigned int ofs = ofs_of_node(node_page); 246 unsigned int ofs = ofs_of_node(node_page);
238 247
239 if (ofs == XATTR_NODE_OFFSET) 248 if (f2fs_has_xattr_block(ofs))
240 return false; 249 return false;
241 250
242 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 251 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 976a7a934db5..b1ae89f0f44e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino) 28 nid_t ino)
29{ 29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry; 30 struct fsync_inode_entry *entry;
32 31
33 list_for_each(this, head) { 32 list_for_each_entry(entry, head, list)
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino) 33 if (entry->inode->i_ino == ino)
36 return entry; 34 return entry;
37 } 35
38 return NULL; 36 return NULL;
39} 37}
40 38
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
136 134
137 /* get node pages in the current segment */ 135 /* get node pages in the current segment */
138 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 136 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
139 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; 137 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
140 138
141 /* read node page */ 139 /* read node page */
142 page = alloc_page(GFP_F2FS_ZERO); 140 page = alloc_page(GFP_F2FS_ZERO);
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
218{ 216{
219 struct seg_entry *sentry; 217 struct seg_entry *sentry;
220 unsigned int segno = GET_SEGNO(sbi, blkaddr); 218 unsigned int segno = GET_SEGNO(sbi, blkaddr);
221 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & 219 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
222 (sbi->blocks_per_seg - 1); 220 struct f2fs_summary_block *sum_node;
223 struct f2fs_summary sum; 221 struct f2fs_summary sum;
222 struct page *sum_page, *node_page;
224 nid_t ino, nid; 223 nid_t ino, nid;
225 void *kaddr;
226 struct inode *inode; 224 struct inode *inode;
227 struct page *node_page;
228 unsigned int offset; 225 unsigned int offset;
229 block_t bidx; 226 block_t bidx;
230 int i; 227 int i;
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
238 struct curseg_info *curseg = CURSEG_I(sbi, i); 235 struct curseg_info *curseg = CURSEG_I(sbi, i);
239 if (curseg->segno == segno) { 236 if (curseg->segno == segno) {
240 sum = curseg->sum_blk->entries[blkoff]; 237 sum = curseg->sum_blk->entries[blkoff];
241 break; 238 goto got_it;
242 } 239 }
243 } 240 }
244 if (i > CURSEG_COLD_DATA) {
245 struct page *sum_page = get_sum_page(sbi, segno);
246 struct f2fs_summary_block *sum_node;
247 kaddr = page_address(sum_page);
248 sum_node = (struct f2fs_summary_block *)kaddr;
249 sum = sum_node->entries[blkoff];
250 f2fs_put_page(sum_page, 1);
251 }
252 241
242 sum_page = get_sum_page(sbi, segno);
243 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
244 sum = sum_node->entries[blkoff];
245 f2fs_put_page(sum_page, 1);
246got_it:
253 /* Use the locked dnode page and inode */ 247 /* Use the locked dnode page and inode */
254 nid = le32_to_cpu(sum.nid); 248 nid = le32_to_cpu(sum.nid);
255 if (dn->inode->i_ino == nid) { 249 if (dn->inode->i_ino == nid) {
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
301 if (recover_inline_data(inode, page)) 295 if (recover_inline_data(inode, page))
302 goto out; 296 goto out;
303 297
298 if (recover_xattr_data(inode, page, blkaddr))
299 goto out;
300
304 start = start_bidx_of_node(ofs_of_node(page), fi); 301 start = start_bidx_of_node(ofs_of_node(page), fi);
305 if (IS_INODE(page)) 302 if (IS_INODE(page))
306 end = start + ADDRS_PER_INODE(fi); 303 end = start + ADDRS_PER_INODE(fi);
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
317 goto out; 314 goto out;
318 } 315 }
319 316
320 wait_on_page_writeback(dn.node_page); 317 f2fs_wait_on_page_writeback(dn.node_page, NODE);
321 318
322 get_node_info(sbi, dn.nid, &ni); 319 get_node_info(sbi, dn.nid, &ni);
323 f2fs_bug_on(ni.ino != ino_of_node(page)); 320 f2fs_bug_on(ni.ino != ino_of_node(page));
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
437 bool need_writecp = false; 434 bool need_writecp = false;
438 435
439 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 436 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
440 sizeof(struct fsync_inode_entry), NULL); 437 sizeof(struct fsync_inode_entry));
441 if (!fsync_entry_slab) 438 if (!fsync_entry_slab)
442 return -ENOMEM; 439 return -ENOMEM;
443 440
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7caac5f2ca9e..085f548be7a3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h> 15#include <linux/prefetch.h>
16#include <linux/kthread.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
17#include <linux/swap.h> 18#include <linux/swap.h>
18 19
@@ -24,6 +25,7 @@
24#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
25 26
26static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *flush_cmd_slab;
27 29
28/* 30/*
29 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
195 f2fs_sync_fs(sbi->sb, true); 197 f2fs_sync_fs(sbi->sb, true);
196} 198}
197 199
200static int issue_flush_thread(void *data)
201{
202 struct f2fs_sb_info *sbi = data;
203 struct f2fs_sm_info *sm_i = SM_I(sbi);
204 wait_queue_head_t *q = &sm_i->flush_wait_queue;
205repeat:
206 if (kthread_should_stop())
207 return 0;
208
209 spin_lock(&sm_i->issue_lock);
210 if (sm_i->issue_list) {
211 sm_i->dispatch_list = sm_i->issue_list;
212 sm_i->issue_list = sm_i->issue_tail = NULL;
213 }
214 spin_unlock(&sm_i->issue_lock);
215
216 if (sm_i->dispatch_list) {
217 struct bio *bio = bio_alloc(GFP_NOIO, 0);
218 struct flush_cmd *cmd, *next;
219 int ret;
220
221 bio->bi_bdev = sbi->sb->s_bdev;
222 ret = submit_bio_wait(WRITE_FLUSH, bio);
223
224 for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
225 cmd->ret = ret;
226 next = cmd->next;
227 complete(&cmd->wait);
228 }
229 sm_i->dispatch_list = NULL;
230 }
231
232 wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
233 goto repeat;
234}
235
236int f2fs_issue_flush(struct f2fs_sb_info *sbi)
237{
238 struct f2fs_sm_info *sm_i = SM_I(sbi);
239 struct flush_cmd *cmd;
240 int ret;
241
242 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244
245 cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
246 cmd->next = NULL;
247 cmd->ret = 0;
248 init_completion(&cmd->wait);
249
250 spin_lock(&sm_i->issue_lock);
251 if (sm_i->issue_list)
252 sm_i->issue_tail->next = cmd;
253 else
254 sm_i->issue_list = cmd;
255 sm_i->issue_tail = cmd;
256 spin_unlock(&sm_i->issue_lock);
257
258 if (!sm_i->dispatch_list)
259 wake_up(&sm_i->flush_wait_queue);
260
261 wait_for_completion(&cmd->wait);
262 ret = cmd->ret;
263 kmem_cache_free(flush_cmd_slab, cmd);
264 return ret;
265}
266
198static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 267static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
199 enum dirty_type dirty_type) 268 enum dirty_type dirty_type)
200{ 269{
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
340void clear_prefree_segments(struct f2fs_sb_info *sbi) 409void clear_prefree_segments(struct f2fs_sb_info *sbi)
341{ 410{
342 struct list_head *head = &(SM_I(sbi)->discard_list); 411 struct list_head *head = &(SM_I(sbi)->discard_list);
343 struct list_head *this, *next; 412 struct discard_entry *entry, *this;
344 struct discard_entry *entry;
345 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 413 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
346 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 414 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
347 unsigned int total_segs = TOTAL_SEGS(sbi); 415 unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
370 mutex_unlock(&dirty_i->seglist_lock); 438 mutex_unlock(&dirty_i->seglist_lock);
371 439
372 /* send small discards */ 440 /* send small discards */
373 list_for_each_safe(this, next, head) { 441 list_for_each_entry_safe(entry, this, head, list) {
374 entry = list_entry(this, struct discard_entry, list);
375 f2fs_issue_discard(sbi, entry->blkaddr, entry->len); 442 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
376 list_del(&entry->list); 443 list_del(&entry->list);
377 SM_I(sbi)->nr_discards -= entry->len; 444 SM_I(sbi)->nr_discards -= entry->len;
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
405 472
406 se = get_seg_entry(sbi, segno); 473 se = get_seg_entry(sbi, segno);
407 new_vblocks = se->valid_blocks + del; 474 new_vblocks = se->valid_blocks + del;
408 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); 475 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
409 476
410 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 477 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
411 (new_vblocks > sbi->blocks_per_seg))); 478 (new_vblocks > sbi->blocks_per_seg)));
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
434 get_sec_entry(sbi, segno)->valid_blocks += del; 501 get_sec_entry(sbi, segno)->valid_blocks += del;
435} 502}
436 503
437static void refresh_sit_entry(struct f2fs_sb_info *sbi, 504void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
438 block_t old_blkaddr, block_t new_blkaddr)
439{ 505{
440 update_sit_entry(sbi, new_blkaddr, 1); 506 update_sit_entry(sbi, new, 1);
441 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 507 if (GET_SEGNO(sbi, old) != NULL_SEGNO)
442 update_sit_entry(sbi, old_blkaddr, -1); 508 update_sit_entry(sbi, old, -1);
509
510 locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
511 locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
443} 512}
444 513
445void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 514void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
881 950
882 stat_inc_block_count(sbi, curseg); 951 stat_inc_block_count(sbi, curseg);
883 952
953 if (!__has_curseg_space(sbi, type))
954 sit_i->s_ops->allocate_segment(sbi, type, false);
884 /* 955 /*
885 * SIT information should be updated before segment allocation, 956 * SIT information should be updated before segment allocation,
886 * since SSR needs latest valid block information. 957 * since SSR needs latest valid block information.
887 */ 958 */
888 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 959 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
889
890 if (!__has_curseg_space(sbi, type))
891 sit_i->s_ops->allocate_segment(sbi, type, false);
892
893 locate_dirty_segment(sbi, old_cursegno); 960 locate_dirty_segment(sbi, old_cursegno);
894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 961
895 mutex_unlock(&sit_i->sentry_lock); 962 mutex_unlock(&sit_i->sentry_lock);
896 963
897 if (page && IS_NODESEG(type)) 964 if (page && IS_NODESEG(type))
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
987 change_curseg(sbi, type, true); 1054 change_curseg(sbi, type, true);
988 } 1055 }
989 1056
990 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1057 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
991 (sbi->blocks_per_seg - 1);
992 __add_sum_entry(sbi, type, sum); 1058 __add_sum_entry(sbi, type, sum);
993 1059
994 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1060 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
995
996 locate_dirty_segment(sbi, old_cursegno); 1061 locate_dirty_segment(sbi, old_cursegno);
997 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
998 1062
999 mutex_unlock(&sit_i->sentry_lock); 1063 mutex_unlock(&sit_i->sentry_lock);
1000 mutex_unlock(&curseg->curseg_mutex); 1064 mutex_unlock(&curseg->curseg_mutex);
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1028 curseg->next_segno = segno; 1092 curseg->next_segno = segno;
1029 change_curseg(sbi, type, true); 1093 change_curseg(sbi, type, true);
1030 } 1094 }
1031 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1095 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1032 (sbi->blocks_per_seg - 1);
1033 __add_sum_entry(sbi, type, sum); 1096 __add_sum_entry(sbi, type, sum);
1034 1097
1035 /* change the current log to the next block addr in advance */ 1098 /* change the current log to the next block addr in advance */
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1037 curseg->next_segno = next_segno; 1100 curseg->next_segno = next_segno;
1038 change_curseg(sbi, type, true); 1101 change_curseg(sbi, type, true);
1039 } 1102 }
1040 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & 1103 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1041 (sbi->blocks_per_seg - 1);
1042 1104
1043 /* rewrite node page */ 1105 /* rewrite node page */
1044 set_page_writeback(page); 1106 set_page_writeback(page);
1045 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); 1107 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1046 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1108 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1047 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1109 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1048
1049 locate_dirty_segment(sbi, old_cursegno); 1110 locate_dirty_segment(sbi, old_cursegno);
1050 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1051 1111
1052 mutex_unlock(&sit_i->sentry_lock); 1112 mutex_unlock(&sit_i->sentry_lock);
1053 mutex_unlock(&curseg->curseg_mutex); 1113 mutex_unlock(&curseg->curseg_mutex);
1054} 1114}
1055 1115
1116static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1117 struct page *page, enum page_type type)
1118{
1119 enum page_type btype = PAGE_TYPE_OF_BIO(type);
1120 struct f2fs_bio_info *io = &sbi->write_io[btype];
1121 struct bio_vec *bvec;
1122 int i;
1123
1124 down_read(&io->io_rwsem);
1125 if (!io->bio)
1126 goto out;
1127
1128 bio_for_each_segment_all(bvec, io->bio, i) {
1129 if (page == bvec->bv_page) {
1130 up_read(&io->io_rwsem);
1131 return true;
1132 }
1133 }
1134
1135out:
1136 up_read(&io->io_rwsem);
1137 return false;
1138}
1139
1056void f2fs_wait_on_page_writeback(struct page *page, 1140void f2fs_wait_on_page_writeback(struct page *page,
1057 enum page_type type) 1141 enum page_type type)
1058{ 1142{
1059 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1143 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1060 if (PageWriteback(page)) { 1144 if (PageWriteback(page)) {
1061 f2fs_submit_merged_bio(sbi, type, WRITE); 1145 if (is_merged_page(sbi, page, type))
1146 f2fs_submit_merged_bio(sbi, type, WRITE);
1062 wait_on_page_writeback(page); 1147 wait_on_page_writeback(page);
1063 } 1148 }
1064} 1149}
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1167 ns->ofs_in_node = 0; 1252 ns->ofs_in_node = 0;
1168 } 1253 }
1169 } else { 1254 } else {
1170 if (restore_node_summary(sbi, segno, sum)) { 1255 int err;
1256
1257 err = restore_node_summary(sbi, segno, sum);
1258 if (err) {
1171 f2fs_put_page(new, 1); 1259 f2fs_put_page(new, 1);
1172 return -EINVAL; 1260 return err;
1173 } 1261 }
1174 } 1262 }
1175 } 1263 }
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1190static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 1278static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1191{ 1279{
1192 int type = CURSEG_HOT_DATA; 1280 int type = CURSEG_HOT_DATA;
1281 int err;
1193 1282
1194 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { 1283 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1195 /* restore for compacted data summary */ 1284 /* restore for compacted data summary */
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1198 type = CURSEG_HOT_NODE; 1287 type = CURSEG_HOT_NODE;
1199 } 1288 }
1200 1289
1201 for (; type <= CURSEG_COLD_NODE; type++) 1290 for (; type <= CURSEG_COLD_NODE; type++) {
1202 if (read_normal_summaries(sbi, type)) 1291 err = read_normal_summaries(sbi, type);
1203 return -EINVAL; 1292 if (err)
1293 return err;
1294 }
1295
1204 return 0; 1296 return 0;
1205} 1297}
1206 1298
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1583 return restore_curseg_summaries(sbi); 1675 return restore_curseg_summaries(sbi);
1584} 1676}
1585 1677
1586static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
1587{
1588 struct address_space *mapping = META_MAPPING(sbi);
1589 struct page *page;
1590 block_t blk_addr, prev_blk_addr = 0;
1591 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1592 int blkno = start;
1593 struct f2fs_io_info fio = {
1594 .type = META,
1595 .rw = READ_SYNC | REQ_META | REQ_PRIO
1596 };
1597
1598 for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
1599
1600 blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
1601
1602 if (blkno != start && prev_blk_addr + 1 != blk_addr)
1603 break;
1604 prev_blk_addr = blk_addr;
1605repeat:
1606 page = grab_cache_page(mapping, blk_addr);
1607 if (!page) {
1608 cond_resched();
1609 goto repeat;
1610 }
1611 if (PageUptodate(page)) {
1612 mark_page_accessed(page);
1613 f2fs_put_page(page, 1);
1614 continue;
1615 }
1616
1617 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
1618
1619 mark_page_accessed(page);
1620 f2fs_put_page(page, 0);
1621 }
1622
1623 f2fs_submit_merged_bio(sbi, META, READ);
1624 return blkno - start;
1625}
1626
1627static void build_sit_entries(struct f2fs_sb_info *sbi) 1678static void build_sit_entries(struct f2fs_sb_info *sbi)
1628{ 1679{
1629 struct sit_info *sit_i = SIT_I(sbi); 1680 struct sit_info *sit_i = SIT_I(sbi);
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1635 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1686 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1636 1687
1637 do { 1688 do {
1638 readed = ra_sit_pages(sbi, start_blk, nrpages); 1689 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1639 1690
1640 start = start_blk * sit_i->sents_per_block; 1691 start = start_blk * sit_i->sents_per_block;
1641 end = (start_blk + readed) * sit_i->sents_per_block; 1692 end = (start_blk + readed) * sit_i->sents_per_block;
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1781{ 1832{
1782 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1833 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1783 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1834 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1835 dev_t dev = sbi->sb->s_bdev->bd_dev;
1784 struct f2fs_sm_info *sm_info; 1836 struct f2fs_sm_info *sm_info;
1785 int err; 1837 int err;
1786 1838
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1799 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 1851 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1800 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1852 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1801 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1853 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1802 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1854 sm_info->rec_prefree_segments = sm_info->main_segments *
1855 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1803 sm_info->ipu_policy = F2FS_IPU_DISABLE; 1856 sm_info->ipu_policy = F2FS_IPU_DISABLE;
1804 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1857 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
1805 1858
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1807 sm_info->nr_discards = 0; 1860 sm_info->nr_discards = 0;
1808 sm_info->max_discards = 0; 1861 sm_info->max_discards = 0;
1809 1862
1863 if (test_opt(sbi, FLUSH_MERGE)) {
1864 spin_lock_init(&sm_info->issue_lock);
1865 init_waitqueue_head(&sm_info->flush_wait_queue);
1866
1867 sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
1868 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
1869 if (IS_ERR(sm_info->f2fs_issue_flush))
1870 return PTR_ERR(sm_info->f2fs_issue_flush);
1871 }
1872
1810 err = build_sit_info(sbi); 1873 err = build_sit_info(sbi);
1811 if (err) 1874 if (err)
1812 return err; 1875 return err;
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1915 struct f2fs_sm_info *sm_info = SM_I(sbi); 1978 struct f2fs_sm_info *sm_info = SM_I(sbi);
1916 if (!sm_info) 1979 if (!sm_info)
1917 return; 1980 return;
1981 if (sm_info->f2fs_issue_flush)
1982 kthread_stop(sm_info->f2fs_issue_flush);
1918 destroy_dirty_segmap(sbi); 1983 destroy_dirty_segmap(sbi);
1919 destroy_curseg(sbi); 1984 destroy_curseg(sbi);
1920 destroy_free_segmap(sbi); 1985 destroy_free_segmap(sbi);
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1926int __init create_segment_manager_caches(void) 1991int __init create_segment_manager_caches(void)
1927{ 1992{
1928 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 1993 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1929 sizeof(struct discard_entry), NULL); 1994 sizeof(struct discard_entry));
1930 if (!discard_entry_slab) 1995 if (!discard_entry_slab)
1931 return -ENOMEM; 1996 return -ENOMEM;
1997 flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
1998 sizeof(struct flush_cmd));
1999 if (!flush_cmd_slab) {
2000 kmem_cache_destroy(discard_entry_slab);
2001 return -ENOMEM;
2002 }
1932 return 0; 2003 return 0;
1933} 2004}
1934 2005
1935void destroy_segment_manager_caches(void) 2006void destroy_segment_manager_caches(void)
1936{ 2007{
1937 kmem_cache_destroy(discard_entry_slab); 2008 kmem_cache_destroy(discard_entry_slab);
2009 kmem_cache_destroy(flush_cmd_slab);
1938} 2010}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5731682d7516..7091204680f4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,7 +14,7 @@
14#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0)) 15#define NULL_SECNO ((unsigned int)(~0))
16 16
17#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ 17#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
18 18
19/* L: Logical segment # in volume, R: Relative segment # in main area */ 19/* L: Logical segment # in volume, R: Relative segment # in main area */
20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@@ -57,6 +57,9 @@
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr) 57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
61 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
62
60#define GET_SEGNO(sbi, blk_addr) \ 63#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ 64 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ 65 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
377 380
378static inline block_t written_block_count(struct f2fs_sb_info *sbi) 381static inline block_t written_block_count(struct f2fs_sb_info *sbi)
379{ 382{
380 struct sit_info *sit_i = SIT_I(sbi); 383 return SIT_I(sbi)->written_valid_blocks;
381 block_t vblocks;
382
383 mutex_lock(&sit_i->sentry_lock);
384 vblocks = sit_i->written_valid_blocks;
385 mutex_unlock(&sit_i->sentry_lock);
386
387 return vblocks;
388} 384}
389 385
390static inline unsigned int free_segments(struct f2fs_sb_info *sbi) 386static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
391{ 387{
392 struct free_segmap_info *free_i = FREE_I(sbi); 388 return FREE_I(sbi)->free_segments;
393 unsigned int free_segs;
394
395 read_lock(&free_i->segmap_lock);
396 free_segs = free_i->free_segments;
397 read_unlock(&free_i->segmap_lock);
398
399 return free_segs;
400} 389}
401 390
402static inline int reserved_segments(struct f2fs_sb_info *sbi) 391static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
406 395
407static inline unsigned int free_sections(struct f2fs_sb_info *sbi) 396static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
408{ 397{
409 struct free_segmap_info *free_i = FREE_I(sbi); 398 return FREE_I(sbi)->free_sections;
410 unsigned int free_secs;
411
412 read_lock(&free_i->segmap_lock);
413 free_secs = free_i->free_sections;
414 read_unlock(&free_i->segmap_lock);
415
416 return free_secs;
417} 399}
418 400
419static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) 401static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
682 struct request_queue *q = bdev_get_queue(bdev); 664 struct request_queue *q = bdev_get_queue(bdev);
683 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 665 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
684} 666}
667
668/*
669 * It is very important to gather dirty pages and write at once, so that we can
670 * submit a big bio without interfering other data writes.
671 * By default, 512 pages for directory data,
672 * 512 pages (2MB) * 3 for three types of nodes, and
673 * max_bio_blocks for meta are set.
674 */
675static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
676{
677 if (type == DATA)
678 return sbi->blocks_per_seg;
679 else if (type == NODE)
680 return 3 * sbi->blocks_per_seg;
681 else if (type == META)
682 return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
683 else
684 return 0;
685}
686
687/*
688 * When writing pages, it'd better align nr_to_write for segment size.
689 */
690static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
691 struct writeback_control *wbc)
692{
693 long nr_to_write, desired;
694
695 if (wbc->sync_mode != WB_SYNC_NONE)
696 return 0;
697
698 nr_to_write = wbc->nr_to_write;
699
700 if (type == DATA)
701 desired = 4096;
702 else if (type == NODE)
703 desired = 3 * max_hw_blocks(sbi);
704 else
705 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
706
707 wbc->nr_to_write = desired;
708 return desired - nr_to_write;
709}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 856bdf994c0a..c756923a7302 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@ enum {
51 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge,
54 Opt_err, 55 Opt_err,
55}; 56};
56 57
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
67 {Opt_disable_ext_identify, "disable_ext_identify"}, 68 {Opt_disable_ext_identify, "disable_ext_identify"},
68 {Opt_inline_xattr, "inline_xattr"}, 69 {Opt_inline_xattr, "inline_xattr"},
69 {Opt_inline_data, "inline_data"}, 70 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"},
70 {Opt_err, NULL}, 72 {Opt_err, NULL},
71}; 73};
72 74
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
74enum { 76enum {
75 GC_THREAD, /* struct f2fs_gc_thread */ 77 GC_THREAD, /* struct f2fs_gc_thread */
76 SM_INFO, /* struct f2fs_sm_info */ 78 SM_INFO, /* struct f2fs_sm_info */
79 NM_INFO, /* struct f2fs_nm_info */
77 F2FS_SBI, /* struct f2fs_sb_info */ 80 F2FS_SBI, /* struct f2fs_sb_info */
78}; 81};
79 82
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
92 return (unsigned char *)sbi->gc_thread; 95 return (unsigned char *)sbi->gc_thread;
93 else if (struct_type == SM_INFO) 96 else if (struct_type == SM_INFO)
94 return (unsigned char *)SM_I(sbi); 97 return (unsigned char *)SM_I(sbi);
98 else if (struct_type == NM_INFO)
99 return (unsigned char *)NM_I(sbi);
95 else if (struct_type == F2FS_SBI) 100 else if (struct_type == F2FS_SBI)
96 return (unsigned char *)sbi; 101 return (unsigned char *)sbi;
97 return NULL; 102 return NULL;
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
183F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 188F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
184F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 189F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
185F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
191F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
186F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 192F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
193F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
187 194
188#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 195#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
189static struct attribute *f2fs_attrs[] = { 196static struct attribute *f2fs_attrs[] = {
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
196 ATTR_LIST(ipu_policy), 203 ATTR_LIST(ipu_policy),
197 ATTR_LIST(min_ipu_util), 204 ATTR_LIST(min_ipu_util),
198 ATTR_LIST(max_victim_search), 205 ATTR_LIST(max_victim_search),
206 ATTR_LIST(dir_level),
207 ATTR_LIST(ram_thresh),
199 NULL, 208 NULL,
200}; 209};
201 210
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
256 265
257 if (!name) 266 if (!name)
258 return -ENOMEM; 267 return -ENOMEM;
259 if (!strncmp(name, "on", 2)) 268 if (strlen(name) == 2 && !strncmp(name, "on", 2))
260 set_opt(sbi, BG_GC); 269 set_opt(sbi, BG_GC);
261 else if (!strncmp(name, "off", 3)) 270 else if (strlen(name) == 3 && !strncmp(name, "off", 3))
262 clear_opt(sbi, BG_GC); 271 clear_opt(sbi, BG_GC);
263 else { 272 else {
264 kfree(name); 273 kfree(name);
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
327 case Opt_inline_data: 336 case Opt_inline_data:
328 set_opt(sbi, INLINE_DATA); 337 set_opt(sbi, INLINE_DATA);
329 break; 338 break;
339 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE);
341 break;
330 default: 342 default:
331 f2fs_msg(sb, KERN_ERR, 343 f2fs_msg(sb, KERN_ERR,
332 "Unrecognized mount option \"%s\" or missing value", 344 "Unrecognized mount option \"%s\" or missing value",
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
353 fi->i_current_depth = 1; 365 fi->i_current_depth = 1;
354 fi->i_advise = 0; 366 fi->i_advise = 0;
355 rwlock_init(&fi->ext.ext_lock); 367 rwlock_init(&fi->ext.ext_lock);
368 init_rwsem(&fi->i_sem);
356 369
357 set_inode_flag(fi, FI_NEW_INODE); 370 set_inode_flag(fi, FI_NEW_INODE);
358 371
359 if (test_opt(F2FS_SB(sb), INLINE_XATTR)) 372 if (test_opt(F2FS_SB(sb), INLINE_XATTR))
360 set_inode_flag(fi, FI_INLINE_XATTR); 373 set_inode_flag(fi, FI_INLINE_XATTR);
361 374
375 /* Will be used by directory only */
376 fi->i_dir_level = F2FS_SB(sb)->dir_level;
377
362 return &fi->vfs_inode; 378 return &fi->vfs_inode;
363} 379}
364 380
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
526 seq_puts(seq, ",disable_ext_identify"); 542 seq_puts(seq, ",disable_ext_identify");
527 if (test_opt(sbi, INLINE_DATA)) 543 if (test_opt(sbi, INLINE_DATA))
528 seq_puts(seq, ",inline_data"); 544 seq_puts(seq, ",inline_data");
545 if (test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge");
529 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 547 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
530 548
531 return 0; 549 return 0;
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
539 le32_to_cpu(sbi->raw_super->segment_count_main); 557 le32_to_cpu(sbi->raw_super->segment_count_main);
540 int i; 558 int i;
541 559
560 seq_puts(seq, "format: segment_type|valid_blocks\n"
561 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
562
542 for (i = 0; i < total_segs; i++) { 563 for (i = 0; i < total_segs; i++) {
543 seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); 564 struct seg_entry *se = get_seg_entry(sbi, i);
544 if (i != 0 && (i % 10) == 0) 565
545 seq_puts(seq, "\n"); 566 if ((i % 10) == 0)
567 seq_printf(seq, "%-5d", i);
568 seq_printf(seq, "%d|%-3u", se->type,
569 get_valid_blocks(sbi, i, 1));
570 if ((i % 10) == 9 || i == (total_segs - 1))
571 seq_putc(seq, '\n');
546 else 572 else
547 seq_puts(seq, " "); 573 seq_putc(seq, ' ');
548 } 574 }
575
549 return 0; 576 return 0;
550} 577}
551 578
@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
640 667
641 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 668 if (unlikely(ino < F2FS_ROOT_INO(sbi)))
642 return ERR_PTR(-ESTALE); 669 return ERR_PTR(-ESTALE);
670 if (unlikely(ino >= NM_I(sbi)->max_nid))
671 return ERR_PTR(-ESTALE);
643 672
644 /* 673 /*
645 * f2fs_iget isn't quite right if the inode is currently unallocated! 674 * f2fs_iget isn't quite right if the inode is currently unallocated!
@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
787 816
788 for (i = 0; i < NR_COUNT_TYPE; i++) 817 for (i = 0; i < NR_COUNT_TYPE; i++)
789 atomic_set(&sbi->nr_pages[i], 0); 818 atomic_set(&sbi->nr_pages[i], 0);
819
820 sbi->dir_level = DEF_DIR_LEVEL;
790} 821}
791 822
792/* 823/*
@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
898 sbi->por_doing = false; 929 sbi->por_doing = false;
899 spin_lock_init(&sbi->stat_lock); 930 spin_lock_init(&sbi->stat_lock);
900 931
901 mutex_init(&sbi->read_io.io_mutex); 932 init_rwsem(&sbi->read_io.io_rwsem);
902 sbi->read_io.sbi = sbi; 933 sbi->read_io.sbi = sbi;
903 sbi->read_io.bio = NULL; 934 sbi->read_io.bio = NULL;
904 for (i = 0; i < NR_PAGE_TYPE; i++) { 935 for (i = 0; i < NR_PAGE_TYPE; i++) {
905 mutex_init(&sbi->write_io[i].io_mutex); 936 init_rwsem(&sbi->write_io[i].io_rwsem);
906 sbi->write_io[i].sbi = sbi; 937 sbi->write_io[i].sbi = sbi;
907 sbi->write_io[i].bio = NULL; 938 sbi->write_io[i].bio = NULL;
908 } 939 }
@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
991 goto free_root_inode; 1022 goto free_root_inode;
992 } 1023 }
993 1024
994 /* recover fsynced data */
995 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
996 err = recover_fsync_data(sbi);
997 if (err)
998 f2fs_msg(sb, KERN_ERR,
999 "Cannot recover all fsync data errno=%ld", err);
1000 }
1001
1002 /*
1003 * If filesystem is not mounted as read-only then
1004 * do start the gc_thread.
1005 */
1006 if (!(sb->s_flags & MS_RDONLY)) {
1007 /* After POR, we can run background GC thread.*/
1008 err = start_gc_thread(sbi);
1009 if (err)
1010 goto free_gc;
1011 }
1012
1013 err = f2fs_build_stats(sbi); 1025 err = f2fs_build_stats(sbi);
1014 if (err) 1026 if (err)
1015 goto free_gc; 1027 goto free_root_inode;
1016 1028
1017 if (f2fs_proc_root) 1029 if (f2fs_proc_root)
1018 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1030 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, 1046 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
1035 "%s", sb->s_id); 1047 "%s", sb->s_id);
1036 if (err) 1048 if (err)
1037 goto fail; 1049 goto free_proc;
1050
1051 /* recover fsynced data */
1052 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1053 err = recover_fsync_data(sbi);
1054 if (err)
1055 f2fs_msg(sb, KERN_ERR,
1056 "Cannot recover all fsync data errno=%ld", err);
1057 }
1038 1058
1059 /*
1060 * If filesystem is not mounted as read-only then
1061 * do start the gc_thread.
1062 */
1063 if (!(sb->s_flags & MS_RDONLY)) {
1064 /* After POR, we can run background GC thread.*/
1065 err = start_gc_thread(sbi);
1066 if (err)
1067 goto free_kobj;
1068 }
1039 return 0; 1069 return 0;
1040fail: 1070
1071free_kobj:
1072 kobject_del(&sbi->s_kobj);
1073free_proc:
1041 if (sbi->s_proc) { 1074 if (sbi->s_proc) {
1042 remove_proc_entry("segment_info", sbi->s_proc); 1075 remove_proc_entry("segment_info", sbi->s_proc);
1043 remove_proc_entry(sb->s_id, f2fs_proc_root); 1076 remove_proc_entry(sb->s_id, f2fs_proc_root);
1044 } 1077 }
1045 f2fs_destroy_stats(sbi); 1078 f2fs_destroy_stats(sbi);
1046free_gc:
1047 stop_gc_thread(sbi);
1048free_root_inode: 1079free_root_inode:
1049 dput(sb->s_root); 1080 dput(sb->s_root);
1050 sb->s_root = NULL; 1081 sb->s_root = NULL;
@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
1084static int __init init_inodecache(void) 1115static int __init init_inodecache(void)
1085{ 1116{
1086 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1117 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
1087 sizeof(struct f2fs_inode_info), NULL); 1118 sizeof(struct f2fs_inode_info));
1088 if (!f2fs_inode_cachep) 1119 if (!f2fs_inode_cachep)
1089 return -ENOMEM; 1120 return -ENOMEM;
1090 return 0; 1121 return 0;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89d0422a91a8..503c2451131e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
275 275
276 inline_size = inline_xattr_size(inode); 276 inline_size = inline_xattr_size(inode);
277 277
278 txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); 278 txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
279 if (!txattr_addr) 279 if (!txattr_addr)
280 return NULL; 280 return NULL;
281 281
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
407 if (name == NULL) 407 if (name == NULL)
408 return -EINVAL; 408 return -EINVAL;
409 name_len = strlen(name); 409 name_len = strlen(name);
410 if (name_len > F2FS_NAME_LEN)
411 return -ERANGE;
410 412
411 base_addr = read_all_xattrs(inode, NULL); 413 base_addr = read_all_xattrs(inode, NULL);
412 if (!base_addr) 414 if (!base_addr)
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
590 f2fs_balance_fs(sbi); 592 f2fs_balance_fs(sbi);
591 593
592 f2fs_lock_op(sbi); 594 f2fs_lock_op(sbi);
595 /* protect xattr_ver */
596 down_write(&F2FS_I(inode)->i_sem);
593 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); 597 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
598 up_write(&F2FS_I(inode)->i_sem);
594 f2fs_unlock_op(sbi); 599 f2fs_unlock_op(sbi);
595 600
596 return err; 601 return err;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index da74d878dc4f..df53e1753a76 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -183,7 +183,7 @@ struct f2fs_inode {
183 __le32 i_pino; /* parent inode number */ 183 __le32 i_pino; /* parent inode number */
184 __le32 i_namelen; /* file name length */ 184 __le32 i_namelen; /* file name length */
185 __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ 185 __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */
186 __u8 i_reserved2; /* for backward compatibility */ 186 __u8 i_dir_level; /* dentry_level for large dir */
187 187
188 struct f2fs_extent i_ext; /* caching a largest extent */ 188 struct f2fs_extent i_ext; /* caching a largest extent */
189 189