aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/acl.c8
-rw-r--r--fs/f2fs/checkpoint.c208
-rw-r--r--fs/f2fs/data.c106
-rw-r--r--fs/f2fs/debug.c12
-rw-r--r--fs/f2fs/dir.c85
-rw-r--r--fs/f2fs/f2fs.h105
-rw-r--r--fs/f2fs/file.c31
-rw-r--r--fs/f2fs/gc.c16
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c27
-rw-r--r--fs/f2fs/namei.c9
-rw-r--r--fs/f2fs/node.c334
-rw-r--r--fs/f2fs/node.h25
-rw-r--r--fs/f2fs/recovery.c37
-rw-r--r--fs/f2fs/segment.c222
-rw-r--r--fs/f2fs/segment.h75
-rw-r--r--fs/f2fs/super.c97
-rw-r--r--fs/f2fs/xattr.c7
18 files changed, 902 insertions, 506 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4cb8c4b..e93e4ec7d165 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
174 174
175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0); 175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
176 if (retval > 0) { 176 if (retval > 0) {
177 value = kmalloc(retval, GFP_KERNEL); 177 value = kmalloc(retval, GFP_F2FS_ZERO);
178 if (!value) 178 if (!value)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 retval = f2fs_getxattr(inode, name_index, "", value, retval); 180 retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
206 switch (type) { 212 switch (type) {
207 case ACL_TYPE_ACCESS: 213 case ACL_TYPE_ACCESS:
208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 293d0486a40f..4aa521aa9bc3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
33 struct address_space *mapping = META_MAPPING(sbi); 33 struct address_space *mapping = META_MAPPING(sbi);
34 struct page *page = NULL; 34 struct page *page = NULL;
35repeat: 35repeat:
36 page = grab_cache_page(mapping, index); 36 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
37 if (!page) { 37 if (!page) {
38 cond_resched(); 38 cond_resched();
39 goto repeat; 39 goto repeat;
40 } 40 }
41 41
42 /* We wait writeback only inside grab_meta_page() */
43 wait_on_page_writeback(page);
44 SetPageUptodate(page); 42 SetPageUptodate(page);
45 return page; 43 return page;
46} 44}
@@ -75,23 +73,102 @@ out:
75 return page; 73 return page;
76} 74}
77 75
76inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
77{
78 switch (type) {
79 case META_NAT:
80 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
81 case META_SIT:
82 return SIT_BLK_CNT(sbi);
83 case META_SSA:
84 case META_CP:
85 return 0;
86 default:
87 BUG();
88 }
89}
90
91/*
92 * Readahead CP/NAT/SIT/SSA pages
93 */
94int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
95{
96 block_t prev_blk_addr = 0;
97 struct page *page;
98 int blkno = start;
99 int max_blks = get_max_meta_blks(sbi, type);
100
101 struct f2fs_io_info fio = {
102 .type = META,
103 .rw = READ_SYNC | REQ_META | REQ_PRIO
104 };
105
106 for (; nrpages-- > 0; blkno++) {
107 block_t blk_addr;
108
109 switch (type) {
110 case META_NAT:
111 /* get nat block addr */
112 if (unlikely(blkno >= max_blks))
113 blkno = 0;
114 blk_addr = current_nat_addr(sbi,
115 blkno * NAT_ENTRY_PER_BLOCK);
116 break;
117 case META_SIT:
118 /* get sit block addr */
119 if (unlikely(blkno >= max_blks))
120 goto out;
121 blk_addr = current_sit_addr(sbi,
122 blkno * SIT_ENTRY_PER_BLOCK);
123 if (blkno != start && prev_blk_addr + 1 != blk_addr)
124 goto out;
125 prev_blk_addr = blk_addr;
126 break;
127 case META_SSA:
128 case META_CP:
129 /* get ssa/cp block addr */
130 blk_addr = blkno;
131 break;
132 default:
133 BUG();
134 }
135
136 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
137 if (!page)
138 continue;
139 if (PageUptodate(page)) {
140 mark_page_accessed(page);
141 f2fs_put_page(page, 1);
142 continue;
143 }
144
145 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
146 mark_page_accessed(page);
147 f2fs_put_page(page, 0);
148 }
149out:
150 f2fs_submit_merged_bio(sbi, META, READ);
151 return blkno - start;
152}
153
78static int f2fs_write_meta_page(struct page *page, 154static int f2fs_write_meta_page(struct page *page,
79 struct writeback_control *wbc) 155 struct writeback_control *wbc)
80{ 156{
81 struct inode *inode = page->mapping->host; 157 struct inode *inode = page->mapping->host;
82 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 158 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
83 159
84 /* Should not write any meta pages, if any IO error was occurred */ 160 if (unlikely(sbi->por_doing))
85 if (unlikely(sbi->por_doing ||
86 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
87 goto redirty_out; 161 goto redirty_out;
88
89 if (wbc->for_reclaim) 162 if (wbc->for_reclaim)
90 goto redirty_out; 163 goto redirty_out;
91 164
92 wait_on_page_writeback(page); 165 /* Should not write any meta pages, if any IO error was occurred */
166 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
167 goto no_write;
93 168
169 f2fs_wait_on_page_writeback(page, META);
94 write_meta_page(sbi, page); 170 write_meta_page(sbi, page);
171no_write:
95 dec_page_count(sbi, F2FS_DIRTY_META); 172 dec_page_count(sbi, F2FS_DIRTY_META);
96 unlock_page(page); 173 unlock_page(page);
97 return 0; 174 return 0;
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
99redirty_out: 176redirty_out:
100 dec_page_count(sbi, F2FS_DIRTY_META); 177 dec_page_count(sbi, F2FS_DIRTY_META);
101 wbc->pages_skipped++; 178 wbc->pages_skipped++;
179 account_page_redirty(page);
102 set_page_dirty(page); 180 set_page_dirty(page);
103 return AOP_WRITEPAGE_ACTIVATE; 181 return AOP_WRITEPAGE_ACTIVATE;
104} 182}
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
107 struct writeback_control *wbc) 185 struct writeback_control *wbc)
108{ 186{
109 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 187 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
110 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 188 long diff, written;
111 long written;
112
113 if (wbc->for_kupdate)
114 return 0;
115 189
116 /* collect a number of dirty meta pages and write together */ 190 /* collect a number of dirty meta pages and write together */
117 if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) 191 if (wbc->for_kupdate ||
118 return 0; 192 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
193 goto skip_write;
119 194
120 /* if mounting is failed, skip writing node pages */ 195 /* if mounting is failed, skip writing node pages */
121 mutex_lock(&sbi->cp_mutex); 196 mutex_lock(&sbi->cp_mutex);
122 written = sync_meta_pages(sbi, META, nrpages); 197 diff = nr_pages_to_write(sbi, META, wbc);
198 written = sync_meta_pages(sbi, META, wbc->nr_to_write);
123 mutex_unlock(&sbi->cp_mutex); 199 mutex_unlock(&sbi->cp_mutex);
124 wbc->nr_to_write -= written; 200 wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
201 return 0;
202
203skip_write:
204 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
125 return 0; 205 return 0;
126} 206}
127 207
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
148 228
149 for (i = 0; i < nr_pages; i++) { 229 for (i = 0; i < nr_pages; i++) {
150 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
231
151 lock_page(page); 232 lock_page(page);
152 f2fs_bug_on(page->mapping != mapping); 233
153 f2fs_bug_on(!PageDirty(page)); 234 if (unlikely(page->mapping != mapping)) {
154 clear_page_dirty_for_io(page); 235continue_unlock:
236 unlock_page(page);
237 continue;
238 }
239 if (!PageDirty(page)) {
240 /* someone wrote it for us */
241 goto continue_unlock;
242 }
243
244 if (!clear_page_dirty_for_io(page))
245 goto continue_unlock;
246
155 if (f2fs_write_meta_page(page, &wbc)) { 247 if (f2fs_write_meta_page(page, &wbc)) {
156 unlock_page(page); 248 unlock_page(page);
157 break; 249 break;
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
216 308
217void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 309void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
218{ 310{
219 struct list_head *head, *this; 311 struct list_head *head;
220 struct orphan_inode_entry *new = NULL, *orphan = NULL; 312 struct orphan_inode_entry *new, *orphan;
221 313
222 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 314 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
223 new->ino = ino; 315 new->ino = ino;
224 316
225 spin_lock(&sbi->orphan_inode_lock); 317 spin_lock(&sbi->orphan_inode_lock);
226 head = &sbi->orphan_inode_list; 318 head = &sbi->orphan_inode_list;
227 list_for_each(this, head) { 319 list_for_each_entry(orphan, head, list) {
228 orphan = list_entry(this, struct orphan_inode_entry, list);
229 if (orphan->ino == ino) { 320 if (orphan->ino == ino) {
230 spin_unlock(&sbi->orphan_inode_lock); 321 spin_unlock(&sbi->orphan_inode_lock);
231 kmem_cache_free(orphan_entry_slab, new); 322 kmem_cache_free(orphan_entry_slab, new);
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
234 325
235 if (orphan->ino > ino) 326 if (orphan->ino > ino)
236 break; 327 break;
237 orphan = NULL;
238 } 328 }
239 329
240 /* add new_oentry into list which is sorted by inode number */ 330 /* add new orphan entry into list which is sorted by inode number */
241 if (orphan) 331 list_add_tail(&new->list, &orphan->list);
242 list_add(&new->list, this->prev);
243 else
244 list_add_tail(&new->list, head);
245 spin_unlock(&sbi->orphan_inode_lock); 332 spin_unlock(&sbi->orphan_inode_lock);
246} 333}
247 334
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
255 list_for_each_entry(orphan, head, list) { 342 list_for_each_entry(orphan, head, list) {
256 if (orphan->ino == ino) { 343 if (orphan->ino == ino) {
257 list_del(&orphan->list); 344 list_del(&orphan->list);
258 kmem_cache_free(orphan_entry_slab, orphan);
259 f2fs_bug_on(sbi->n_orphans == 0); 345 f2fs_bug_on(sbi->n_orphans == 0);
260 sbi->n_orphans--; 346 sbi->n_orphans--;
261 break; 347 spin_unlock(&sbi->orphan_inode_lock);
348 kmem_cache_free(orphan_entry_slab, orphan);
349 return;
262 } 350 }
263 } 351 }
264 spin_unlock(&sbi->orphan_inode_lock); 352 spin_unlock(&sbi->orphan_inode_lock);
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
285 start_blk = __start_cp_addr(sbi) + 1; 373 start_blk = __start_cp_addr(sbi) + 1;
286 orphan_blkaddr = __start_sum_addr(sbi) - 1; 374 orphan_blkaddr = __start_sum_addr(sbi) - 1;
287 375
376 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
377
288 for (i = 0; i < orphan_blkaddr; i++) { 378 for (i = 0; i < orphan_blkaddr; i++) {
289 struct page *page = get_meta_page(sbi, start_blk + i); 379 struct page *page = get_meta_page(sbi, start_blk + i);
290 struct f2fs_orphan_block *orphan_blk; 380 struct f2fs_orphan_block *orphan_blk;
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
466{ 556{
467 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
468 struct list_head *head = &sbi->dir_inode_list; 558 struct list_head *head = &sbi->dir_inode_list;
469 struct list_head *this; 559 struct dir_inode_entry *entry;
470 560
471 list_for_each(this, head) { 561 list_for_each_entry(entry, head, list)
472 struct dir_inode_entry *entry;
473 entry = list_entry(this, struct dir_inode_entry, list);
474 if (unlikely(entry->inode == inode)) 562 if (unlikely(entry->inode == inode))
475 return -EEXIST; 563 return -EEXIST;
476 } 564
477 list_add_tail(&new->list, head); 565 list_add_tail(&new->list, head);
478 stat_inc_dirty_dir(sbi); 566 stat_inc_dirty_dir(sbi);
479 return 0; 567 return 0;
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
483{ 571{
484 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 572 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
485 struct dir_inode_entry *new; 573 struct dir_inode_entry *new;
574 int ret = 0;
486 575
487 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
488 return; 577 return;
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
492 INIT_LIST_HEAD(&new->list); 581 INIT_LIST_HEAD(&new->list);
493 582
494 spin_lock(&sbi->dir_inode_lock); 583 spin_lock(&sbi->dir_inode_lock);
495 if (__add_dirty_inode(inode, new)) 584 ret = __add_dirty_inode(inode, new);
496 kmem_cache_free(inode_entry_slab, new);
497
498 inc_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_inc_dirty_dents(inode); 585 inode_inc_dirty_dents(inode);
500 SetPagePrivate(page); 586 SetPagePrivate(page);
501 spin_unlock(&sbi->dir_inode_lock); 587 spin_unlock(&sbi->dir_inode_lock);
588
589 if (ret)
590 kmem_cache_free(inode_entry_slab, new);
502} 591}
503 592
504void add_dirty_dir_inode(struct inode *inode) 593void add_dirty_dir_inode(struct inode *inode)
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
506 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 595 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 struct dir_inode_entry *new = 596 struct dir_inode_entry *new =
508 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 597 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
598 int ret = 0;
509 599
510 new->inode = inode; 600 new->inode = inode;
511 INIT_LIST_HEAD(&new->list); 601 INIT_LIST_HEAD(&new->list);
512 602
513 spin_lock(&sbi->dir_inode_lock); 603 spin_lock(&sbi->dir_inode_lock);
514 if (__add_dirty_inode(inode, new)) 604 ret = __add_dirty_inode(inode, new);
515 kmem_cache_free(inode_entry_slab, new);
516 spin_unlock(&sbi->dir_inode_lock); 605 spin_unlock(&sbi->dir_inode_lock);
606
607 if (ret)
608 kmem_cache_free(inode_entry_slab, new);
517} 609}
518 610
519void remove_dirty_dir_inode(struct inode *inode) 611void remove_dirty_dir_inode(struct inode *inode)
520{ 612{
521 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 613 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
522 614 struct list_head *head;
523 struct list_head *this, *head; 615 struct dir_inode_entry *entry;
524 616
525 if (!S_ISDIR(inode->i_mode)) 617 if (!S_ISDIR(inode->i_mode))
526 return; 618 return;
527 619
528 spin_lock(&sbi->dir_inode_lock); 620 spin_lock(&sbi->dir_inode_lock);
529 if (atomic_read(&F2FS_I(inode)->dirty_dents)) { 621 if (get_dirty_dents(inode)) {
530 spin_unlock(&sbi->dir_inode_lock); 622 spin_unlock(&sbi->dir_inode_lock);
531 return; 623 return;
532 } 624 }
533 625
534 head = &sbi->dir_inode_list; 626 head = &sbi->dir_inode_list;
535 list_for_each(this, head) { 627 list_for_each_entry(entry, head, list) {
536 struct dir_inode_entry *entry;
537 entry = list_entry(this, struct dir_inode_entry, list);
538 if (entry->inode == inode) { 628 if (entry->inode == inode) {
539 list_del(&entry->list); 629 list_del(&entry->list);
540 kmem_cache_free(inode_entry_slab, entry);
541 stat_dec_dirty_dir(sbi); 630 stat_dec_dirty_dir(sbi);
542 break; 631 spin_unlock(&sbi->dir_inode_lock);
632 kmem_cache_free(inode_entry_slab, entry);
633 goto done;
543 } 634 }
544 } 635 }
545 spin_unlock(&sbi->dir_inode_lock); 636 spin_unlock(&sbi->dir_inode_lock);
546 637
638done:
547 /* Only from the recovery routine */ 639 /* Only from the recovery routine */
548 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 640 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
549 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 641 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
554struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) 646struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
555{ 647{
556 648
557 struct list_head *this, *head; 649 struct list_head *head;
558 struct inode *inode = NULL; 650 struct inode *inode = NULL;
651 struct dir_inode_entry *entry;
559 652
560 spin_lock(&sbi->dir_inode_lock); 653 spin_lock(&sbi->dir_inode_lock);
561 654
562 head = &sbi->dir_inode_list; 655 head = &sbi->dir_inode_list;
563 list_for_each(this, head) { 656 list_for_each_entry(entry, head, list) {
564 struct dir_inode_entry *entry;
565 entry = list_entry(this, struct dir_inode_entry, list);
566 if (entry->inode->i_ino == ino) { 657 if (entry->inode->i_ino == ino) {
567 inode = entry->inode; 658 inode = entry->inode;
568 break; 659 break;
@@ -589,7 +680,7 @@ retry:
589 inode = igrab(entry->inode); 680 inode = igrab(entry->inode);
590 spin_unlock(&sbi->dir_inode_lock); 681 spin_unlock(&sbi->dir_inode_lock);
591 if (inode) { 682 if (inode) {
592 filemap_flush(inode->i_mapping); 683 filemap_fdatawrite(inode->i_mapping);
593 iput(inode); 684 iput(inode);
594 } else { 685 } else {
595 /* 686 /*
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
824 unblock_operations(sbi); 915 unblock_operations(sbi);
825 mutex_unlock(&sbi->cp_mutex); 916 mutex_unlock(&sbi->cp_mutex);
826 917
918 stat_inc_cp_count(sbi->stat_info);
827 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 919 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
828} 920}
829 921
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
845int __init create_checkpoint_caches(void) 937int __init create_checkpoint_caches(void)
846{ 938{
847 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 939 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
848 sizeof(struct orphan_inode_entry), NULL); 940 sizeof(struct orphan_inode_entry));
849 if (!orphan_entry_slab) 941 if (!orphan_entry_slab)
850 return -ENOMEM; 942 return -ENOMEM;
851 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 943 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
852 sizeof(struct dir_inode_entry), NULL); 944 sizeof(struct dir_inode_entry));
853 if (!inode_entry_slab) { 945 if (!inode_entry_slab) {
854 kmem_cache_destroy(orphan_entry_slab); 946 kmem_cache_destroy(orphan_entry_slab);
855 return -ENOMEM; 947 return -ENOMEM;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2261ccdd0b5f..45abd60e2bff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
45 45
46static void f2fs_write_end_io(struct bio *bio, int err) 46static void f2fs_write_end_io(struct bio *bio, int err)
47{ 47{
48 struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb); 48 struct f2fs_sb_info *sbi = bio->bi_private;
49 struct bio_vec *bvec; 49 struct bio_vec *bvec;
50 int i; 50 int i;
51 51
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 SetPageError(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 58 f2fs_stop_checkpoint(sbi);
59 sbi->sb->s_flags |= MS_RDONLY;
60 } 59 }
61 end_page_writeback(page); 60 end_page_writeback(page);
62 dec_page_count(sbi, F2FS_WRITEBACK); 61 dec_page_count(sbi, F2FS_WRITEBACK);
63 } 62 }
64 63
65 if (bio->bi_private) 64 if (sbi->wait_io) {
66 complete(bio->bi_private); 65 complete(sbi->wait_io);
66 sbi->wait_io = NULL;
67 }
67 68
68 if (!get_pages(sbi, F2FS_WRITEBACK) && 69 if (!get_pages(sbi, F2FS_WRITEBACK) &&
69 !list_empty(&sbi->cp_wait.task_list)) 70 !list_empty(&sbi->cp_wait.task_list))
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
86 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi;
89 91
90 return bio; 92 return bio;
91} 93}
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
113 */ 115 */
114 if (fio->type == META_FLUSH) { 116 if (fio->type == META_FLUSH) {
115 DECLARE_COMPLETION_ONSTACK(wait); 117 DECLARE_COMPLETION_ONSTACK(wait);
116 io->bio->bi_private = &wait; 118 io->sbi->wait_io = &wait;
117 submit_bio(rw, io->bio); 119 submit_bio(rw, io->bio);
118 wait_for_completion(&wait); 120 wait_for_completion(&wait);
119 } else { 121 } else {
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
132 134
133 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 135 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
134 136
135 mutex_lock(&io->io_mutex); 137 down_write(&io->io_rwsem);
136 138
137 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
138 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
140 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
141 } 143 }
142 __submit_merged_bio(io); 144 __submit_merged_bio(io);
143 mutex_unlock(&io->io_mutex); 145 up_write(&io->io_rwsem);
144} 146}
145 147
146/* 148/*
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
178 180
179 verify_block_addr(sbi, blk_addr); 181 verify_block_addr(sbi, blk_addr);
180 182
181 mutex_lock(&io->io_mutex); 183 down_write(&io->io_rwsem);
182 184
183 if (!is_read) 185 if (!is_read)
184 inc_page_count(sbi, F2FS_WRITEBACK); 186 inc_page_count(sbi, F2FS_WRITEBACK);
@@ -202,7 +204,7 @@ alloc_new:
202 204
203 io->last_block_in_bio = blk_addr; 205 io->last_block_in_bio = blk_addr;
204 206
205 mutex_unlock(&io->io_mutex); 207 up_write(&io->io_rwsem);
206 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 208 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
207} 209}
208 210
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
797 */ 799 */
798 offset = i_size & (PAGE_CACHE_SIZE - 1); 800 offset = i_size & (PAGE_CACHE_SIZE - 1);
799 if ((page->index >= end_index + 1) || !offset) { 801 if ((page->index >= end_index + 1) || !offset) {
800 if (S_ISDIR(inode->i_mode)) { 802 inode_dec_dirty_dents(inode);
801 dec_page_count(sbi, F2FS_DIRTY_DENTS);
802 inode_dec_dirty_dents(inode);
803 }
804 goto out; 803 goto out;
805 } 804 }
806 805
807 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 806 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
808write: 807write:
809 if (unlikely(sbi->por_doing)) { 808 if (unlikely(sbi->por_doing))
810 err = AOP_WRITEPAGE_ACTIVATE;
811 goto redirty_out; 809 goto redirty_out;
812 }
813 810
814 /* Dentry blocks are controlled by checkpoint */ 811 /* Dentry blocks are controlled by checkpoint */
815 if (S_ISDIR(inode->i_mode)) { 812 if (S_ISDIR(inode->i_mode)) {
816 dec_page_count(sbi, F2FS_DIRTY_DENTS);
817 inode_dec_dirty_dents(inode); 813 inode_dec_dirty_dents(inode);
818 err = do_write_data_page(page, &fio); 814 err = do_write_data_page(page, &fio);
819 } else { 815 goto done;
820 f2fs_lock_op(sbi); 816 }
821
822 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
823 err = f2fs_write_inline_data(inode, page, offset);
824 f2fs_unlock_op(sbi);
825 goto out;
826 } else {
827 err = do_write_data_page(page, &fio);
828 }
829 817
830 f2fs_unlock_op(sbi); 818 if (!wbc->for_reclaim)
831 need_balance_fs = true; 819 need_balance_fs = true;
832 } 820 else if (has_not_enough_free_secs(sbi, 0))
833 if (err == -ENOENT)
834 goto out;
835 else if (err)
836 goto redirty_out; 821 goto redirty_out;
837 822
838 if (wbc->for_reclaim) { 823 f2fs_lock_op(sbi);
839 f2fs_submit_merged_bio(sbi, DATA, WRITE); 824 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
840 need_balance_fs = false; 825 err = f2fs_write_inline_data(inode, page, offset);
841 } 826 else
827 err = do_write_data_page(page, &fio);
828 f2fs_unlock_op(sbi);
829done:
830 if (err && err != -ENOENT)
831 goto redirty_out;
842 832
843 clear_cold_data(page); 833 clear_cold_data(page);
844out: 834out:
@@ -849,12 +839,11 @@ out:
849 839
850redirty_out: 840redirty_out:
851 wbc->pages_skipped++; 841 wbc->pages_skipped++;
842 account_page_redirty(page);
852 set_page_dirty(page); 843 set_page_dirty(page);
853 return err; 844 return AOP_WRITEPAGE_ACTIVATE;
854} 845}
855 846
856#define MAX_DESIRED_PAGES_WP 4096
857
858static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, 847static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
859 void *data) 848 void *data)
860{ 849{
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
871 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 860 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
872 bool locked = false; 861 bool locked = false;
873 int ret; 862 int ret;
874 long excess_nrtw = 0, desired_nrtw; 863 long diff;
875 864
876 /* deal with chardevs and other special file */ 865 /* deal with chardevs and other special file */
877 if (!mapping->a_ops->writepage) 866 if (!mapping->a_ops->writepage)
878 return 0; 867 return 0;
879 868
880 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 869 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
881 desired_nrtw = MAX_DESIRED_PAGES_WP; 870 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
882 excess_nrtw = desired_nrtw - wbc->nr_to_write; 871 goto skip_write;
883 wbc->nr_to_write = desired_nrtw; 872
884 } 873 diff = nr_pages_to_write(sbi, DATA, wbc);
885 874
886 if (!S_ISDIR(inode->i_mode)) { 875 if (!S_ISDIR(inode->i_mode)) {
887 mutex_lock(&sbi->writepages); 876 mutex_lock(&sbi->writepages);
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
895 884
896 remove_dirty_dir_inode(inode); 885 remove_dirty_dir_inode(inode);
897 886
898 wbc->nr_to_write -= excess_nrtw; 887 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
899 return ret; 888 return ret;
889
890skip_write:
891 wbc->pages_skipped += get_dirty_dents(inode);
892 return 0;
900} 893}
901 894
902static int f2fs_write_begin(struct file *file, struct address_space *mapping, 895static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -949,13 +942,19 @@ inline_data:
949 if (dn.data_blkaddr == NEW_ADDR) { 942 if (dn.data_blkaddr == NEW_ADDR) {
950 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 943 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
951 } else { 944 } else {
952 if (f2fs_has_inline_data(inode)) 945 if (f2fs_has_inline_data(inode)) {
953 err = f2fs_read_inline_data(inode, page); 946 err = f2fs_read_inline_data(inode, page);
954 else 947 if (err) {
948 page_cache_release(page);
949 return err;
950 }
951 } else {
955 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 952 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
956 READ_SYNC); 953 READ_SYNC);
957 if (err) 954 if (err)
958 return err; 955 return err;
956 }
957
959 lock_page(page); 958 lock_page(page);
960 if (unlikely(!PageUptodate(page))) { 959 if (unlikely(!PageUptodate(page))) {
961 f2fs_put_page(page, 1); 960 f2fs_put_page(page, 1);
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1031 unsigned int length) 1030 unsigned int length)
1032{ 1031{
1033 struct inode *inode = page->mapping->host; 1032 struct inode *inode = page->mapping->host;
1034 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1033 if (PageDirty(page))
1035 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
1036 dec_page_count(sbi, F2FS_DIRTY_DENTS);
1037 inode_dec_dirty_dents(inode); 1034 inode_dec_dirty_dents(inode);
1038 }
1039 ClearPagePrivate(page); 1035 ClearPagePrivate(page);
1040} 1036}
1041 1037
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3de9d20d0c14..b52c12cf5873 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
86{ 86{
87 struct f2fs_stat_info *si = F2FS_STAT(sbi); 87 struct f2fs_stat_info *si = F2FS_STAT(sbi);
88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; 88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
89 struct sit_info *sit_i = SIT_I(sbi);
90 unsigned int segno, vblocks; 89 unsigned int segno, vblocks;
91 int ndirty = 0; 90 int ndirty = 0;
92 91
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
94 total_vblocks = 0; 93 total_vblocks = 0;
95 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
96 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
97 mutex_lock(&sit_i->sentry_lock);
98 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
99 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
100 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
105 ndirty++; 103 ndirty++;
106 } 104 }
107 } 105 }
108 mutex_unlock(&sit_i->sentry_lock);
109 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
110 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
111 if (si->dirty_count) 108 if (si->dirty_count)
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
236 si->dirty_count); 233 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", 234 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs); 235 si->prefree_count, si->free_segs, si->free_secs);
236 seq_printf(s, "CP calls: %d\n", si->cp_count);
239 seq_printf(s, "GC calls: %d (BG: %d)\n", 237 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc); 238 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs); 239 seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
252 si->ndirty_dent, si->ndirty_dirs); 250 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta: %4d in %4d\n", 251 seq_printf(s, " - meta: %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages); 252 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs: %5d > %lu\n", 253 seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
256 si->nats, NM_WOUT_THRESHOLD); 254 si->nats, si->sits);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", 255 seq_printf(s, " - free_nids: %9d\n",
258 si->sits, si->fnids); 256 si->fnids);
259 seq_puts(s, "\nDistribution of User Blocks:"); 257 seq_puts(s, "\nDistribution of User Blocks:");
260 seq_puts(s, " [ valid | invalid | free ]\n"); 258 seq_puts(s, " [ valid | invalid | free ]\n");
261 seq_puts(s, " ["); 259 seq_puts(s, " [");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2b7c255bcbdf..972fd0ef230f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
21 >> PAGE_CACHE_SHIFT; 21 >> PAGE_CACHE_SHIFT;
22} 22}
23 23
24static unsigned int dir_buckets(unsigned int level) 24static unsigned int dir_buckets(unsigned int level, int dir_level)
25{ 25{
26 if (level < MAX_DIR_HASH_DEPTH / 2) 26 if (level < MAX_DIR_HASH_DEPTH / 2)
27 return 1 << level; 27 return 1 << (level + dir_level);
28 else 28 else
29 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); 29 return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
30} 30}
31 31
32static unsigned int bucket_blocks(unsigned int level) 32static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 66}
67 67
68static unsigned long dir_block_index(unsigned int level, unsigned int idx) 68static unsigned long dir_block_index(unsigned int level,
69 int dir_level, unsigned int idx)
69{ 70{
70 unsigned long i; 71 unsigned long i;
71 unsigned long bidx = 0; 72 unsigned long bidx = 0;
72 73
73 for (i = 0; i < level; i++) 74 for (i = 0; i < level; i++)
74 bidx += dir_buckets(i) * bucket_blocks(i); 75 bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
75 bidx += idx * bucket_blocks(level); 76 bidx += idx * bucket_blocks(level);
76 return bidx; 77 return bidx;
77} 78}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
94{ 95{
95 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
96 unsigned long bit_pos, end_pos, next_pos; 97 unsigned long bit_pos = 0;
97 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); 98 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
98 int slots; 99 const void *dentry_bits = &dentry_blk->dentry_bitmap;
100 int max_len = 0;
99 101
100 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
101 NR_DENTRY_IN_BLOCK, 0);
102 while (bit_pos < NR_DENTRY_IN_BLOCK) { 102 while (bit_pos < NR_DENTRY_IN_BLOCK) {
103 if (!test_bit_le(bit_pos, dentry_bits)) {
104 if (bit_pos == 0)
105 max_len = 1;
106 else if (!test_bit_le(bit_pos - 1, dentry_bits))
107 max_len++;
108 bit_pos++;
109 continue;
110 }
103 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
104 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
105
106 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name, namelen, namehash, de)) {
107 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
108 name, namelen)) { 114 name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
110 goto found; 116 goto found;
111 } 117 }
112 } 118 }
113 next_pos = bit_pos + slots; 119 if (max_len > *max_slots) {
114 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 120 *max_slots = max_len;
115 NR_DENTRY_IN_BLOCK, next_pos); 121 max_len = 0;
116 if (bit_pos >= NR_DENTRY_IN_BLOCK) 122 }
117 end_pos = NR_DENTRY_IN_BLOCK; 123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
118 else
119 end_pos = bit_pos;
120 if (*max_slots < end_pos - next_pos)
121 *max_slots = end_pos - next_pos;
122 } 124 }
123 125
124 de = NULL; 126 de = NULL;
125 kunmap(dentry_page); 127 kunmap(dentry_page);
126found: 128found:
129 if (max_len > *max_slots)
130 *max_slots = max_len;
127 return de; 131 return de;
128} 132}
129 133
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
141 145
142 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 146 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
143 147
144 nbucket = dir_buckets(level); 148 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
145 nblock = bucket_blocks(level); 149 nblock = bucket_blocks(level);
146 150
147 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); 151 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
152 le32_to_cpu(namehash) % nbucket);
148 end_block = bidx + nblock; 153 end_block = bidx + nblock;
149 154
150 for (; bidx < end_block; bidx++) { 155 for (; bidx < end_block; bidx++) {
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
248 struct page *page, struct inode *inode) 253 struct page *page, struct inode *inode)
249{ 254{
250 lock_page(page); 255 lock_page(page);
251 wait_on_page_writeback(page); 256 f2fs_wait_on_page_writeback(page, DATA);
252 de->ino = cpu_to_le32(inode->i_ino); 257 de->ino = cpu_to_le32(inode->i_ino);
253 set_de_type(de, inode); 258 set_de_type(de, inode);
254 kunmap(page); 259 kunmap(page);
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
347 err = f2fs_init_security(inode, dir, name, page); 352 err = f2fs_init_security(inode, dir, name, page);
348 if (err) 353 if (err)
349 goto put_error; 354 goto put_error;
350
351 wait_on_page_writeback(page);
352 } else { 355 } else {
353 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 356 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
354 if (IS_ERR(page)) 357 if (IS_ERR(page))
355 return page; 358 return page;
356 359
357 wait_on_page_writeback(page);
358 set_cold_node(inode, page); 360 set_cold_node(inode, page);
359 } 361 }
360 362
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
372 374
373put_error: 375put_error:
374 f2fs_put_page(page, 1); 376 f2fs_put_page(page, 1);
377 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
378 truncate_inode_pages(&inode->i_data, 0);
379 truncate_blocks(inode, 0);
380 remove_dirty_dir_inode(inode);
375error: 381error:
376 remove_inode_page(inode); 382 remove_inode_page(inode);
377 return ERR_PTR(err); 383 return ERR_PTR(err);
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
395 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 401 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
396 } 402 }
397 403
398 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
399 update_inode_page(dir);
400
401 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 404 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
402 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 405 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
403} 406}
@@ -464,10 +467,11 @@ start:
464 if (level == current_depth) 467 if (level == current_depth)
465 ++current_depth; 468 ++current_depth;
466 469
467 nbucket = dir_buckets(level); 470 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
468 nblock = bucket_blocks(level); 471 nblock = bucket_blocks(level);
469 472
470 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); 473 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
474 (le32_to_cpu(dentry_hash) % nbucket));
471 475
472 for (block = bidx; block <= (bidx + nblock - 1); block++) { 476 for (block = bidx; block <= (bidx + nblock - 1); block++) {
473 dentry_page = get_new_data_page(dir, NULL, block, true); 477 dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -487,8 +491,9 @@ start:
487 ++level; 491 ++level;
488 goto start; 492 goto start;
489add_dentry: 493add_dentry:
490 wait_on_page_writeback(dentry_page); 494 f2fs_wait_on_page_writeback(dentry_page, DATA);
491 495
496 down_write(&F2FS_I(inode)->i_sem);
492 page = init_inode_metadata(inode, dir, name); 497 page = init_inode_metadata(inode, dir, name);
493 if (IS_ERR(page)) { 498 if (IS_ERR(page)) {
494 err = PTR_ERR(page); 499 err = PTR_ERR(page);
@@ -511,7 +516,12 @@ add_dentry:
511 516
512 update_parent_metadata(dir, inode, current_depth); 517 update_parent_metadata(dir, inode, current_depth);
513fail: 518fail:
514 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 519 up_write(&F2FS_I(inode)->i_sem);
520
521 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
522 update_inode_page(dir);
523 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
524 }
515 kunmap(dentry_page); 525 kunmap(dentry_page);
516 f2fs_put_page(dentry_page, 1); 526 f2fs_put_page(dentry_page, 1);
517 return err; 527 return err;
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
528 unsigned int bit_pos; 538 unsigned int bit_pos;
529 struct address_space *mapping = page->mapping; 539 struct address_space *mapping = page->mapping;
530 struct inode *dir = mapping->host; 540 struct inode *dir = mapping->host;
531 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
532 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 541 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
533 void *kaddr = page_address(page); 542 void *kaddr = page_address(page);
534 int i; 543 int i;
535 544
536 lock_page(page); 545 lock_page(page);
537 wait_on_page_writeback(page); 546 f2fs_wait_on_page_writeback(page, DATA);
538 547
539 dentry_blk = (struct f2fs_dentry_block *)kaddr; 548 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 549 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
551 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 560 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
552 561
553 if (inode) { 562 if (inode) {
563 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
564
565 down_write(&F2FS_I(inode)->i_sem);
566
554 if (S_ISDIR(inode->i_mode)) { 567 if (S_ISDIR(inode->i_mode)) {
555 drop_nlink(dir); 568 drop_nlink(dir);
556 update_inode_page(dir); 569 update_inode_page(dir);
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
561 drop_nlink(inode); 574 drop_nlink(inode);
562 i_size_write(inode, 0); 575 i_size_write(inode, 0);
563 } 576 }
577 up_write(&F2FS_I(inode)->i_sem);
564 update_inode_page(inode); 578 update_inode_page(inode);
565 579
566 if (inode->i_nlink == 0) 580 if (inode->i_nlink == 0)
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
573 truncate_hole(dir, page->index, page->index + 1); 587 truncate_hole(dir, page->index, page->index + 1);
574 clear_page_dirty_for_io(page); 588 clear_page_dirty_for_io(page);
575 ClearPageUptodate(page); 589 ClearPageUptodate(page);
576 dec_page_count(sbi, F2FS_DIRTY_DENTS);
577 inode_dec_dirty_dents(dir); 590 inode_dec_dirty_dents(dir);
578 } 591 }
579 f2fs_put_page(page, 1); 592 f2fs_put_page(page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fc3c558cb4f3..2ecac8312359 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -40,6 +40,7 @@
40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
43 44
44#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
45#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -88,6 +89,16 @@ enum {
88 SIT_BITMAP 89 SIT_BITMAP
89}; 90};
90 91
92/*
93 * For CP/NAT/SIT/SSA readahead
94 */
95enum {
96 META_CP,
97 META_NAT,
98 META_SIT,
99 META_SSA
100};
101
91/* for the list of orphan inodes */ 102/* for the list of orphan inodes */
92struct orphan_inode_entry { 103struct orphan_inode_entry {
93 struct list_head list; /* list head */ 104 struct list_head list; /* list head */
@@ -187,16 +198,20 @@ struct extent_info {
187#define FADVISE_COLD_BIT 0x01 198#define FADVISE_COLD_BIT 0x01
188#define FADVISE_LOST_PINO_BIT 0x02 199#define FADVISE_LOST_PINO_BIT 0x02
189 200
201#define DEF_DIR_LEVEL 0
202
190struct f2fs_inode_info { 203struct f2fs_inode_info {
191 struct inode vfs_inode; /* serve a vfs inode */ 204 struct inode vfs_inode; /* serve a vfs inode */
192 unsigned long i_flags; /* keep an inode flags for ioctl */ 205 unsigned long i_flags; /* keep an inode flags for ioctl */
193 unsigned char i_advise; /* use to give file attribute hints */ 206 unsigned char i_advise; /* use to give file attribute hints */
207 unsigned char i_dir_level; /* use for dentry level for large dir */
194 unsigned int i_current_depth; /* use only in directory structure */ 208 unsigned int i_current_depth; /* use only in directory structure */
195 unsigned int i_pino; /* parent inode number */ 209 unsigned int i_pino; /* parent inode number */
196 umode_t i_acl_mode; /* keep file acl mode temporarily */ 210 umode_t i_acl_mode; /* keep file acl mode temporarily */
197 211
198 /* Use below internally in f2fs*/ 212 /* Use below internally in f2fs*/
199 unsigned long flags; /* use to pass per-file flags */ 213 unsigned long flags; /* use to pass per-file flags */
214 struct rw_semaphore i_sem; /* protect fi info */
200 atomic_t dirty_dents; /* # of dirty dentry pages */ 215 atomic_t dirty_dents; /* # of dirty dentry pages */
201 f2fs_hash_t chash; /* hash value of given file name */ 216 f2fs_hash_t chash; /* hash value of given file name */
202 unsigned int clevel; /* maximum level of given file name */ 217 unsigned int clevel; /* maximum level of given file name */
@@ -229,6 +244,7 @@ struct f2fs_nm_info {
229 block_t nat_blkaddr; /* base disk address of NAT */ 244 block_t nat_blkaddr; /* base disk address of NAT */
230 nid_t max_nid; /* maximum possible node ids */ 245 nid_t max_nid; /* maximum possible node ids */
231 nid_t next_scan_nid; /* the next nid to be scanned */ 246 nid_t next_scan_nid; /* the next nid to be scanned */
247 unsigned int ram_thresh; /* control the memory footprint */
232 248
233 /* NAT cache management */ 249 /* NAT cache management */
234 struct radix_tree_root nat_root;/* root of the nat entry cache */ 250 struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -238,6 +254,7 @@ struct f2fs_nm_info {
238 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 254 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
239 255
240 /* free node ids management */ 256 /* free node ids management */
257 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
241 struct list_head free_nid_list; /* a list for free nids */ 258 struct list_head free_nid_list; /* a list for free nids */
242 spinlock_t free_nid_list_lock; /* protect free nid list */ 259 spinlock_t free_nid_list_lock; /* protect free nid list */
243 unsigned int fcnt; /* the number of free node id */ 260 unsigned int fcnt; /* the number of free node id */
@@ -300,6 +317,12 @@ enum {
300 NO_CHECK_TYPE 317 NO_CHECK_TYPE
301}; 318};
302 319
320struct flush_cmd {
321 struct flush_cmd *next;
322 struct completion wait;
323 int ret;
324};
325
303struct f2fs_sm_info { 326struct f2fs_sm_info {
304 struct sit_info *sit_info; /* whole segment information */ 327 struct sit_info *sit_info; /* whole segment information */
305 struct free_segmap_info *free_info; /* free segment information */ 328 struct free_segmap_info *free_info; /* free segment information */
@@ -328,6 +351,14 @@ struct f2fs_sm_info {
328 351
329 unsigned int ipu_policy; /* in-place-update policy */ 352 unsigned int ipu_policy; /* in-place-update policy */
330 unsigned int min_ipu_util; /* in-place-update threshold */ 353 unsigned int min_ipu_util; /* in-place-update threshold */
354
355 /* for flush command control */
356 struct task_struct *f2fs_issue_flush; /* flush thread */
357 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
358 struct flush_cmd *issue_list; /* list for command issue */
359 struct flush_cmd *dispatch_list; /* list for command dispatch */
360 spinlock_t issue_lock; /* for issue list lock */
361 struct flush_cmd *issue_tail; /* list tail of issue list */
331}; 362};
332 363
333/* 364/*
@@ -378,7 +409,7 @@ struct f2fs_bio_info {
378 struct bio *bio; /* bios to merge */ 409 struct bio *bio; /* bios to merge */
379 sector_t last_block_in_bio; /* last block number */ 410 sector_t last_block_in_bio; /* last block number */
380 struct f2fs_io_info fio; /* store buffered io info. */ 411 struct f2fs_io_info fio; /* store buffered io info. */
381 struct mutex io_mutex; /* mutex for bio */ 412 struct rw_semaphore io_rwsem; /* blocking op for bio */
382}; 413};
383 414
384struct f2fs_sb_info { 415struct f2fs_sb_info {
@@ -398,6 +429,7 @@ struct f2fs_sb_info {
398 /* for bio operations */ 429 /* for bio operations */
399 struct f2fs_bio_info read_io; /* for read bios */ 430 struct f2fs_bio_info read_io; /* for read bios */
400 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 431 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
432 struct completion *wait_io; /* for completion bios */
401 433
402 /* for checkpoint */ 434 /* for checkpoint */
403 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 435 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -407,7 +439,6 @@ struct f2fs_sb_info {
407 struct mutex node_write; /* locking node writes */ 439 struct mutex node_write; /* locking node writes */
408 struct mutex writepages; /* mutex for writepages() */ 440 struct mutex writepages; /* mutex for writepages() */
409 bool por_doing; /* recovery is doing or not */ 441 bool por_doing; /* recovery is doing or not */
410 bool on_build_free_nids; /* build_free_nids is doing */
411 wait_queue_head_t cp_wait; 442 wait_queue_head_t cp_wait;
412 443
413 /* for orphan inode management */ 444 /* for orphan inode management */
@@ -436,6 +467,7 @@ struct f2fs_sb_info {
436 unsigned int total_valid_node_count; /* valid node block count */ 467 unsigned int total_valid_node_count; /* valid node block count */
437 unsigned int total_valid_inode_count; /* valid inode count */ 468 unsigned int total_valid_inode_count; /* valid inode count */
438 int active_logs; /* # of active logs */ 469 int active_logs; /* # of active logs */
470 int dir_level; /* directory level */
439 471
440 block_t user_block_count; /* # of user blocks */ 472 block_t user_block_count; /* # of user blocks */
441 block_t total_valid_block_count; /* # of valid blocks */ 473 block_t total_valid_block_count; /* # of valid blocks */
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
622 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; 654 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
623} 655}
624 656
657static inline bool f2fs_has_xattr_block(unsigned int ofs)
658{
659 return ofs == XATTR_NODE_OFFSET;
660}
661
625static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 662static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
626 struct inode *inode, blkcnt_t count) 663 struct inode *inode, blkcnt_t count)
627{ 664{
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
661 698
662static inline void inode_inc_dirty_dents(struct inode *inode) 699static inline void inode_inc_dirty_dents(struct inode *inode)
663{ 700{
701 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
664 atomic_inc(&F2FS_I(inode)->dirty_dents); 702 atomic_inc(&F2FS_I(inode)->dirty_dents);
665} 703}
666 704
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
671 709
672static inline void inode_dec_dirty_dents(struct inode *inode) 710static inline void inode_dec_dirty_dents(struct inode *inode)
673{ 711{
712 if (!S_ISDIR(inode->i_mode))
713 return;
714
715 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
674 atomic_dec(&F2FS_I(inode)->dirty_dents); 716 atomic_dec(&F2FS_I(inode)->dirty_dents);
675} 717}
676 718
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
679 return atomic_read(&sbi->nr_pages[count_type]); 721 return atomic_read(&sbi->nr_pages[count_type]);
680} 722}
681 723
724static inline int get_dirty_dents(struct inode *inode)
725{
726 return atomic_read(&F2FS_I(inode)->dirty_dents);
727}
728
682static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 729static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
683{ 730{
684 unsigned int pages_per_sec = sbi->segs_per_sec * 731 unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
689 736
690static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 737static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
691{ 738{
692 block_t ret; 739 return sbi->total_valid_block_count;
693 spin_lock(&sbi->stat_lock);
694 ret = sbi->total_valid_block_count;
695 spin_unlock(&sbi->stat_lock);
696 return ret;
697} 740}
698 741
699static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) 742static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
789 832
790static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) 833static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
791{ 834{
792 unsigned int ret; 835 return sbi->total_valid_node_count;
793 spin_lock(&sbi->stat_lock);
794 ret = sbi->total_valid_node_count;
795 spin_unlock(&sbi->stat_lock);
796 return ret;
797} 836}
798 837
799static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 838static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
814 853
815static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 854static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
816{ 855{
817 unsigned int ret; 856 return sbi->total_valid_inode_count;
818 spin_lock(&sbi->stat_lock);
819 ret = sbi->total_valid_inode_count;
820 spin_unlock(&sbi->stat_lock);
821 return ret;
822} 857}
823 858
824static inline void f2fs_put_page(struct page *page, int unlock) 859static inline void f2fs_put_page(struct page *page, int unlock)
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
844} 879}
845 880
846static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, 881static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
847 size_t size, void (*ctor)(void *)) 882 size_t size)
848{ 883{
849 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); 884 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
850} 885}
851 886
852static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, 887static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
983 ri->i_inline |= F2FS_INLINE_DATA; 1018 ri->i_inline |= F2FS_INLINE_DATA;
984} 1019}
985 1020
1021static inline int f2fs_has_inline_xattr(struct inode *inode)
1022{
1023 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
1024}
1025
986static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) 1026static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
987{ 1027{
988 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 1028 if (f2fs_has_inline_xattr(&fi->vfs_inode))
989 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; 1029 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
990 return DEF_ADDRS_PER_INODE; 1030 return DEF_ADDRS_PER_INODE;
991} 1031}
992 1032
993static inline void *inline_xattr_addr(struct page *page) 1033static inline void *inline_xattr_addr(struct page *page)
994{ 1034{
995 struct f2fs_inode *ri; 1035 struct f2fs_inode *ri = F2FS_INODE(page);
996 ri = (struct f2fs_inode *)page_address(page);
997 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1036 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
998 F2FS_INLINE_XATTR_ADDRS]); 1037 F2FS_INLINE_XATTR_ADDRS]);
999} 1038}
1000 1039
1001static inline int inline_xattr_size(struct inode *inode) 1040static inline int inline_xattr_size(struct inode *inode)
1002{ 1041{
1003 if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) 1042 if (f2fs_has_inline_xattr(inode))
1004 return F2FS_INLINE_XATTR_ADDRS << 2; 1043 return F2FS_INLINE_XATTR_ADDRS << 2;
1005 else 1044 else
1006 return 0; 1045 return 0;
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1013 1052
1014static inline void *inline_data_addr(struct page *page) 1053static inline void *inline_data_addr(struct page *page)
1015{ 1054{
1016 struct f2fs_inode *ri; 1055 struct f2fs_inode *ri = F2FS_INODE(page);
1017 ri = (struct f2fs_inode *)page_address(page);
1018 return (void *)&(ri->i_addr[1]); 1056 return (void *)&(ri->i_addr[1]);
1019} 1057}
1020 1058
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
1023 return sb->s_flags & MS_RDONLY; 1061 return sb->s_flags & MS_RDONLY;
1024} 1062}
1025 1063
1064static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1065{
1066 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1067 sbi->sb->s_flags |= MS_RDONLY;
1068}
1069
1026#define get_inode_mode(i) \ 1070#define get_inode_mode(i) \
1027 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1071 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1028 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1072 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
1048struct inode *f2fs_iget(struct super_block *, unsigned long); 1092struct inode *f2fs_iget(struct super_block *, unsigned long);
1049int try_to_free_nats(struct f2fs_sb_info *, int); 1093int try_to_free_nats(struct f2fs_sb_info *, int);
1050void update_inode(struct inode *, struct page *); 1094void update_inode(struct inode *, struct page *);
1051int update_inode_page(struct inode *); 1095void update_inode_page(struct inode *);
1052int f2fs_write_inode(struct inode *, struct writeback_control *); 1096int f2fs_write_inode(struct inode *, struct writeback_control *);
1053void f2fs_evict_inode(struct inode *); 1097void f2fs_evict_inode(struct inode *);
1054 1098
@@ -1097,6 +1141,7 @@ struct dnode_of_data;
1097struct node_info; 1141struct node_info;
1098 1142
1099int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1143int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1144bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
1100void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1145void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1101int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1146int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1102int truncate_inode_blocks(struct inode *, pgoff_t); 1147int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1115void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1160void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1116void recover_node_page(struct f2fs_sb_info *, struct page *, 1161void recover_node_page(struct f2fs_sb_info *, struct page *,
1117 struct f2fs_summary *, struct node_info *, block_t); 1162 struct f2fs_summary *, struct node_info *, block_t);
1163bool recover_xattr_data(struct inode *, struct page *, block_t);
1118int recover_inode_page(struct f2fs_sb_info *, struct page *); 1164int recover_inode_page(struct f2fs_sb_info *, struct page *);
1119int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1165int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1120 struct f2fs_summary_block *); 1166 struct f2fs_summary_block *);
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
1129 */ 1175 */
1130void f2fs_balance_fs(struct f2fs_sb_info *); 1176void f2fs_balance_fs(struct f2fs_sb_info *);
1131void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1177void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1178int f2fs_issue_flush(struct f2fs_sb_info *);
1132void invalidate_blocks(struct f2fs_sb_info *, block_t); 1179void invalidate_blocks(struct f2fs_sb_info *, block_t);
1180void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1133void clear_prefree_segments(struct f2fs_sb_info *); 1181void clear_prefree_segments(struct f2fs_sb_info *);
1134int npages_for_summary_flush(struct f2fs_sb_info *); 1182int npages_for_summary_flush(struct f2fs_sb_info *);
1135void allocate_new_segments(struct f2fs_sb_info *); 1183void allocate_new_segments(struct f2fs_sb_info *);
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
1162 */ 1210 */
1163struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1211struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1164struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1212struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1213int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1165long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1214long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1166int acquire_orphan_inode(struct f2fs_sb_info *); 1215int acquire_orphan_inode(struct f2fs_sb_info *);
1167void release_orphan_inode(struct f2fs_sb_info *); 1216void release_orphan_inode(struct f2fs_sb_info *);
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
1231 int util_free, util_valid, util_invalid; 1280 int util_free, util_valid, util_invalid;
1232 int rsvd_segs, overp_segs; 1281 int rsvd_segs, overp_segs;
1233 int dirty_count, node_pages, meta_pages; 1282 int dirty_count, node_pages, meta_pages;
1234 int prefree_count, call_count; 1283 int prefree_count, call_count, cp_count;
1235 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1284 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1236 int tot_blks, data_blks, node_blks; 1285 int tot_blks, data_blks, node_blks;
1237 int curseg[NR_CURSEG_TYPE]; 1286 int curseg[NR_CURSEG_TYPE];
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1248 return (struct f2fs_stat_info *)sbi->stat_info; 1297 return (struct f2fs_stat_info *)sbi->stat_info;
1249} 1298}
1250 1299
1300#define stat_inc_cp_count(si) ((si)->cp_count++)
1251#define stat_inc_call_count(si) ((si)->call_count++) 1301#define stat_inc_call_count(si) ((si)->call_count++)
1252#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1302#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1253#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1303#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
1302void __init f2fs_create_root_stats(void); 1352void __init f2fs_create_root_stats(void);
1303void f2fs_destroy_root_stats(void); 1353void f2fs_destroy_root_stats(void);
1304#else 1354#else
1355#define stat_inc_cp_count(si)
1305#define stat_inc_call_count(si) 1356#define stat_inc_call_count(si)
1306#define stat_inc_bggc_count(si) 1357#define stat_inc_bggc_count(si)
1307#define stat_inc_dirty_dir(sbi) 1358#define stat_inc_dirty_dir(sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 129a3bdb05ca..60e7d5448a1d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
76 trace_f2fs_vm_page_mkwrite(page, DATA); 76 trace_f2fs_vm_page_mkwrite(page, DATA);
77mapped: 77mapped:
78 /* fill the page */ 78 /* fill the page */
79 wait_on_page_writeback(page); 79 f2fs_wait_on_page_writeback(page, DATA);
80out: 80out:
81 sb_end_pagefault(inode->i_sb); 81 sb_end_pagefault(inode->i_sb);
82 return block_page_mkwrite_return(err); 82 return block_page_mkwrite_return(err);
@@ -112,11 +112,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
112int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 112int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
113{ 113{
114 struct inode *inode = file->f_mapping->host; 114 struct inode *inode = file->f_mapping->host;
115 struct f2fs_inode_info *fi = F2FS_I(inode);
115 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 116 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
116 int ret = 0; 117 int ret = 0;
117 bool need_cp = false; 118 bool need_cp = false;
118 struct writeback_control wbc = { 119 struct writeback_control wbc = {
119 .sync_mode = WB_SYNC_NONE, 120 .sync_mode = WB_SYNC_ALL,
120 .nr_to_write = LONG_MAX, 121 .nr_to_write = LONG_MAX,
121 .for_reclaim = 0, 122 .for_reclaim = 0,
122 }; 123 };
@@ -134,7 +135,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
134 /* guarantee free sections for fsync */ 135 /* guarantee free sections for fsync */
135 f2fs_balance_fs(sbi); 136 f2fs_balance_fs(sbi);
136 137
137 mutex_lock(&inode->i_mutex); 138 down_read(&fi->i_sem);
138 139
139 /* 140 /*
140 * Both of fdatasync() and fsync() are able to be recovered from 141 * Both of fdatasync() and fsync() are able to be recovered from
@@ -151,25 +152,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
151 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) 152 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
152 need_cp = true; 153 need_cp = true;
153 154
155 up_read(&fi->i_sem);
156
154 if (need_cp) { 157 if (need_cp) {
155 nid_t pino; 158 nid_t pino;
156 159
157 F2FS_I(inode)->xattr_ver = 0;
158
159 /* all the dirty node pages should be flushed for POR */ 160 /* all the dirty node pages should be flushed for POR */
160 ret = f2fs_sync_fs(inode->i_sb, 1); 161 ret = f2fs_sync_fs(inode->i_sb, 1);
162
163 down_write(&fi->i_sem);
164 F2FS_I(inode)->xattr_ver = 0;
161 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 165 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
162 get_parent_ino(inode, &pino)) { 166 get_parent_ino(inode, &pino)) {
163 F2FS_I(inode)->i_pino = pino; 167 F2FS_I(inode)->i_pino = pino;
164 file_got_pino(inode); 168 file_got_pino(inode);
169 up_write(&fi->i_sem);
165 mark_inode_dirty_sync(inode); 170 mark_inode_dirty_sync(inode);
166 ret = f2fs_write_inode(inode, NULL); 171 ret = f2fs_write_inode(inode, NULL);
167 if (ret) 172 if (ret)
168 goto out; 173 goto out;
174 } else {
175 up_write(&fi->i_sem);
169 } 176 }
170 } else { 177 } else {
171 /* if there is no written node page, write its inode page */ 178 /* if there is no written node page, write its inode page */
172 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 179 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
180 if (fsync_mark_done(sbi, inode->i_ino))
181 goto out;
173 mark_inode_dirty_sync(inode); 182 mark_inode_dirty_sync(inode);
174 ret = f2fs_write_inode(inode, NULL); 183 ret = f2fs_write_inode(inode, NULL);
175 if (ret) 184 if (ret)
@@ -178,10 +187,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
178 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 187 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
179 if (ret) 188 if (ret)
180 goto out; 189 goto out;
181 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 190 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
182 } 191 }
183out: 192out:
184 mutex_unlock(&inode->i_mutex);
185 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 193 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
186 return ret; 194 return ret;
187} 195}
@@ -246,7 +254,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
246 f2fs_put_page(page, 1); 254 f2fs_put_page(page, 1);
247 return; 255 return;
248 } 256 }
249 wait_on_page_writeback(page); 257 f2fs_wait_on_page_writeback(page, DATA);
250 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 258 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
251 set_page_dirty(page); 259 set_page_dirty(page);
252 f2fs_put_page(page, 1); 260 f2fs_put_page(page, 1);
@@ -423,7 +431,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
423 f2fs_unlock_op(sbi); 431 f2fs_unlock_op(sbi);
424 432
425 if (!IS_ERR(page)) { 433 if (!IS_ERR(page)) {
426 wait_on_page_writeback(page); 434 f2fs_wait_on_page_writeback(page, DATA);
427 zero_user(page, start, len); 435 zero_user(page, start, len);
428 set_page_dirty(page); 436 set_page_dirty(page);
429 f2fs_put_page(page, 1); 437 f2fs_put_page(page, 1);
@@ -561,6 +569,8 @@ static long f2fs_fallocate(struct file *file, int mode,
561 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 569 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
562 return -EOPNOTSUPP; 570 return -EOPNOTSUPP;
563 571
572 mutex_lock(&inode->i_mutex);
573
564 if (mode & FALLOC_FL_PUNCH_HOLE) 574 if (mode & FALLOC_FL_PUNCH_HOLE)
565 ret = punch_hole(inode, offset, len); 575 ret = punch_hole(inode, offset, len);
566 else 576 else
@@ -570,6 +580,9 @@ static long f2fs_fallocate(struct file *file, int mode,
570 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 580 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
571 mark_inode_dirty(inode); 581 mark_inode_dirty(inode);
572 } 582 }
583
584 mutex_unlock(&inode->i_mutex);
585
573 trace_f2fs_fallocate(inode, mode, offset, len, ret); 586 trace_f2fs_fallocate(inode, mode, offset, len, ret);
574 return ret; 587 return ret;
575} 588}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ea0371e854b4..b90dbe55403a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 set_page_dirty(page); 531 set_page_dirty(page);
532 set_cold_data(page); 532 set_cold_data(page);
533 } else { 533 } else {
534 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
535
536 f2fs_wait_on_page_writeback(page, DATA); 534 f2fs_wait_on_page_writeback(page, DATA);
537 535
538 if (clear_page_dirty_for_io(page) && 536 if (clear_page_dirty_for_io(page))
539 S_ISDIR(inode->i_mode)) {
540 dec_page_count(sbi, F2FS_DIRTY_DENTS);
541 inode_dec_dirty_dents(inode); 537 inode_dec_dirty_dents(inode);
542 }
543 set_cold_data(page); 538 set_cold_data(page);
544 do_write_data_page(page, &fio); 539 do_write_data_page(page, &fio);
545 clear_cold_data(page); 540 clear_cold_data(page);
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
701gc_more: 696gc_more:
702 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 697 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
703 goto stop; 698 goto stop;
699 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
700 goto stop;
704 701
705 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 702 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
706 gc_type = FG_GC; 703 gc_type = FG_GC;
@@ -711,6 +708,11 @@ gc_more:
711 goto stop; 708 goto stop;
712 ret = 0; 709 ret = 0;
713 710
711 /* readahead multi ssa blocks those have contiguous address */
712 if (sbi->segs_per_sec > 1)
713 ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
714 META_SSA);
715
714 for (i = 0; i < sbi->segs_per_sec; i++) 716 for (i = 0; i < sbi->segs_per_sec; i++)
715 do_garbage_collect(sbi, segno + i, &ilist, gc_type); 717 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
716 718
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
740int __init create_gc_caches(void) 742int __init create_gc_caches(void)
741{ 743{
742 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 744 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
743 sizeof(struct inode_entry), NULL); 745 sizeof(struct inode_entry));
744 if (!winode_slab) 746 if (!winode_slab)
745 return -ENOMEM; 747 return -ENOMEM;
746 return 0; 748 return 0;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 31ee5b164ff9..383db1fabcf4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
45 } 45 }
46 46
47 ipage = get_node_page(sbi, inode->i_ino); 47 ipage = get_node_page(sbi, inode->i_ino);
48 if (IS_ERR(ipage)) 48 if (IS_ERR(ipage)) {
49 unlock_page(page);
49 return PTR_ERR(ipage); 50 return PTR_ERR(ipage);
51 }
50 52
51 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 53 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
52 54
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 28cea76d78c6..ee829d360468 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
107 fi->flags = 0; 107 fi->flags = 0;
108 fi->i_advise = ri->i_advise; 108 fi->i_advise = ri->i_advise;
109 fi->i_pino = le32_to_cpu(ri->i_pino); 109 fi->i_pino = le32_to_cpu(ri->i_pino);
110 fi->i_dir_level = ri->i_dir_level;
110 111
111 get_extent_info(&fi->ext, ri->i_ext); 112 get_extent_info(&fi->ext, ri->i_ext);
112 get_inline_info(fi, ri); 113 get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
204 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 205 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
205 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 206 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
206 ri->i_generation = cpu_to_le32(inode->i_generation); 207 ri->i_generation = cpu_to_le32(inode->i_generation);
208 ri->i_dir_level = F2FS_I(inode)->i_dir_level;
207 209
208 __set_inode_rdev(inode, ri); 210 __set_inode_rdev(inode, ri);
209 set_cold_node(inode, node_page); 211 set_cold_node(inode, node_page);
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
212 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 214 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
213} 215}
214 216
215int update_inode_page(struct inode *inode) 217void update_inode_page(struct inode *inode)
216{ 218{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 219 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct page *node_page; 220 struct page *node_page;
219 221retry:
220 node_page = get_node_page(sbi, inode->i_ino); 222 node_page = get_node_page(sbi, inode->i_ino);
221 if (IS_ERR(node_page)) 223 if (IS_ERR(node_page)) {
222 return PTR_ERR(node_page); 224 int err = PTR_ERR(node_page);
223 225 if (err == -ENOMEM) {
226 cond_resched();
227 goto retry;
228 } else if (err != -ENOENT) {
229 f2fs_stop_checkpoint(sbi);
230 }
231 return;
232 }
224 update_inode(inode, node_page); 233 update_inode(inode, node_page);
225 f2fs_put_page(node_page, 1); 234 f2fs_put_page(node_page, 1);
226 return 0;
227} 235}
228 236
229int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 237int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
230{ 238{
231 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
232 int ret;
233 240
234 if (inode->i_ino == F2FS_NODE_INO(sbi) || 241 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
235 inode->i_ino == F2FS_META_INO(sbi)) 242 inode->i_ino == F2FS_META_INO(sbi))
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
243 * during the urgent cleaning time when runing out of free sections. 250 * during the urgent cleaning time when runing out of free sections.
244 */ 251 */
245 f2fs_lock_op(sbi); 252 f2fs_lock_op(sbi);
246 ret = update_inode_page(inode); 253 update_inode_page(inode);
247 f2fs_unlock_op(sbi); 254 f2fs_unlock_op(sbi);
248 255
249 if (wbc) 256 if (wbc)
250 f2fs_balance_fs(sbi); 257 f2fs_balance_fs(sbi);
251 258
252 return ret; 259 return 0;
253} 260}
254 261
255/* 262/*
@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
266 inode->i_ino == F2FS_META_INO(sbi)) 273 inode->i_ino == F2FS_META_INO(sbi))
267 goto no_delete; 274 goto no_delete;
268 275
269 f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); 276 f2fs_bug_on(get_dirty_dents(inode));
270 remove_dirty_dir_inode(inode); 277 remove_dirty_dir_inode(inode);
271 278
272 if (inode->i_nlink || is_bad_inode(inode)) 279 if (inode->i_nlink || is_bad_inode(inode))
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459e97bf..a9409d19dfd4 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 inode = f2fs_iget(dir->i_sb, ino); 207 inode = f2fs_iget(dir->i_sb, ino);
208 if (IS_ERR(inode)) 208 if (IS_ERR(inode))
209 return ERR_CAST(inode); 209 return ERR_CAST(inode);
210
211 stat_inc_inline_inode(inode);
210 } 212 }
211 213
212 return d_splice_alias(inode, dentry); 214 return d_splice_alias(inode, dentry);
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
424 } 426 }
425 427
426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 428 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
429 down_write(&F2FS_I(old_inode)->i_sem);
427 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 430 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
431 up_write(&F2FS_I(old_inode)->i_sem);
428 432
429 new_inode->i_ctime = CURRENT_TIME; 433 new_inode->i_ctime = CURRENT_TIME;
434 down_write(&F2FS_I(new_inode)->i_sem);
430 if (old_dir_entry) 435 if (old_dir_entry)
431 drop_nlink(new_inode); 436 drop_nlink(new_inode);
432 drop_nlink(new_inode); 437 drop_nlink(new_inode);
438 up_write(&F2FS_I(new_inode)->i_sem);
439
433 mark_inode_dirty(new_inode); 440 mark_inode_dirty(new_inode);
434 441
435 if (!new_inode->i_nlink) 442 if (!new_inode->i_nlink)
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
459 if (old_dir != new_dir) { 466 if (old_dir != new_dir) {
460 f2fs_set_link(old_inode, old_dir_entry, 467 f2fs_set_link(old_inode, old_dir_entry,
461 old_dir_page, new_dir); 468 old_dir_page, new_dir);
469 down_write(&F2FS_I(old_inode)->i_sem);
462 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 470 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
471 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 472 update_inode_page(old_inode);
464 } else { 473 } else {
465 kunmap(old_dir_page); 474 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b0649b76eb4f..a161e955c4c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,27 @@
21#include "segment.h" 21#include "segment.h"
22#include <trace/events/f2fs.h> 22#include <trace/events/f2fs.h>
23 23
24#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
25
24static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
25static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
26 28
29static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
30{
31 struct sysinfo val;
32 unsigned long mem_size = 0;
33
34 si_meminfo(&val);
35 if (type == FREE_NIDS)
36 mem_size = nm_i->fcnt * sizeof(struct free_nid);
37 else if (type == NAT_ENTRIES)
38 mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
39 mem_size >>= 12;
40
41 /* give 50:50 memory for free nids and nat caches respectively */
42 return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
43}
44
27static void clear_node_page_dirty(struct page *page) 45static void clear_node_page_dirty(struct page *page)
28{ 46{
29 struct address_space *mapping = page->mapping; 47 struct address_space *mapping = page->mapping;
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
82 return dst_page; 100 return dst_page;
83} 101}
84 102
85/*
86 * Readahead NAT pages
87 */
88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
89{
90 struct address_space *mapping = META_MAPPING(sbi);
91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct page *page;
93 pgoff_t index;
94 int i;
95 struct f2fs_io_info fio = {
96 .type = META,
97 .rw = READ_SYNC | REQ_META | REQ_PRIO
98 };
99
100
101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
102 if (unlikely(nid >= nm_i->max_nid))
103 nid = 0;
104 index = current_nat_addr(sbi, nid);
105
106 page = grab_cache_page(mapping, index);
107 if (!page)
108 continue;
109 if (PageUptodate(page)) {
110 mark_page_accessed(page);
111 f2fs_put_page(page, 1);
112 continue;
113 }
114 f2fs_submit_page_mbio(sbi, page, index, &fio);
115 mark_page_accessed(page);
116 f2fs_put_page(page, 0);
117 }
118 f2fs_submit_merged_bio(sbi, META, READ);
119}
120
121static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 103static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
122{ 104{
123 return radix_tree_lookup(&nm_i->nat_root, n); 105 return radix_tree_lookup(&nm_i->nat_root, n);
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
151 return is_cp; 133 return is_cp;
152} 134}
153 135
136bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
137{
138 struct f2fs_nm_info *nm_i = NM_I(sbi);
139 struct nat_entry *e;
140 bool fsync_done = false;
141
142 read_lock(&nm_i->nat_tree_lock);
143 e = __lookup_nat_cache(nm_i, nid);
144 if (e)
145 fsync_done = e->fsync_done;
146 read_unlock(&nm_i->nat_tree_lock);
147 return fsync_done;
148}
149
154static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 150static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
155{ 151{
156 struct nat_entry *new; 152 struct nat_entry *new;
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
164 } 160 }
165 memset(new, 0, sizeof(struct nat_entry)); 161 memset(new, 0, sizeof(struct nat_entry));
166 nat_set_nid(new, nid); 162 nat_set_nid(new, nid);
163 new->checkpointed = true;
167 list_add_tail(&new->list, &nm_i->nat_entries); 164 list_add_tail(&new->list, &nm_i->nat_entries);
168 nm_i->nat_cnt++; 165 nm_i->nat_cnt++;
169 return new; 166 return new;
@@ -185,13 +182,12 @@ retry:
185 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); 182 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
186 nat_set_ino(e, le32_to_cpu(ne->ino)); 183 nat_set_ino(e, le32_to_cpu(ne->ino));
187 nat_set_version(e, ne->version); 184 nat_set_version(e, ne->version);
188 e->checkpointed = true;
189 } 185 }
190 write_unlock(&nm_i->nat_tree_lock); 186 write_unlock(&nm_i->nat_tree_lock);
191} 187}
192 188
193static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 189static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
194 block_t new_blkaddr) 190 block_t new_blkaddr, bool fsync_done)
195{ 191{
196 struct f2fs_nm_info *nm_i = NM_I(sbi); 192 struct f2fs_nm_info *nm_i = NM_I(sbi);
197 struct nat_entry *e; 193 struct nat_entry *e;
@@ -205,7 +201,6 @@ retry:
205 goto retry; 201 goto retry;
206 } 202 }
207 e->ni = *ni; 203 e->ni = *ni;
208 e->checkpointed = true;
209 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 204 f2fs_bug_on(ni->blk_addr == NEW_ADDR);
210 } else if (new_blkaddr == NEW_ADDR) { 205 } else if (new_blkaddr == NEW_ADDR) {
211 /* 206 /*
@@ -217,9 +212,6 @@ retry:
217 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 212 f2fs_bug_on(ni->blk_addr != NULL_ADDR);
218 } 213 }
219 214
220 if (new_blkaddr == NEW_ADDR)
221 e->checkpointed = false;
222
223 /* sanity check */ 215 /* sanity check */
224 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 216 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
225 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 217 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
@@ -239,6 +231,11 @@ retry:
239 /* change address */ 231 /* change address */
240 nat_set_blkaddr(e, new_blkaddr); 232 nat_set_blkaddr(e, new_blkaddr);
241 __set_nat_cache_dirty(nm_i, e); 233 __set_nat_cache_dirty(nm_i, e);
234
235 /* update fsync_mark if its inode nat entry is still alive */
236 e = __lookup_nat_cache(nm_i, ni->ino);
237 if (e)
238 e->fsync_done = fsync_done;
242 write_unlock(&nm_i->nat_tree_lock); 239 write_unlock(&nm_i->nat_tree_lock);
243} 240}
244 241
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
246{ 243{
247 struct f2fs_nm_info *nm_i = NM_I(sbi); 244 struct f2fs_nm_info *nm_i = NM_I(sbi);
248 245
249 if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) 246 if (available_free_memory(nm_i, NAT_ENTRIES))
250 return 0; 247 return 0;
251 248
252 write_lock(&nm_i->nat_tree_lock); 249 write_lock(&nm_i->nat_tree_lock);
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
505 /* Deallocate node address */ 502 /* Deallocate node address */
506 invalidate_blocks(sbi, ni.blk_addr); 503 invalidate_blocks(sbi, ni.blk_addr);
507 dec_valid_node_count(sbi, dn->inode); 504 dec_valid_node_count(sbi, dn->inode);
508 set_node_addr(sbi, &ni, NULL_ADDR); 505 set_node_addr(sbi, &ni, NULL_ADDR, false);
509 506
510 if (dn->nid == dn->inode->i_ino) { 507 if (dn->nid == dn->inode->i_ino) {
511 remove_orphan_inode(sbi, dn->nid); 508 remove_orphan_inode(sbi, dn->nid);
@@ -763,7 +760,7 @@ skip_partial:
763 f2fs_put_page(page, 1); 760 f2fs_put_page(page, 1);
764 goto restart; 761 goto restart;
765 } 762 }
766 wait_on_page_writeback(page); 763 f2fs_wait_on_page_writeback(page, NODE);
767 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 764 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
768 set_page_dirty(page); 765 set_page_dirty(page);
769 unlock_page(page); 766 unlock_page(page);
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
852 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 849 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
853 return ERR_PTR(-EPERM); 850 return ERR_PTR(-EPERM);
854 851
855 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 852 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
853 dn->nid, AOP_FLAG_NOFS);
856 if (!page) 854 if (!page)
857 return ERR_PTR(-ENOMEM); 855 return ERR_PTR(-ENOMEM);
858 856
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
867 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 865 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
868 new_ni = old_ni; 866 new_ni = old_ni;
869 new_ni.ino = dn->inode->i_ino; 867 new_ni.ino = dn->inode->i_ino;
870 set_node_addr(sbi, &new_ni, NEW_ADDR); 868 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
871 869
872 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 870 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
873 set_cold_node(dn->inode, page); 871 set_cold_node(dn->inode, page);
874 SetPageUptodate(page); 872 SetPageUptodate(page);
875 set_page_dirty(page); 873 set_page_dirty(page);
876 874
877 if (ofs == XATTR_NODE_OFFSET) 875 if (f2fs_has_xattr_block(ofs))
878 F2FS_I(dn->inode)->i_xattr_nid = dn->nid; 876 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
879 877
880 dn->node_page = page; 878 dn->node_page = page;
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
948 struct page *page; 946 struct page *page;
949 int err; 947 int err;
950repeat: 948repeat:
951 page = grab_cache_page(NODE_MAPPING(sbi), nid); 949 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
950 nid, AOP_FLAG_NOFS);
952 if (!page) 951 if (!page)
953 return ERR_PTR(-ENOMEM); 952 return ERR_PTR(-ENOMEM);
954 953
@@ -959,7 +958,7 @@ repeat:
959 goto got_it; 958 goto got_it;
960 959
961 lock_page(page); 960 lock_page(page);
962 if (unlikely(!PageUptodate(page))) { 961 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
963 f2fs_put_page(page, 1); 962 f2fs_put_page(page, 1);
964 return ERR_PTR(-EIO); 963 return ERR_PTR(-EIO);
965 } 964 }
@@ -968,7 +967,6 @@ repeat:
968 goto repeat; 967 goto repeat;
969 } 968 }
970got_it: 969got_it:
971 f2fs_bug_on(nid != nid_of_node(page));
972 mark_page_accessed(page); 970 mark_page_accessed(page);
973 return page; 971 return page;
974} 972}
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1168 continue; 1166 continue;
1169 1167
1170 if (ino && ino_of_node(page) == ino) { 1168 if (ino && ino_of_node(page) == ino) {
1171 wait_on_page_writeback(page); 1169 f2fs_wait_on_page_writeback(page, NODE);
1172 if (TestClearPageError(page)) 1170 if (TestClearPageError(page))
1173 ret = -EIO; 1171 ret = -EIO;
1174 } 1172 }
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
1201 if (unlikely(sbi->por_doing)) 1199 if (unlikely(sbi->por_doing))
1202 goto redirty_out; 1200 goto redirty_out;
1203 1201
1204 wait_on_page_writeback(page); 1202 f2fs_wait_on_page_writeback(page, NODE);
1205 1203
1206 /* get old block addr of this node page */ 1204 /* get old block addr of this node page */
1207 nid = nid_of_node(page); 1205 nid = nid_of_node(page);
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
1222 mutex_lock(&sbi->node_write); 1220 mutex_lock(&sbi->node_write);
1223 set_page_writeback(page); 1221 set_page_writeback(page);
1224 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1222 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1225 set_node_addr(sbi, &ni, new_addr); 1223 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1226 dec_page_count(sbi, F2FS_DIRTY_NODES); 1224 dec_page_count(sbi, F2FS_DIRTY_NODES);
1227 mutex_unlock(&sbi->node_write); 1225 mutex_unlock(&sbi->node_write);
1228 unlock_page(page); 1226 unlock_page(page);
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
1231redirty_out: 1229redirty_out:
1232 dec_page_count(sbi, F2FS_DIRTY_NODES); 1230 dec_page_count(sbi, F2FS_DIRTY_NODES);
1233 wbc->pages_skipped++; 1231 wbc->pages_skipped++;
1232 account_page_redirty(page);
1234 set_page_dirty(page); 1233 set_page_dirty(page);
1235 return AOP_WRITEPAGE_ACTIVATE; 1234 return AOP_WRITEPAGE_ACTIVATE;
1236} 1235}
1237 1236
1238/*
1239 * It is very important to gather dirty pages and write at once, so that we can
1240 * submit a big bio without interfering other data writes.
1241 * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
1242 */
1243#define COLLECT_DIRTY_NODES 1536
1244static int f2fs_write_node_pages(struct address_space *mapping, 1237static int f2fs_write_node_pages(struct address_space *mapping,
1245 struct writeback_control *wbc) 1238 struct writeback_control *wbc)
1246{ 1239{
1247 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1240 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1248 long nr_to_write = wbc->nr_to_write; 1241 long diff;
1249 1242
1250 /* balancing f2fs's metadata in background */ 1243 /* balancing f2fs's metadata in background */
1251 f2fs_balance_fs_bg(sbi); 1244 f2fs_balance_fs_bg(sbi);
1252 1245
1253 /* collect a number of dirty node pages and write together */ 1246 /* collect a number of dirty node pages and write together */
1254 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) 1247 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1255 return 0; 1248 goto skip_write;
1256 1249
1257 /* if mounting is failed, skip writing node pages */ 1250 diff = nr_pages_to_write(sbi, NODE, wbc);
1258 wbc->nr_to_write = 3 * max_hw_blocks(sbi);
1259 wbc->sync_mode = WB_SYNC_NONE; 1251 wbc->sync_mode = WB_SYNC_NONE;
1260 sync_node_pages(sbi, 0, wbc); 1252 sync_node_pages(sbi, 0, wbc);
1261 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - 1253 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1262 wbc->nr_to_write); 1254 return 0;
1255
1256skip_write:
1257 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1263 return 0; 1258 return 0;
1264} 1259}
1265 1260
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
1307 .releasepage = f2fs_release_node_page, 1302 .releasepage = f2fs_release_node_page,
1308}; 1303};
1309 1304
1310static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) 1305static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1306 nid_t n)
1311{ 1307{
1312 struct list_head *this; 1308 return radix_tree_lookup(&nm_i->free_nid_root, n);
1313 struct free_nid *i;
1314 list_for_each(this, head) {
1315 i = list_entry(this, struct free_nid, list);
1316 if (i->nid == n)
1317 return i;
1318 }
1319 return NULL;
1320} 1309}
1321 1310
1322static void __del_from_free_nid_list(struct free_nid *i) 1311static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1312 struct free_nid *i)
1323{ 1313{
1324 list_del(&i->list); 1314 list_del(&i->list);
1325 kmem_cache_free(free_nid_slab, i); 1315 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1326} 1316}
1327 1317
1328static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) 1318static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1331 struct nat_entry *ne; 1321 struct nat_entry *ne;
1332 bool allocated = false; 1322 bool allocated = false;
1333 1323
1334 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) 1324 if (!available_free_memory(nm_i, FREE_NIDS))
1335 return -1; 1325 return -1;
1336 1326
1337 /* 0 nid should not be used */ 1327 /* 0 nid should not be used */
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1342 /* do not add allocated nids */ 1332 /* do not add allocated nids */
1343 read_lock(&nm_i->nat_tree_lock); 1333 read_lock(&nm_i->nat_tree_lock);
1344 ne = __lookup_nat_cache(nm_i, nid); 1334 ne = __lookup_nat_cache(nm_i, nid);
1345 if (ne && nat_get_blkaddr(ne) != NULL_ADDR) 1335 if (ne &&
1336 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1346 allocated = true; 1337 allocated = true;
1347 read_unlock(&nm_i->nat_tree_lock); 1338 read_unlock(&nm_i->nat_tree_lock);
1348 if (allocated) 1339 if (allocated)
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1354 i->state = NID_NEW; 1345 i->state = NID_NEW;
1355 1346
1356 spin_lock(&nm_i->free_nid_list_lock); 1347 spin_lock(&nm_i->free_nid_list_lock);
1357 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { 1348 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1358 spin_unlock(&nm_i->free_nid_list_lock); 1349 spin_unlock(&nm_i->free_nid_list_lock);
1359 kmem_cache_free(free_nid_slab, i); 1350 kmem_cache_free(free_nid_slab, i);
1360 return 0; 1351 return 0;
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1368static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1359static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1369{ 1360{
1370 struct free_nid *i; 1361 struct free_nid *i;
1362 bool need_free = false;
1363
1371 spin_lock(&nm_i->free_nid_list_lock); 1364 spin_lock(&nm_i->free_nid_list_lock);
1372 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1365 i = __lookup_free_nid_list(nm_i, nid);
1373 if (i && i->state == NID_NEW) { 1366 if (i && i->state == NID_NEW) {
1374 __del_from_free_nid_list(i); 1367 __del_from_free_nid_list(nm_i, i);
1375 nm_i->fcnt--; 1368 nm_i->fcnt--;
1369 need_free = true;
1376 } 1370 }
1377 spin_unlock(&nm_i->free_nid_list_lock); 1371 spin_unlock(&nm_i->free_nid_list_lock);
1372
1373 if (need_free)
1374 kmem_cache_free(free_nid_slab, i);
1378} 1375}
1379 1376
1380static void scan_nat_page(struct f2fs_nm_info *nm_i, 1377static void scan_nat_page(struct f2fs_nm_info *nm_i,
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1413 return; 1410 return;
1414 1411
1415 /* readahead nat pages to be scanned */ 1412 /* readahead nat pages to be scanned */
1416 ra_nat_pages(sbi, nid); 1413 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1417 1414
1418 while (1) { 1415 while (1) {
1419 struct page *page = get_current_nat_page(sbi, nid); 1416 struct page *page = get_current_nat_page(sbi, nid);
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1454{ 1451{
1455 struct f2fs_nm_info *nm_i = NM_I(sbi); 1452 struct f2fs_nm_info *nm_i = NM_I(sbi);
1456 struct free_nid *i = NULL; 1453 struct free_nid *i = NULL;
1457 struct list_head *this;
1458retry: 1454retry:
1459 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) 1455 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1460 return false; 1456 return false;
@@ -1462,13 +1458,11 @@ retry:
1462 spin_lock(&nm_i->free_nid_list_lock); 1458 spin_lock(&nm_i->free_nid_list_lock);
1463 1459
1464 /* We should not use stale free nids created by build_free_nids */ 1460 /* We should not use stale free nids created by build_free_nids */
1465 if (nm_i->fcnt && !sbi->on_build_free_nids) { 1461 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1466 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1462 f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1467 list_for_each(this, &nm_i->free_nid_list) { 1463 list_for_each_entry(i, &nm_i->free_nid_list, list)
1468 i = list_entry(this, struct free_nid, list);
1469 if (i->state == NID_NEW) 1464 if (i->state == NID_NEW)
1470 break; 1465 break;
1471 }
1472 1466
1473 f2fs_bug_on(i->state != NID_NEW); 1467 f2fs_bug_on(i->state != NID_NEW);
1474 *nid = i->nid; 1468 *nid = i->nid;
@@ -1481,9 +1475,7 @@ retry:
1481 1475
1482 /* Let's scan nat pages and its caches to get free nids */ 1476 /* Let's scan nat pages and its caches to get free nids */
1483 mutex_lock(&nm_i->build_lock); 1477 mutex_lock(&nm_i->build_lock);
1484 sbi->on_build_free_nids = true;
1485 build_free_nids(sbi); 1478 build_free_nids(sbi);
1486 sbi->on_build_free_nids = false;
1487 mutex_unlock(&nm_i->build_lock); 1479 mutex_unlock(&nm_i->build_lock);
1488 goto retry; 1480 goto retry;
1489} 1481}
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1497 struct free_nid *i; 1489 struct free_nid *i;
1498 1490
1499 spin_lock(&nm_i->free_nid_list_lock); 1491 spin_lock(&nm_i->free_nid_list_lock);
1500 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1492 i = __lookup_free_nid_list(nm_i, nid);
1501 f2fs_bug_on(!i || i->state != NID_ALLOC); 1493 f2fs_bug_on(!i || i->state != NID_ALLOC);
1502 __del_from_free_nid_list(i); 1494 __del_from_free_nid_list(nm_i, i);
1503 spin_unlock(&nm_i->free_nid_list_lock); 1495 spin_unlock(&nm_i->free_nid_list_lock);
1496
1497 kmem_cache_free(free_nid_slab, i);
1504} 1498}
1505 1499
1506/* 1500/*
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1510{ 1504{
1511 struct f2fs_nm_info *nm_i = NM_I(sbi); 1505 struct f2fs_nm_info *nm_i = NM_I(sbi);
1512 struct free_nid *i; 1506 struct free_nid *i;
1507 bool need_free = false;
1513 1508
1514 if (!nid) 1509 if (!nid)
1515 return; 1510 return;
1516 1511
1517 spin_lock(&nm_i->free_nid_list_lock); 1512 spin_lock(&nm_i->free_nid_list_lock);
1518 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1513 i = __lookup_free_nid_list(nm_i, nid);
1519 f2fs_bug_on(!i || i->state != NID_ALLOC); 1514 f2fs_bug_on(!i || i->state != NID_ALLOC);
1520 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { 1515 if (!available_free_memory(nm_i, FREE_NIDS)) {
1521 __del_from_free_nid_list(i); 1516 __del_from_free_nid_list(nm_i, i);
1517 need_free = true;
1522 } else { 1518 } else {
1523 i->state = NID_NEW; 1519 i->state = NID_NEW;
1524 nm_i->fcnt++; 1520 nm_i->fcnt++;
1525 } 1521 }
1526 spin_unlock(&nm_i->free_nid_list_lock); 1522 spin_unlock(&nm_i->free_nid_list_lock);
1523
1524 if (need_free)
1525 kmem_cache_free(free_nid_slab, i);
1527} 1526}
1528 1527
1529void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, 1528void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1531 block_t new_blkaddr) 1530 block_t new_blkaddr)
1532{ 1531{
1533 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); 1532 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1534 set_node_addr(sbi, ni, new_blkaddr); 1533 set_node_addr(sbi, ni, new_blkaddr, false);
1535 clear_node_page_dirty(page); 1534 clear_node_page_dirty(page);
1536} 1535}
1537 1536
1537void recover_inline_xattr(struct inode *inode, struct page *page)
1538{
1539 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1540 void *src_addr, *dst_addr;
1541 size_t inline_size;
1542 struct page *ipage;
1543 struct f2fs_inode *ri;
1544
1545 if (!f2fs_has_inline_xattr(inode))
1546 return;
1547
1548 if (!IS_INODE(page))
1549 return;
1550
1551 ri = F2FS_INODE(page);
1552 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1553 return;
1554
1555 ipage = get_node_page(sbi, inode->i_ino);
1556 f2fs_bug_on(IS_ERR(ipage));
1557
1558 dst_addr = inline_xattr_addr(ipage);
1559 src_addr = inline_xattr_addr(page);
1560 inline_size = inline_xattr_size(inode);
1561
1562 memcpy(dst_addr, src_addr, inline_size);
1563
1564 update_inode(inode, ipage);
1565 f2fs_put_page(ipage, 1);
1566}
1567
1568bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1571 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1572 nid_t new_xnid = nid_of_node(page);
1573 struct node_info ni;
1574
1575 recover_inline_xattr(inode, page);
1576
1577 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1578 return false;
1579
1580 /* 1: invalidate the previous xattr nid */
1581 if (!prev_xnid)
1582 goto recover_xnid;
1583
1584 /* Deallocate node address */
1585 get_node_info(sbi, prev_xnid, &ni);
1586 f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1587 invalidate_blocks(sbi, ni.blk_addr);
1588 dec_valid_node_count(sbi, inode);
1589 set_node_addr(sbi, &ni, NULL_ADDR, false);
1590
1591recover_xnid:
1592 /* 2: allocate new xattr nid */
1593 if (unlikely(!inc_valid_node_count(sbi, inode)))
1594 f2fs_bug_on(1);
1595
1596 remove_free_nid(NM_I(sbi), new_xnid);
1597 get_node_info(sbi, new_xnid, &ni);
1598 ni.ino = inode->i_ino;
1599 set_node_addr(sbi, &ni, NEW_ADDR, false);
1600 F2FS_I(inode)->i_xattr_nid = new_xnid;
1601
1602 /* 3: update xattr blkaddr */
1603 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1604 set_node_addr(sbi, &ni, blkaddr, false);
1605
1606 update_inode_page(inode);
1607 return true;
1608}
1609
1538int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1610int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1539{ 1611{
1540 struct f2fs_inode *src, *dst; 1612 struct f2fs_inode *src, *dst;
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1567 1639
1568 if (unlikely(!inc_valid_node_count(sbi, NULL))) 1640 if (unlikely(!inc_valid_node_count(sbi, NULL)))
1569 WARN_ON(1); 1641 WARN_ON(1);
1570 set_node_addr(sbi, &new_ni, NEW_ADDR); 1642 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1571 inc_valid_inode_count(sbi); 1643 inc_valid_inode_count(sbi);
1572 f2fs_put_page(ipage, 1); 1644 f2fs_put_page(ipage, 1);
1573 return 0; 1645 return 0;
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1590 for (; page_idx < start + nrpages; page_idx++) { 1662 for (; page_idx < start + nrpages; page_idx++) {
1591 /* alloc temporal page for read node summary info*/ 1663 /* alloc temporal page for read node summary info*/
1592 page = alloc_page(GFP_F2FS_ZERO); 1664 page = alloc_page(GFP_F2FS_ZERO);
1593 if (!page) { 1665 if (!page)
1594 struct page *tmp; 1666 break;
1595 list_for_each_entry_safe(page, tmp, pages, lru) {
1596 list_del(&page->lru);
1597 unlock_page(page);
1598 __free_pages(page, 0);
1599 }
1600 return -ENOMEM;
1601 }
1602 1667
1603 lock_page(page); 1668 lock_page(page);
1604 page->index = page_idx; 1669 page->index = page_idx;
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1609 f2fs_submit_page_mbio(sbi, page, page->index, &fio); 1674 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1610 1675
1611 f2fs_submit_merged_bio(sbi, META, READ); 1676 f2fs_submit_merged_bio(sbi, META, READ);
1612 return 0; 1677
1678 return page_idx - start;
1613} 1679}
1614 1680
1615int restore_node_summary(struct f2fs_sb_info *sbi, 1681int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1628 addr = START_BLOCK(sbi, segno); 1694 addr = START_BLOCK(sbi, segno);
1629 sum_entry = &sum->entries[0]; 1695 sum_entry = &sum->entries[0];
1630 1696
1631 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 1697 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1632 nrpages = min(last_offset - i, bio_blocks); 1698 nrpages = min(last_offset - i, bio_blocks);
1633 1699
1634 /* read ahead node pages */ 1700 /* read ahead node pages */
1635 err = ra_sum_pages(sbi, &page_list, addr, nrpages); 1701 nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
1636 if (err) 1702 if (!nrpages)
1637 return err; 1703 return -ENOMEM;
1638 1704
1639 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1705 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1706 if (err)
1707 goto skip;
1640 1708
1641 lock_page(page); 1709 lock_page(page);
1642 if (unlikely(!PageUptodate(page))) { 1710 if (unlikely(!PageUptodate(page))) {
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1648 sum_entry->ofs_in_node = 0; 1716 sum_entry->ofs_in_node = 0;
1649 sum_entry++; 1717 sum_entry++;
1650 } 1718 }
1651
1652 list_del(&page->lru);
1653 unlock_page(page); 1719 unlock_page(page);
1720skip:
1721 list_del(&page->lru);
1654 __free_pages(page, 0); 1722 __free_pages(page, 0);
1655 } 1723 }
1656 } 1724 }
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1709 struct f2fs_nm_info *nm_i = NM_I(sbi); 1777 struct f2fs_nm_info *nm_i = NM_I(sbi);
1710 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1778 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1711 struct f2fs_summary_block *sum = curseg->sum_blk; 1779 struct f2fs_summary_block *sum = curseg->sum_blk;
1712 struct list_head *cur, *n; 1780 struct nat_entry *ne, *cur;
1713 struct page *page = NULL; 1781 struct page *page = NULL;
1714 struct f2fs_nat_block *nat_blk = NULL; 1782 struct f2fs_nat_block *nat_blk = NULL;
1715 nid_t start_nid = 0, end_nid = 0; 1783 nid_t start_nid = 0, end_nid = 0;
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1721 mutex_lock(&curseg->curseg_mutex); 1789 mutex_lock(&curseg->curseg_mutex);
1722 1790
1723 /* 1) flush dirty nat caches */ 1791 /* 1) flush dirty nat caches */
1724 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { 1792 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1725 struct nat_entry *ne;
1726 nid_t nid; 1793 nid_t nid;
1727 struct f2fs_nat_entry raw_ne; 1794 struct f2fs_nat_entry raw_ne;
1728 int offset = -1; 1795 int offset = -1;
1729 block_t new_blkaddr; 1796 block_t new_blkaddr;
1730 1797
1731 ne = list_entry(cur, struct nat_entry, list);
1732 nid = nat_get_nid(ne);
1733
1734 if (nat_get_blkaddr(ne) == NEW_ADDR) 1798 if (nat_get_blkaddr(ne) == NEW_ADDR)
1735 continue; 1799 continue;
1800
1801 nid = nat_get_nid(ne);
1802
1736 if (flushed) 1803 if (flushed)
1737 goto to_nat_page; 1804 goto to_nat_page;
1738 1805
@@ -1783,16 +1850,12 @@ flush_now:
1783 } else { 1850 } else {
1784 write_lock(&nm_i->nat_tree_lock); 1851 write_lock(&nm_i->nat_tree_lock);
1785 __clear_nat_cache_dirty(nm_i, ne); 1852 __clear_nat_cache_dirty(nm_i, ne);
1786 ne->checkpointed = true;
1787 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1788 } 1854 }
1789 } 1855 }
1790 if (!flushed) 1856 if (!flushed)
1791 mutex_unlock(&curseg->curseg_mutex); 1857 mutex_unlock(&curseg->curseg_mutex);
1792 f2fs_put_page(page, 1); 1858 f2fs_put_page(page, 1);
1793
1794 /* 2) shrink nat caches if necessary */
1795 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1796} 1859}
1797 1860
1798static int init_node_manager(struct f2fs_sb_info *sbi) 1861static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1807 /* segment_count_nat includes pair segment so divide to 2. */ 1870 /* segment_count_nat includes pair segment so divide to 2. */
1808 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 1871 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1809 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 1872 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1810 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1873
1874 /* not used nids: 0, node, meta, (and root counted as valid node) */
1875 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
1811 nm_i->fcnt = 0; 1876 nm_i->fcnt = 0;
1812 nm_i->nat_cnt = 0; 1877 nm_i->nat_cnt = 0;
1878 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1813 1879
1880 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1814 INIT_LIST_HEAD(&nm_i->free_nid_list); 1881 INIT_LIST_HEAD(&nm_i->free_nid_list);
1815 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1882 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1816 INIT_LIST_HEAD(&nm_i->nat_entries); 1883 INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1864 spin_lock(&nm_i->free_nid_list_lock); 1931 spin_lock(&nm_i->free_nid_list_lock);
1865 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 1932 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1866 f2fs_bug_on(i->state == NID_ALLOC); 1933 f2fs_bug_on(i->state == NID_ALLOC);
1867 __del_from_free_nid_list(i); 1934 __del_from_free_nid_list(nm_i, i);
1868 nm_i->fcnt--; 1935 nm_i->fcnt--;
1936 spin_unlock(&nm_i->free_nid_list_lock);
1937 kmem_cache_free(free_nid_slab, i);
1938 spin_lock(&nm_i->free_nid_list_lock);
1869 } 1939 }
1870 f2fs_bug_on(nm_i->fcnt); 1940 f2fs_bug_on(nm_i->fcnt);
1871 spin_unlock(&nm_i->free_nid_list_lock); 1941 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1875 while ((found = __gang_lookup_nat_cache(nm_i, 1945 while ((found = __gang_lookup_nat_cache(nm_i,
1876 nid, NATVEC_SIZE, natvec))) { 1946 nid, NATVEC_SIZE, natvec))) {
1877 unsigned idx; 1947 unsigned idx;
1878 for (idx = 0; idx < found; idx++) { 1948 nid = nat_get_nid(natvec[found - 1]) + 1;
1879 struct nat_entry *e = natvec[idx]; 1949 for (idx = 0; idx < found; idx++)
1880 nid = nat_get_nid(e) + 1; 1950 __del_from_nat_cache(nm_i, natvec[idx]);
1881 __del_from_nat_cache(nm_i, e);
1882 }
1883 } 1951 }
1884 f2fs_bug_on(nm_i->nat_cnt); 1952 f2fs_bug_on(nm_i->nat_cnt);
1885 write_unlock(&nm_i->nat_tree_lock); 1953 write_unlock(&nm_i->nat_tree_lock);
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1892int __init create_node_manager_caches(void) 1960int __init create_node_manager_caches(void)
1893{ 1961{
1894 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1962 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1895 sizeof(struct nat_entry), NULL); 1963 sizeof(struct nat_entry));
1896 if (!nat_entry_slab) 1964 if (!nat_entry_slab)
1897 return -ENOMEM; 1965 return -ENOMEM;
1898 1966
1899 free_nid_slab = f2fs_kmem_cache_create("free_nid", 1967 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1900 sizeof(struct free_nid), NULL); 1968 sizeof(struct free_nid));
1901 if (!free_nid_slab) { 1969 if (!free_nid_slab) {
1902 kmem_cache_destroy(nat_entry_slab); 1970 kmem_cache_destroy(nat_entry_slab);
1903 return -ENOMEM; 1971 return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c4c79885c993..5decc1a375f0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
17/* # of pages to perform readahead before building free nids */ 17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4 18#define FREE_NID_PAGES 4
19 19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node during getting data blocks */ 20/* maximum readahead size for node during getting data blocks */
24#define MAX_RA_NODE 128 21#define MAX_RA_NODE 128
25 22
26/* maximum cached nat entries to manage memory footprint */ 23/* control the memory footprint threshold (10MB per 1GB ram) */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) 24#define DEF_RAM_THRESHOLD 10
28 25
29/* vector size for gang look-up from nat cache that consists of radix tree */ 26/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64 27#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
45struct nat_entry { 42struct nat_entry {
46 struct list_head list; /* for clean or dirty nat list */ 43 struct list_head list; /* for clean or dirty nat list */
47 bool checkpointed; /* whether it is checkpointed or not */ 44 bool checkpointed; /* whether it is checkpointed or not */
45 bool fsync_done; /* whether the latest node has fsync mark */
48 struct node_info ni; /* in-memory node information */ 46 struct node_info ni; /* in-memory node information */
49}; 47};
50 48
@@ -58,9 +56,15 @@ struct nat_entry {
58#define nat_set_version(nat, v) (nat->ni.version = v) 56#define nat_set_version(nat, v) (nat->ni.version = v)
59 57
60#define __set_nat_cache_dirty(nm_i, ne) \ 58#define __set_nat_cache_dirty(nm_i, ne) \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); 59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0);
62#define __clear_nat_cache_dirty(nm_i, ne) \ 63#define __clear_nat_cache_dirty(nm_i, ne) \
63 list_move_tail(&ne->list, &nm_i->nat_entries); 64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0);
64#define inc_node_version(version) (++version) 68#define inc_node_version(version) (++version)
65 69
66static inline void node_info_from_raw_nat(struct node_info *ni, 70static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
71 ni->version = raw_ne->version; 75 ni->version = raw_ne->version;
72} 76}
73 77
/* selects which node-manager cache a memory check refers to */
enum nid_type {
	FREE_NIDS,	/* the list of free nids */
	NAT_ENTRIES	/* the cached nat entries */
};
82
74/* 83/*
75 * For free nid management 84 * For free nid management
76 */ 85 */
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
236{ 245{
237 unsigned int ofs = ofs_of_node(node_page); 246 unsigned int ofs = ofs_of_node(node_page);
238 247
239 if (ofs == XATTR_NODE_OFFSET) 248 if (f2fs_has_xattr_block(ofs))
240 return false; 249 return false;
241 250
242 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 251 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 976a7a934db5..b1ae89f0f44e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino) 28 nid_t ino)
29{ 29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry; 30 struct fsync_inode_entry *entry;
32 31
33 list_for_each(this, head) { 32 list_for_each_entry(entry, head, list)
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino) 33 if (entry->inode->i_ino == ino)
36 return entry; 34 return entry;
37 } 35
38 return NULL; 36 return NULL;
39} 37}
40 38
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
136 134
137 /* get node pages in the current segment */ 135 /* get node pages in the current segment */
138 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 136 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
139 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; 137 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
140 138
141 /* read node page */ 139 /* read node page */
142 page = alloc_page(GFP_F2FS_ZERO); 140 page = alloc_page(GFP_F2FS_ZERO);
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
218{ 216{
219 struct seg_entry *sentry; 217 struct seg_entry *sentry;
220 unsigned int segno = GET_SEGNO(sbi, blkaddr); 218 unsigned int segno = GET_SEGNO(sbi, blkaddr);
221 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & 219 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
222 (sbi->blocks_per_seg - 1); 220 struct f2fs_summary_block *sum_node;
223 struct f2fs_summary sum; 221 struct f2fs_summary sum;
222 struct page *sum_page, *node_page;
224 nid_t ino, nid; 223 nid_t ino, nid;
225 void *kaddr;
226 struct inode *inode; 224 struct inode *inode;
227 struct page *node_page;
228 unsigned int offset; 225 unsigned int offset;
229 block_t bidx; 226 block_t bidx;
230 int i; 227 int i;
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
238 struct curseg_info *curseg = CURSEG_I(sbi, i); 235 struct curseg_info *curseg = CURSEG_I(sbi, i);
239 if (curseg->segno == segno) { 236 if (curseg->segno == segno) {
240 sum = curseg->sum_blk->entries[blkoff]; 237 sum = curseg->sum_blk->entries[blkoff];
241 break; 238 goto got_it;
242 } 239 }
243 } 240 }
244 if (i > CURSEG_COLD_DATA) {
245 struct page *sum_page = get_sum_page(sbi, segno);
246 struct f2fs_summary_block *sum_node;
247 kaddr = page_address(sum_page);
248 sum_node = (struct f2fs_summary_block *)kaddr;
249 sum = sum_node->entries[blkoff];
250 f2fs_put_page(sum_page, 1);
251 }
252 241
242 sum_page = get_sum_page(sbi, segno);
243 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
244 sum = sum_node->entries[blkoff];
245 f2fs_put_page(sum_page, 1);
246got_it:
253 /* Use the locked dnode page and inode */ 247 /* Use the locked dnode page and inode */
254 nid = le32_to_cpu(sum.nid); 248 nid = le32_to_cpu(sum.nid);
255 if (dn->inode->i_ino == nid) { 249 if (dn->inode->i_ino == nid) {
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
301 if (recover_inline_data(inode, page)) 295 if (recover_inline_data(inode, page))
302 goto out; 296 goto out;
303 297
298 if (recover_xattr_data(inode, page, blkaddr))
299 goto out;
300
304 start = start_bidx_of_node(ofs_of_node(page), fi); 301 start = start_bidx_of_node(ofs_of_node(page), fi);
305 if (IS_INODE(page)) 302 if (IS_INODE(page))
306 end = start + ADDRS_PER_INODE(fi); 303 end = start + ADDRS_PER_INODE(fi);
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
317 goto out; 314 goto out;
318 } 315 }
319 316
320 wait_on_page_writeback(dn.node_page); 317 f2fs_wait_on_page_writeback(dn.node_page, NODE);
321 318
322 get_node_info(sbi, dn.nid, &ni); 319 get_node_info(sbi, dn.nid, &ni);
323 f2fs_bug_on(ni.ino != ino_of_node(page)); 320 f2fs_bug_on(ni.ino != ino_of_node(page));
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
437 bool need_writecp = false; 434 bool need_writecp = false;
438 435
439 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 436 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
440 sizeof(struct fsync_inode_entry), NULL); 437 sizeof(struct fsync_inode_entry));
441 if (!fsync_entry_slab) 438 if (!fsync_entry_slab)
442 return -ENOMEM; 439 return -ENOMEM;
443 440
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7caac5f2ca9e..085f548be7a3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h> 15#include <linux/prefetch.h>
16#include <linux/kthread.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
17#include <linux/swap.h> 18#include <linux/swap.h>
18 19
@@ -24,6 +25,7 @@
24#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
25 26
26static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *flush_cmd_slab;
27 29
28/* 30/*
29 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
195 f2fs_sync_fs(sbi->sb, true); 197 f2fs_sync_fs(sbi->sb, true);
196} 198}
197 199
200static int issue_flush_thread(void *data)
201{
202 struct f2fs_sb_info *sbi = data;
203 struct f2fs_sm_info *sm_i = SM_I(sbi);
204 wait_queue_head_t *q = &sm_i->flush_wait_queue;
205repeat:
206 if (kthread_should_stop())
207 return 0;
208
209 spin_lock(&sm_i->issue_lock);
210 if (sm_i->issue_list) {
211 sm_i->dispatch_list = sm_i->issue_list;
212 sm_i->issue_list = sm_i->issue_tail = NULL;
213 }
214 spin_unlock(&sm_i->issue_lock);
215
216 if (sm_i->dispatch_list) {
217 struct bio *bio = bio_alloc(GFP_NOIO, 0);
218 struct flush_cmd *cmd, *next;
219 int ret;
220
221 bio->bi_bdev = sbi->sb->s_bdev;
222 ret = submit_bio_wait(WRITE_FLUSH, bio);
223
224 for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
225 cmd->ret = ret;
226 next = cmd->next;
227 complete(&cmd->wait);
228 }
229 sm_i->dispatch_list = NULL;
230 }
231
232 wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
233 goto repeat;
234}
235
236int f2fs_issue_flush(struct f2fs_sb_info *sbi)
237{
238 struct f2fs_sm_info *sm_i = SM_I(sbi);
239 struct flush_cmd *cmd;
240 int ret;
241
242 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244
245 cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
246 cmd->next = NULL;
247 cmd->ret = 0;
248 init_completion(&cmd->wait);
249
250 spin_lock(&sm_i->issue_lock);
251 if (sm_i->issue_list)
252 sm_i->issue_tail->next = cmd;
253 else
254 sm_i->issue_list = cmd;
255 sm_i->issue_tail = cmd;
256 spin_unlock(&sm_i->issue_lock);
257
258 if (!sm_i->dispatch_list)
259 wake_up(&sm_i->flush_wait_queue);
260
261 wait_for_completion(&cmd->wait);
262 ret = cmd->ret;
263 kmem_cache_free(flush_cmd_slab, cmd);
264 return ret;
265}
266
198static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 267static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
199 enum dirty_type dirty_type) 268 enum dirty_type dirty_type)
200{ 269{
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
340void clear_prefree_segments(struct f2fs_sb_info *sbi) 409void clear_prefree_segments(struct f2fs_sb_info *sbi)
341{ 410{
342 struct list_head *head = &(SM_I(sbi)->discard_list); 411 struct list_head *head = &(SM_I(sbi)->discard_list);
343 struct list_head *this, *next; 412 struct discard_entry *entry, *this;
344 struct discard_entry *entry;
345 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 413 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
346 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 414 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
347 unsigned int total_segs = TOTAL_SEGS(sbi); 415 unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
370 mutex_unlock(&dirty_i->seglist_lock); 438 mutex_unlock(&dirty_i->seglist_lock);
371 439
372 /* send small discards */ 440 /* send small discards */
373 list_for_each_safe(this, next, head) { 441 list_for_each_entry_safe(entry, this, head, list) {
374 entry = list_entry(this, struct discard_entry, list);
375 f2fs_issue_discard(sbi, entry->blkaddr, entry->len); 442 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
376 list_del(&entry->list); 443 list_del(&entry->list);
377 SM_I(sbi)->nr_discards -= entry->len; 444 SM_I(sbi)->nr_discards -= entry->len;
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
405 472
406 se = get_seg_entry(sbi, segno); 473 se = get_seg_entry(sbi, segno);
407 new_vblocks = se->valid_blocks + del; 474 new_vblocks = se->valid_blocks + del;
408 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); 475 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
409 476
410 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 477 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
411 (new_vblocks > sbi->blocks_per_seg))); 478 (new_vblocks > sbi->blocks_per_seg)));
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
434 get_sec_entry(sbi, segno)->valid_blocks += del; 501 get_sec_entry(sbi, segno)->valid_blocks += del;
435} 502}
436 503
437static void refresh_sit_entry(struct f2fs_sb_info *sbi, 504void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
438 block_t old_blkaddr, block_t new_blkaddr)
439{ 505{
440 update_sit_entry(sbi, new_blkaddr, 1); 506 update_sit_entry(sbi, new, 1);
441 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 507 if (GET_SEGNO(sbi, old) != NULL_SEGNO)
442 update_sit_entry(sbi, old_blkaddr, -1); 508 update_sit_entry(sbi, old, -1);
509
510 locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
511 locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
443} 512}
444 513
445void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 514void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
881 950
882 stat_inc_block_count(sbi, curseg); 951 stat_inc_block_count(sbi, curseg);
883 952
953 if (!__has_curseg_space(sbi, type))
954 sit_i->s_ops->allocate_segment(sbi, type, false);
884 /* 955 /*
885 * SIT information should be updated before segment allocation, 956 * SIT information should be updated before segment allocation,
886 * since SSR needs latest valid block information. 957 * since SSR needs latest valid block information.
887 */ 958 */
888 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 959 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
889
890 if (!__has_curseg_space(sbi, type))
891 sit_i->s_ops->allocate_segment(sbi, type, false);
892
893 locate_dirty_segment(sbi, old_cursegno); 960 locate_dirty_segment(sbi, old_cursegno);
894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 961
895 mutex_unlock(&sit_i->sentry_lock); 962 mutex_unlock(&sit_i->sentry_lock);
896 963
897 if (page && IS_NODESEG(type)) 964 if (page && IS_NODESEG(type))
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
987 change_curseg(sbi, type, true); 1054 change_curseg(sbi, type, true);
988 } 1055 }
989 1056
990 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1057 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
991 (sbi->blocks_per_seg - 1);
992 __add_sum_entry(sbi, type, sum); 1058 __add_sum_entry(sbi, type, sum);
993 1059
994 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1060 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
995
996 locate_dirty_segment(sbi, old_cursegno); 1061 locate_dirty_segment(sbi, old_cursegno);
997 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
998 1062
999 mutex_unlock(&sit_i->sentry_lock); 1063 mutex_unlock(&sit_i->sentry_lock);
1000 mutex_unlock(&curseg->curseg_mutex); 1064 mutex_unlock(&curseg->curseg_mutex);
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1028 curseg->next_segno = segno; 1092 curseg->next_segno = segno;
1029 change_curseg(sbi, type, true); 1093 change_curseg(sbi, type, true);
1030 } 1094 }
1031 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1095 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1032 (sbi->blocks_per_seg - 1);
1033 __add_sum_entry(sbi, type, sum); 1096 __add_sum_entry(sbi, type, sum);
1034 1097
1035 /* change the current log to the next block addr in advance */ 1098 /* change the current log to the next block addr in advance */
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1037 curseg->next_segno = next_segno; 1100 curseg->next_segno = next_segno;
1038 change_curseg(sbi, type, true); 1101 change_curseg(sbi, type, true);
1039 } 1102 }
1040 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & 1103 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1041 (sbi->blocks_per_seg - 1);
1042 1104
1043 /* rewrite node page */ 1105 /* rewrite node page */
1044 set_page_writeback(page); 1106 set_page_writeback(page);
1045 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); 1107 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1046 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1108 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1047 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1109 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1048
1049 locate_dirty_segment(sbi, old_cursegno); 1110 locate_dirty_segment(sbi, old_cursegno);
1050 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1051 1111
1052 mutex_unlock(&sit_i->sentry_lock); 1112 mutex_unlock(&sit_i->sentry_lock);
1053 mutex_unlock(&curseg->curseg_mutex); 1113 mutex_unlock(&curseg->curseg_mutex);
1054} 1114}
1055 1115
1116static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1117 struct page *page, enum page_type type)
1118{
1119 enum page_type btype = PAGE_TYPE_OF_BIO(type);
1120 struct f2fs_bio_info *io = &sbi->write_io[btype];
1121 struct bio_vec *bvec;
1122 int i;
1123
1124 down_read(&io->io_rwsem);
1125 if (!io->bio)
1126 goto out;
1127
1128 bio_for_each_segment_all(bvec, io->bio, i) {
1129 if (page == bvec->bv_page) {
1130 up_read(&io->io_rwsem);
1131 return true;
1132 }
1133 }
1134
1135out:
1136 up_read(&io->io_rwsem);
1137 return false;
1138}
1139
1056void f2fs_wait_on_page_writeback(struct page *page, 1140void f2fs_wait_on_page_writeback(struct page *page,
1057 enum page_type type) 1141 enum page_type type)
1058{ 1142{
1059 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1143 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1060 if (PageWriteback(page)) { 1144 if (PageWriteback(page)) {
1061 f2fs_submit_merged_bio(sbi, type, WRITE); 1145 if (is_merged_page(sbi, page, type))
1146 f2fs_submit_merged_bio(sbi, type, WRITE);
1062 wait_on_page_writeback(page); 1147 wait_on_page_writeback(page);
1063 } 1148 }
1064} 1149}
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1167 ns->ofs_in_node = 0; 1252 ns->ofs_in_node = 0;
1168 } 1253 }
1169 } else { 1254 } else {
1170 if (restore_node_summary(sbi, segno, sum)) { 1255 int err;
1256
1257 err = restore_node_summary(sbi, segno, sum);
1258 if (err) {
1171 f2fs_put_page(new, 1); 1259 f2fs_put_page(new, 1);
1172 return -EINVAL; 1260 return err;
1173 } 1261 }
1174 } 1262 }
1175 } 1263 }
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1190static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 1278static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1191{ 1279{
1192 int type = CURSEG_HOT_DATA; 1280 int type = CURSEG_HOT_DATA;
1281 int err;
1193 1282
1194 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { 1283 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1195 /* restore for compacted data summary */ 1284 /* restore for compacted data summary */
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1198 type = CURSEG_HOT_NODE; 1287 type = CURSEG_HOT_NODE;
1199 } 1288 }
1200 1289
1201 for (; type <= CURSEG_COLD_NODE; type++) 1290 for (; type <= CURSEG_COLD_NODE; type++) {
1202 if (read_normal_summaries(sbi, type)) 1291 err = read_normal_summaries(sbi, type);
1203 return -EINVAL; 1292 if (err)
1293 return err;
1294 }
1295
1204 return 0; 1296 return 0;
1205} 1297}
1206 1298
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1583 return restore_curseg_summaries(sbi); 1675 return restore_curseg_summaries(sbi);
1584} 1676}
1585 1677
1586static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
1587{
1588 struct address_space *mapping = META_MAPPING(sbi);
1589 struct page *page;
1590 block_t blk_addr, prev_blk_addr = 0;
1591 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1592 int blkno = start;
1593 struct f2fs_io_info fio = {
1594 .type = META,
1595 .rw = READ_SYNC | REQ_META | REQ_PRIO
1596 };
1597
1598 for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
1599
1600 blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
1601
1602 if (blkno != start && prev_blk_addr + 1 != blk_addr)
1603 break;
1604 prev_blk_addr = blk_addr;
1605repeat:
1606 page = grab_cache_page(mapping, blk_addr);
1607 if (!page) {
1608 cond_resched();
1609 goto repeat;
1610 }
1611 if (PageUptodate(page)) {
1612 mark_page_accessed(page);
1613 f2fs_put_page(page, 1);
1614 continue;
1615 }
1616
1617 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
1618
1619 mark_page_accessed(page);
1620 f2fs_put_page(page, 0);
1621 }
1622
1623 f2fs_submit_merged_bio(sbi, META, READ);
1624 return blkno - start;
1625}
1626
1627static void build_sit_entries(struct f2fs_sb_info *sbi) 1678static void build_sit_entries(struct f2fs_sb_info *sbi)
1628{ 1679{
1629 struct sit_info *sit_i = SIT_I(sbi); 1680 struct sit_info *sit_i = SIT_I(sbi);
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1635 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1686 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1636 1687
1637 do { 1688 do {
1638 readed = ra_sit_pages(sbi, start_blk, nrpages); 1689 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1639 1690
1640 start = start_blk * sit_i->sents_per_block; 1691 start = start_blk * sit_i->sents_per_block;
1641 end = (start_blk + readed) * sit_i->sents_per_block; 1692 end = (start_blk + readed) * sit_i->sents_per_block;
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1781{ 1832{
1782 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1833 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1783 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1834 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1835 dev_t dev = sbi->sb->s_bdev->bd_dev;
1784 struct f2fs_sm_info *sm_info; 1836 struct f2fs_sm_info *sm_info;
1785 int err; 1837 int err;
1786 1838
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1799 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 1851 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1800 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1852 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1801 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1853 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1802 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1854 sm_info->rec_prefree_segments = sm_info->main_segments *
1855 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1803 sm_info->ipu_policy = F2FS_IPU_DISABLE; 1856 sm_info->ipu_policy = F2FS_IPU_DISABLE;
1804 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1857 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
1805 1858
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1807 sm_info->nr_discards = 0; 1860 sm_info->nr_discards = 0;
1808 sm_info->max_discards = 0; 1861 sm_info->max_discards = 0;
1809 1862
1863 if (test_opt(sbi, FLUSH_MERGE)) {
1864 spin_lock_init(&sm_info->issue_lock);
1865 init_waitqueue_head(&sm_info->flush_wait_queue);
1866
1867 sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
1868 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
1869 if (IS_ERR(sm_info->f2fs_issue_flush))
1870 return PTR_ERR(sm_info->f2fs_issue_flush);
1871 }
1872
1810 err = build_sit_info(sbi); 1873 err = build_sit_info(sbi);
1811 if (err) 1874 if (err)
1812 return err; 1875 return err;
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1915 struct f2fs_sm_info *sm_info = SM_I(sbi); 1978 struct f2fs_sm_info *sm_info = SM_I(sbi);
1916 if (!sm_info) 1979 if (!sm_info)
1917 return; 1980 return;
1981 if (sm_info->f2fs_issue_flush)
1982 kthread_stop(sm_info->f2fs_issue_flush);
1918 destroy_dirty_segmap(sbi); 1983 destroy_dirty_segmap(sbi);
1919 destroy_curseg(sbi); 1984 destroy_curseg(sbi);
1920 destroy_free_segmap(sbi); 1985 destroy_free_segmap(sbi);
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1926int __init create_segment_manager_caches(void) 1991int __init create_segment_manager_caches(void)
1927{ 1992{
1928 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 1993 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1929 sizeof(struct discard_entry), NULL); 1994 sizeof(struct discard_entry));
1930 if (!discard_entry_slab) 1995 if (!discard_entry_slab)
1931 return -ENOMEM; 1996 return -ENOMEM;
1997 flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
1998 sizeof(struct flush_cmd));
1999 if (!flush_cmd_slab) {
2000 kmem_cache_destroy(discard_entry_slab);
2001 return -ENOMEM;
2002 }
1932 return 0; 2003 return 0;
1933} 2004}
1934 2005
1935void destroy_segment_manager_caches(void) 2006void destroy_segment_manager_caches(void)
1936{ 2007{
1937 kmem_cache_destroy(discard_entry_slab); 2008 kmem_cache_destroy(discard_entry_slab);
2009 kmem_cache_destroy(flush_cmd_slab);
1938} 2010}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5731682d7516..7091204680f4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,7 +14,7 @@
14#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0)) 15#define NULL_SECNO ((unsigned int)(~0))
16 16
17#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ 17#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
18 18
19/* L: Logical segment # in volume, R: Relative segment # in main area */ 19/* L: Logical segment # in volume, R: Relative segment # in main area */
20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@@ -57,6 +57,9 @@
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr) 57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
61 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
62
60#define GET_SEGNO(sbi, blk_addr) \ 63#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ 64 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ 65 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
377 380
378static inline block_t written_block_count(struct f2fs_sb_info *sbi) 381static inline block_t written_block_count(struct f2fs_sb_info *sbi)
379{ 382{
380 struct sit_info *sit_i = SIT_I(sbi); 383 return SIT_I(sbi)->written_valid_blocks;
381 block_t vblocks;
382
383 mutex_lock(&sit_i->sentry_lock);
384 vblocks = sit_i->written_valid_blocks;
385 mutex_unlock(&sit_i->sentry_lock);
386
387 return vblocks;
388} 384}
389 385
390static inline unsigned int free_segments(struct f2fs_sb_info *sbi) 386static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
391{ 387{
392 struct free_segmap_info *free_i = FREE_I(sbi); 388 return FREE_I(sbi)->free_segments;
393 unsigned int free_segs;
394
395 read_lock(&free_i->segmap_lock);
396 free_segs = free_i->free_segments;
397 read_unlock(&free_i->segmap_lock);
398
399 return free_segs;
400} 389}
401 390
402static inline int reserved_segments(struct f2fs_sb_info *sbi) 391static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
406 395
407static inline unsigned int free_sections(struct f2fs_sb_info *sbi) 396static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
408{ 397{
409 struct free_segmap_info *free_i = FREE_I(sbi); 398 return FREE_I(sbi)->free_sections;
410 unsigned int free_secs;
411
412 read_lock(&free_i->segmap_lock);
413 free_secs = free_i->free_sections;
414 read_unlock(&free_i->segmap_lock);
415
416 return free_secs;
417} 399}
418 400
419static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) 401static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
682 struct request_queue *q = bdev_get_queue(bdev); 664 struct request_queue *q = bdev_get_queue(bdev);
683 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 665 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
684} 666}
667
668/*
669 * It is very important to gather dirty pages and write at once, so that we can
670 * submit a big bio without interfering other data writes.
671 * By default, 512 pages for directory data,
672 * 512 pages (2MB) * 3 for three types of nodes, and
673 * max_bio_blocks for meta are set.
674 */
675static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
676{
677 if (type == DATA)
678 return sbi->blocks_per_seg;
679 else if (type == NODE)
680 return 3 * sbi->blocks_per_seg;
681 else if (type == META)
682 return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
683 else
684 return 0;
685}
686
687/*
688 * When writing pages, it'd better align nr_to_write for segment size.
689 */
690static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
691 struct writeback_control *wbc)
692{
693 long nr_to_write, desired;
694
695 if (wbc->sync_mode != WB_SYNC_NONE)
696 return 0;
697
698 nr_to_write = wbc->nr_to_write;
699
700 if (type == DATA)
701 desired = 4096;
702 else if (type == NODE)
703 desired = 3 * max_hw_blocks(sbi);
704 else
705 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
706
707 wbc->nr_to_write = desired;
708 return desired - nr_to_write;
709}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 856bdf994c0a..c756923a7302 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@ enum {
51 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge,
54 Opt_err, 55 Opt_err,
55}; 56};
56 57
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
67 {Opt_disable_ext_identify, "disable_ext_identify"}, 68 {Opt_disable_ext_identify, "disable_ext_identify"},
68 {Opt_inline_xattr, "inline_xattr"}, 69 {Opt_inline_xattr, "inline_xattr"},
69 {Opt_inline_data, "inline_data"}, 70 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"},
70 {Opt_err, NULL}, 72 {Opt_err, NULL},
71}; 73};
72 74
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
74enum { 76enum {
75 GC_THREAD, /* struct f2fs_gc_thread */ 77 GC_THREAD, /* struct f2fs_gc_thread */
76 SM_INFO, /* struct f2fs_sm_info */ 78 SM_INFO, /* struct f2fs_sm_info */
79 NM_INFO, /* struct f2fs_nm_info */
77 F2FS_SBI, /* struct f2fs_sb_info */ 80 F2FS_SBI, /* struct f2fs_sb_info */
78}; 81};
79 82
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
92 return (unsigned char *)sbi->gc_thread; 95 return (unsigned char *)sbi->gc_thread;
93 else if (struct_type == SM_INFO) 96 else if (struct_type == SM_INFO)
94 return (unsigned char *)SM_I(sbi); 97 return (unsigned char *)SM_I(sbi);
98 else if (struct_type == NM_INFO)
99 return (unsigned char *)NM_I(sbi);
95 else if (struct_type == F2FS_SBI) 100 else if (struct_type == F2FS_SBI)
96 return (unsigned char *)sbi; 101 return (unsigned char *)sbi;
97 return NULL; 102 return NULL;
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
183F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 188F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
184F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 189F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
185F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
191F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
186F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 192F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
193F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
187 194
188#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 195#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
189static struct attribute *f2fs_attrs[] = { 196static struct attribute *f2fs_attrs[] = {
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
196 ATTR_LIST(ipu_policy), 203 ATTR_LIST(ipu_policy),
197 ATTR_LIST(min_ipu_util), 204 ATTR_LIST(min_ipu_util),
198 ATTR_LIST(max_victim_search), 205 ATTR_LIST(max_victim_search),
206 ATTR_LIST(dir_level),
207 ATTR_LIST(ram_thresh),
199 NULL, 208 NULL,
200}; 209};
201 210
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
256 265
257 if (!name) 266 if (!name)
258 return -ENOMEM; 267 return -ENOMEM;
259 if (!strncmp(name, "on", 2)) 268 if (strlen(name) == 2 && !strncmp(name, "on", 2))
260 set_opt(sbi, BG_GC); 269 set_opt(sbi, BG_GC);
261 else if (!strncmp(name, "off", 3)) 270 else if (strlen(name) == 3 && !strncmp(name, "off", 3))
262 clear_opt(sbi, BG_GC); 271 clear_opt(sbi, BG_GC);
263 else { 272 else {
264 kfree(name); 273 kfree(name);
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
327 case Opt_inline_data: 336 case Opt_inline_data:
328 set_opt(sbi, INLINE_DATA); 337 set_opt(sbi, INLINE_DATA);
329 break; 338 break;
339 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE);
341 break;
330 default: 342 default:
331 f2fs_msg(sb, KERN_ERR, 343 f2fs_msg(sb, KERN_ERR,
332 "Unrecognized mount option \"%s\" or missing value", 344 "Unrecognized mount option \"%s\" or missing value",
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
353 fi->i_current_depth = 1; 365 fi->i_current_depth = 1;
354 fi->i_advise = 0; 366 fi->i_advise = 0;
355 rwlock_init(&fi->ext.ext_lock); 367 rwlock_init(&fi->ext.ext_lock);
368 init_rwsem(&fi->i_sem);
356 369
357 set_inode_flag(fi, FI_NEW_INODE); 370 set_inode_flag(fi, FI_NEW_INODE);
358 371
359 if (test_opt(F2FS_SB(sb), INLINE_XATTR)) 372 if (test_opt(F2FS_SB(sb), INLINE_XATTR))
360 set_inode_flag(fi, FI_INLINE_XATTR); 373 set_inode_flag(fi, FI_INLINE_XATTR);
361 374
375 /* Will be used by directory only */
376 fi->i_dir_level = F2FS_SB(sb)->dir_level;
377
362 return &fi->vfs_inode; 378 return &fi->vfs_inode;
363} 379}
364 380
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
526 seq_puts(seq, ",disable_ext_identify"); 542 seq_puts(seq, ",disable_ext_identify");
527 if (test_opt(sbi, INLINE_DATA)) 543 if (test_opt(sbi, INLINE_DATA))
528 seq_puts(seq, ",inline_data"); 544 seq_puts(seq, ",inline_data");
545 if (test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge");
529 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 547 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
530 548
531 return 0; 549 return 0;
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
539 le32_to_cpu(sbi->raw_super->segment_count_main); 557 le32_to_cpu(sbi->raw_super->segment_count_main);
540 int i; 558 int i;
541 559
560 seq_puts(seq, "format: segment_type|valid_blocks\n"
561 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
562
542 for (i = 0; i < total_segs; i++) { 563 for (i = 0; i < total_segs; i++) {
543 seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); 564 struct seg_entry *se = get_seg_entry(sbi, i);
544 if (i != 0 && (i % 10) == 0) 565
545 seq_puts(seq, "\n"); 566 if ((i % 10) == 0)
567 seq_printf(seq, "%-5d", i);
568 seq_printf(seq, "%d|%-3u", se->type,
569 get_valid_blocks(sbi, i, 1));
570 if ((i % 10) == 9 || i == (total_segs - 1))
571 seq_putc(seq, '\n');
546 else 572 else
547 seq_puts(seq, " "); 573 seq_putc(seq, ' ');
548 } 574 }
575
549 return 0; 576 return 0;
550} 577}
551 578
@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
640 667
641 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 668 if (unlikely(ino < F2FS_ROOT_INO(sbi)))
642 return ERR_PTR(-ESTALE); 669 return ERR_PTR(-ESTALE);
670 if (unlikely(ino >= NM_I(sbi)->max_nid))
671 return ERR_PTR(-ESTALE);
643 672
644 /* 673 /*
645 * f2fs_iget isn't quite right if the inode is currently unallocated! 674 * f2fs_iget isn't quite right if the inode is currently unallocated!
@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
787 816
788 for (i = 0; i < NR_COUNT_TYPE; i++) 817 for (i = 0; i < NR_COUNT_TYPE; i++)
789 atomic_set(&sbi->nr_pages[i], 0); 818 atomic_set(&sbi->nr_pages[i], 0);
819
820 sbi->dir_level = DEF_DIR_LEVEL;
790} 821}
791 822
792/* 823/*
@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
898 sbi->por_doing = false; 929 sbi->por_doing = false;
899 spin_lock_init(&sbi->stat_lock); 930 spin_lock_init(&sbi->stat_lock);
900 931
901 mutex_init(&sbi->read_io.io_mutex); 932 init_rwsem(&sbi->read_io.io_rwsem);
902 sbi->read_io.sbi = sbi; 933 sbi->read_io.sbi = sbi;
903 sbi->read_io.bio = NULL; 934 sbi->read_io.bio = NULL;
904 for (i = 0; i < NR_PAGE_TYPE; i++) { 935 for (i = 0; i < NR_PAGE_TYPE; i++) {
905 mutex_init(&sbi->write_io[i].io_mutex); 936 init_rwsem(&sbi->write_io[i].io_rwsem);
906 sbi->write_io[i].sbi = sbi; 937 sbi->write_io[i].sbi = sbi;
907 sbi->write_io[i].bio = NULL; 938 sbi->write_io[i].bio = NULL;
908 } 939 }
@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
991 goto free_root_inode; 1022 goto free_root_inode;
992 } 1023 }
993 1024
994 /* recover fsynced data */
995 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
996 err = recover_fsync_data(sbi);
997 if (err)
998 f2fs_msg(sb, KERN_ERR,
999 "Cannot recover all fsync data errno=%ld", err);
1000 }
1001
1002 /*
1003 * If filesystem is not mounted as read-only then
1004 * do start the gc_thread.
1005 */
1006 if (!(sb->s_flags & MS_RDONLY)) {
1007 /* After POR, we can run background GC thread.*/
1008 err = start_gc_thread(sbi);
1009 if (err)
1010 goto free_gc;
1011 }
1012
1013 err = f2fs_build_stats(sbi); 1025 err = f2fs_build_stats(sbi);
1014 if (err) 1026 if (err)
1015 goto free_gc; 1027 goto free_root_inode;
1016 1028
1017 if (f2fs_proc_root) 1029 if (f2fs_proc_root)
1018 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1030 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, 1046 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
1035 "%s", sb->s_id); 1047 "%s", sb->s_id);
1036 if (err) 1048 if (err)
1037 goto fail; 1049 goto free_proc;
1050
1051 /* recover fsynced data */
1052 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1053 err = recover_fsync_data(sbi);
1054 if (err)
1055 f2fs_msg(sb, KERN_ERR,
1056 "Cannot recover all fsync data errno=%ld", err);
1057 }
1038 1058
1059 /*
1060 * If filesystem is not mounted as read-only then
1061 * do start the gc_thread.
1062 */
1063 if (!(sb->s_flags & MS_RDONLY)) {
1064 /* After POR, we can run background GC thread.*/
1065 err = start_gc_thread(sbi);
1066 if (err)
1067 goto free_kobj;
1068 }
1039 return 0; 1069 return 0;
1040fail: 1070
1071free_kobj:
1072 kobject_del(&sbi->s_kobj);
1073free_proc:
1041 if (sbi->s_proc) { 1074 if (sbi->s_proc) {
1042 remove_proc_entry("segment_info", sbi->s_proc); 1075 remove_proc_entry("segment_info", sbi->s_proc);
1043 remove_proc_entry(sb->s_id, f2fs_proc_root); 1076 remove_proc_entry(sb->s_id, f2fs_proc_root);
1044 } 1077 }
1045 f2fs_destroy_stats(sbi); 1078 f2fs_destroy_stats(sbi);
1046free_gc:
1047 stop_gc_thread(sbi);
1048free_root_inode: 1079free_root_inode:
1049 dput(sb->s_root); 1080 dput(sb->s_root);
1050 sb->s_root = NULL; 1081 sb->s_root = NULL;
@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
1084static int __init init_inodecache(void) 1115static int __init init_inodecache(void)
1085{ 1116{
1086 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1117 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
1087 sizeof(struct f2fs_inode_info), NULL); 1118 sizeof(struct f2fs_inode_info));
1088 if (!f2fs_inode_cachep) 1119 if (!f2fs_inode_cachep)
1089 return -ENOMEM; 1120 return -ENOMEM;
1090 return 0; 1121 return 0;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89d0422a91a8..503c2451131e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
275 275
276 inline_size = inline_xattr_size(inode); 276 inline_size = inline_xattr_size(inode);
277 277
278 txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); 278 txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
279 if (!txattr_addr) 279 if (!txattr_addr)
280 return NULL; 280 return NULL;
281 281
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
407 if (name == NULL) 407 if (name == NULL)
408 return -EINVAL; 408 return -EINVAL;
409 name_len = strlen(name); 409 name_len = strlen(name);
410 if (name_len > F2FS_NAME_LEN)
411 return -ERANGE;
410 412
411 base_addr = read_all_xattrs(inode, NULL); 413 base_addr = read_all_xattrs(inode, NULL);
412 if (!base_addr) 414 if (!base_addr)
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
590 f2fs_balance_fs(sbi); 592 f2fs_balance_fs(sbi);
591 593
592 f2fs_lock_op(sbi); 594 f2fs_lock_op(sbi);
595 /* protect xattr_ver */
596 down_write(&F2FS_I(inode)->i_sem);
593 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); 597 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
598 up_write(&F2FS_I(inode)->i_sem);
594 f2fs_unlock_op(sbi); 599 f2fs_unlock_op(sbi);
595 600
596 return err; 601 return err;