author	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-09 22:11:44 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-09 22:11:44 -0400
commit	64b2d1fbbfda07765dae3f601862796a61b2c451 (patch)
tree	67947ede8fc007a9f0925e697a302a02bd087032
parent	b1cce8032f6abe900b078d24f3c3938726528f97 (diff)
parent	9ab701349247368f9d57a993b95a5bb05bb37e10 (diff)
Merge tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "In this round, there is no special interesting feature, but we've
  investigated a couple of tuning points with respect to the I/O flow.
  Several major bug fixes and a bunch of clean-ups also have been made.

  This patch-set includes the following major enhancement patches:
   - enhance wait_on_page_writeback
   - support SEEK_DATA and SEEK_HOLE
   - enhance readahead flows
   - enhance IO flushes
   - support fiemap
   - add some tracepoints

  The other bug fixes are as follows:
   - fix to support a large volume > 2TB correctly
   - recovery bug fix wrt fallocated space
   - fix recursive lock on xattr operations
   - fix some cases on the remount flow

  And, there are a bunch of cleanups"

* tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
  f2fs: support f2fs_fiemap
  f2fs: avoid not to call remove_dirty_inode
  f2fs: recover fallocated space
  f2fs: fix to recover data written by dio
  f2fs: large volume support
  f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
  f2fs: avoid overflow when large directory feathure is enabled
  f2fs: fix recursive lock by f2fs_setxattr
  MAINTAINERS: add a co-maintainer from samsung for F2FS
  MAINTAINERS: change the email address for f2fs
  f2fs: use inode_init_owner() to simplify codes
  f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency
  f2fs: add a tracepoint for f2fs_read_data_page
  f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages
  f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page
  f2fs: add a tracepoint for f2fs_write_end
  f2fs: add a tracepoint for f2fs_write_begin
  f2fs: fix checkpatch warning
  f2fs: deactivate inode page if the inode is evicted
  f2fs: decrease the lock granularity during write_begin
  ...
-rw-r--r--	Documentation/filesystems/f2fs.txt	8
-rw-r--r--	MAINTAINERS	3
-rw-r--r--	fs/f2fs/acl.c	2
-rw-r--r--	fs/f2fs/checkpoint.c	118
-rw-r--r--	fs/f2fs/data.c	63
-rw-r--r--	fs/f2fs/dir.c	12
-rw-r--r--	fs/f2fs/f2fs.h	49
-rw-r--r--	fs/f2fs/file.c	139
-rw-r--r--	fs/f2fs/inline.c	40
-rw-r--r--	fs/f2fs/inode.c	18
-rw-r--r--	fs/f2fs/namei.c	11
-rw-r--r--	fs/f2fs/node.c	154
-rw-r--r--	fs/f2fs/node.h	19
-rw-r--r--	fs/f2fs/recovery.c	35
-rw-r--r--	fs/f2fs/segment.c	134
-rw-r--r--	fs/f2fs/super.c	32
-rw-r--r--	fs/f2fs/xattr.c	110
-rw-r--r--	fs/f2fs/xattr.h	8
-rw-r--r--	include/linux/f2fs_fs.h	8
-rw-r--r--	include/trace/events/f2fs.h	146
20 files changed, 797 insertions, 312 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 25311e113e75..51afba17bbae 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -461,11 +461,11 @@ The number of blocks and buckets are determined by,
461 # of blocks in level #n = | 461 # of blocks in level #n = |
462 `- 4, Otherwise 462 `- 4, Otherwise
463 463
464 ,- 2^ (n + dir_level), 464 ,- 2^(n + dir_level),
465 | if n < MAX_DIR_HASH_DEPTH / 2, 465 | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
466 # of buckets in level #n = | 466 # of buckets in level #n = |
467 `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1), 467 `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
468 Otherwise 468 Otherwise
469 469
470When F2FS finds a file name in a directory, at first a hash value of the file 470When F2FS finds a file name in a directory, at first a hash value of the file
471name is calculated. Then, F2FS scans the hash table in level #0 to find the 471name is calculated. Then, F2FS scans the hash table in level #0 to find the
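
The corrected bucket formula above can be sanity-checked from userspace. The sketch below mirrors the dir_buckets() change that appears later in this diff; MAX_DIR_HASH_DEPTH = 64 is assumed to match include/linux/f2fs_fs.h, and dir_level is taken as 0, its default.

/*
 * Minimal userspace sketch of the documented bucket formula.
 * Assumption: MAX_DIR_HASH_DEPTH is 64, as in include/linux/f2fs_fs.h.
 */
#include <stdio.h>

#define MAX_DIR_HASH_DEPTH	64

static unsigned int dir_buckets(unsigned int level, int dir_level)
{
	if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
		return 1U << (level + dir_level);	/* 2^(n + dir_level) */
	return 1U << ((MAX_DIR_HASH_DEPTH / 2) - 1);	/* 2^((MAX_DIR_HASH_DEPTH / 2) - 1) */
}

int main(void)
{
	unsigned int n;

	for (n = 0; n < 4; n++)		/* dir_level = 0, the default */
		printf("level %u: %u buckets\n", n, dir_buckets(n, 0));
	return 0;
}

With dir_level = 0 this prints 1, 2, 4 and 8 buckets for levels 0-3, matching 2^(n + dir_level).
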
diff --git a/MAINTAINERS b/MAINTAINERS
index a1f4b576628a..948379508e44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3792,7 +3792,8 @@ F: fs/fscache/
3792F: include/linux/fscache*.h 3792F: include/linux/fscache*.h
3793 3793
3794F2FS FILE SYSTEM 3794F2FS FILE SYSTEM
3795M: Jaegeuk Kim <jaegeuk.kim@samsung.com> 3795M: Jaegeuk Kim <jaegeuk@kernel.org>
3796M: Changman Lee <cm224.lee@samsung.com>
3796L: linux-f2fs-devel@lists.sourceforge.net 3797L: linux-f2fs-devel@lists.sourceforge.net
3797W: http://en.wikipedia.org/wiki/F2FS 3798W: http://en.wikipedia.org/wiki/F2FS
3798T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git 3799T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e93e4ec7d165..dbe2141d10ad 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
240 } 240 }
241 } 241 }
242 242
243 error = f2fs_setxattr(inode, name_index, "", value, size, ipage); 243 error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
244 244
245 kfree(value); 245 kfree(value);
246 if (!error) 246 if (!error)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c405b8f17054..0b4710c1d370 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,12 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
33 struct address_space *mapping = META_MAPPING(sbi); 33 struct address_space *mapping = META_MAPPING(sbi);
34 struct page *page = NULL; 34 struct page *page = NULL;
35repeat: 35repeat:
36 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 36 page = grab_cache_page(mapping, index);
37 if (!page) { 37 if (!page) {
38 cond_resched(); 38 cond_resched();
39 goto repeat; 39 goto repeat;
40 } 40 }
41 41 f2fs_wait_on_page_writeback(page, META);
42 SetPageUptodate(page); 42 SetPageUptodate(page);
43 return page; 43 return page;
44} 44}
@@ -72,7 +72,7 @@ out:
72 return page; 72 return page;
73} 73}
74 74
75inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) 75static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
76{ 76{
77 switch (type) { 77 switch (type) {
78 case META_NAT: 78 case META_NAT:
@@ -154,6 +154,8 @@ static int f2fs_write_meta_page(struct page *page,
154 struct inode *inode = page->mapping->host; 154 struct inode *inode = page->mapping->host;
155 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 155 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
156 156
157 trace_f2fs_writepage(page, META);
158
157 if (unlikely(sbi->por_doing)) 159 if (unlikely(sbi->por_doing))
158 goto redirty_out; 160 goto redirty_out;
159 if (wbc->for_reclaim) 161 if (wbc->for_reclaim)
@@ -171,10 +173,7 @@ no_write:
171 return 0; 173 return 0;
172 174
173redirty_out: 175redirty_out:
174 dec_page_count(sbi, F2FS_DIRTY_META); 176 redirty_page_for_writepage(wbc, page);
175 wbc->pages_skipped++;
176 account_page_redirty(page);
177 set_page_dirty(page);
178 return AOP_WRITEPAGE_ACTIVATE; 177 return AOP_WRITEPAGE_ACTIVATE;
179} 178}
180 179
@@ -184,6 +183,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
184 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 183 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
185 long diff, written; 184 long diff, written;
186 185
186 trace_f2fs_writepages(mapping->host, wbc, META);
187
187 /* collect a number of dirty meta pages and write together */ 188 /* collect a number of dirty meta pages and write together */
188 if (wbc->for_kupdate || 189 if (wbc->for_kupdate ||
189 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) 190 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
@@ -367,7 +368,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
367 return; 368 return;
368 369
369 sbi->por_doing = true; 370 sbi->por_doing = true;
370 start_blk = __start_cp_addr(sbi) + 1; 371
372 start_blk = __start_cp_addr(sbi) + 1 +
373 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
371 orphan_blkaddr = __start_sum_addr(sbi) - 1; 374 orphan_blkaddr = __start_sum_addr(sbi) - 1;
372 375
373 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); 376 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -508,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
508 unsigned long blk_size = sbi->blocksize; 511 unsigned long blk_size = sbi->blocksize;
509 unsigned long long cp1_version = 0, cp2_version = 0; 512 unsigned long long cp1_version = 0, cp2_version = 0;
510 unsigned long long cp_start_blk_no; 513 unsigned long long cp_start_blk_no;
514 unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
515 block_t cp_blk_no;
516 int i;
511 517
512 sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); 518 sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
513 if (!sbi->ckpt) 519 if (!sbi->ckpt)
514 return -ENOMEM; 520 return -ENOMEM;
515 /* 521 /*
@@ -540,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
540 cp_block = (struct f2fs_checkpoint *)page_address(cur_page); 546 cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
541 memcpy(sbi->ckpt, cp_block, blk_size); 547 memcpy(sbi->ckpt, cp_block, blk_size);
542 548
549 if (cp_blks <= 1)
550 goto done;
551
552 cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
553 if (cur_page == cp2)
554 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
555
556 for (i = 1; i < cp_blks; i++) {
557 void *sit_bitmap_ptr;
558 unsigned char *ckpt = (unsigned char *)sbi->ckpt;
559
560 cur_page = get_meta_page(sbi, cp_blk_no + i);
561 sit_bitmap_ptr = page_address(cur_page);
562 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
563 f2fs_put_page(cur_page, 1);
564 }
565done:
543 f2fs_put_page(cp1, 1); 566 f2fs_put_page(cp1, 1);
544 f2fs_put_page(cp2, 1); 567 f2fs_put_page(cp2, 1);
545 return 0; 568 return 0;
@@ -552,14 +575,13 @@ fail_no_cp:
552static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) 575static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
553{ 576{
554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 577 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
555 struct list_head *head = &sbi->dir_inode_list;
556 struct dir_inode_entry *entry;
557 578
558 list_for_each_entry(entry, head, list) 579 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
559 if (unlikely(entry->inode == inode)) 580 return -EEXIST;
560 return -EEXIST;
561 581
562 list_add_tail(&new->list, head); 582 set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
583 F2FS_I(inode)->dirty_dir = new;
584 list_add_tail(&new->list, &sbi->dir_inode_list);
563 stat_inc_dirty_dir(sbi); 585 stat_inc_dirty_dir(sbi);
564 return 0; 586 return 0;
565} 587}
@@ -608,31 +630,26 @@ void add_dirty_dir_inode(struct inode *inode)
608void remove_dirty_dir_inode(struct inode *inode) 630void remove_dirty_dir_inode(struct inode *inode)
609{ 631{
610 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 632 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
611 struct list_head *head;
612 struct dir_inode_entry *entry; 633 struct dir_inode_entry *entry;
613 634
614 if (!S_ISDIR(inode->i_mode)) 635 if (!S_ISDIR(inode->i_mode))
615 return; 636 return;
616 637
617 spin_lock(&sbi->dir_inode_lock); 638 spin_lock(&sbi->dir_inode_lock);
618 if (get_dirty_dents(inode)) { 639 if (get_dirty_dents(inode) ||
640 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
619 spin_unlock(&sbi->dir_inode_lock); 641 spin_unlock(&sbi->dir_inode_lock);
620 return; 642 return;
621 } 643 }
622 644
623 head = &sbi->dir_inode_list; 645 entry = F2FS_I(inode)->dirty_dir;
624 list_for_each_entry(entry, head, list) { 646 list_del(&entry->list);
625 if (entry->inode == inode) { 647 F2FS_I(inode)->dirty_dir = NULL;
626 list_del(&entry->list); 648 clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
627 stat_dec_dirty_dir(sbi); 649 stat_dec_dirty_dir(sbi);
628 spin_unlock(&sbi->dir_inode_lock);
629 kmem_cache_free(inode_entry_slab, entry);
630 goto done;
631 }
632 }
633 spin_unlock(&sbi->dir_inode_lock); 650 spin_unlock(&sbi->dir_inode_lock);
651 kmem_cache_free(inode_entry_slab, entry);
634 652
635done:
636 /* Only from the recovery routine */ 653 /* Only from the recovery routine */
637 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 654 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
638 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 655 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -640,26 +657,6 @@ done:
640 } 657 }
641} 658}
642 659
643struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
644{
645
646 struct list_head *head;
647 struct inode *inode = NULL;
648 struct dir_inode_entry *entry;
649
650 spin_lock(&sbi->dir_inode_lock);
651
652 head = &sbi->dir_inode_list;
653 list_for_each_entry(entry, head, list) {
654 if (entry->inode->i_ino == ino) {
655 inode = entry->inode;
656 break;
657 }
658 }
659 spin_unlock(&sbi->dir_inode_lock);
660 return inode;
661}
662
663void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) 660void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
664{ 661{
665 struct list_head *head; 662 struct list_head *head;
@@ -758,6 +755,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
758 __u32 crc32 = 0; 755 __u32 crc32 = 0;
759 void *kaddr; 756 void *kaddr;
760 int i; 757 int i;
758 int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
759
760 /*
761 * This avoids to conduct wrong roll-forward operations and uses
762 * metapages, so should be called prior to sync_meta_pages below.
763 */
764 discard_next_dnode(sbi);
761 765
762 /* Flush all the NAT/SIT pages */ 766 /* Flush all the NAT/SIT pages */
763 while (get_pages(sbi, F2FS_DIRTY_META)) 767 while (get_pages(sbi, F2FS_DIRTY_META))
@@ -802,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
802 806
803 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) 807 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
804 / F2FS_ORPHANS_PER_BLOCK; 808 / F2FS_ORPHANS_PER_BLOCK;
805 ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); 809 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
810 orphan_blocks);
806 811
807 if (is_umount) { 812 if (is_umount) {
808 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 813 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
809 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 814 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
810 data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); 815 cp_payload_blks + data_sum_blocks +
816 orphan_blocks + NR_CURSEG_NODE_TYPE);
811 } else { 817 } else {
812 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 818 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
813 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 819 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
814 data_sum_blocks + orphan_blocks); 820 cp_payload_blks + data_sum_blocks +
821 orphan_blocks);
815 } 822 }
816 823
817 if (sbi->n_orphans) 824 if (sbi->n_orphans)
@@ -837,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
837 set_page_dirty(cp_page); 844 set_page_dirty(cp_page);
838 f2fs_put_page(cp_page, 1); 845 f2fs_put_page(cp_page, 1);
839 846
847 for (i = 1; i < 1 + cp_payload_blks; i++) {
848 cp_page = grab_meta_page(sbi, start_blk++);
849 kaddr = page_address(cp_page);
850 memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
851 (1 << sbi->log_blocksize));
852 set_page_dirty(cp_page);
853 f2fs_put_page(cp_page, 1);
854 }
855
840 if (sbi->n_orphans) { 856 if (sbi->n_orphans) {
841 write_orphan_inodes(sbi, start_blk); 857 write_orphan_inodes(sbi, start_blk);
842 start_blk += orphan_blocks; 858 start_blk += orphan_blocks;
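
The do_checkpoint() changes above fold the new cp_payload blocks into the checkpoint-pack sizing. As a sketch of that arithmetic (the parameters are stand-ins for the kernel constants F2FS_ORPHANS_PER_BLOCK and NR_CURSEG_NODE_TYPE and for the per-checkpoint counts; only the calculation mirrors the code):

/*
 * Sketch of checkpoint-pack sizing after the cp_payload change.
 * Parameters stand in for the corresponding kernel constants/counts.
 */
static unsigned int cp_pack_total_blocks(unsigned int cp_payload_blks,
					 unsigned int n_orphans,
					 unsigned int orphans_per_blk,
					 unsigned int data_sum_blocks,
					 unsigned int node_sum_blocks,
					 int is_umount)
{
	/* orphan blocks are rounded up, as in do_checkpoint() */
	unsigned int orphan_blocks =
		(n_orphans + orphans_per_blk - 1) / orphans_per_blk;

	/* summaries start after the header block, the payload and the orphans */
	unsigned int cp_pack_start_sum = 1 + cp_payload_blks + orphan_blocks;
	(void)cp_pack_start_sum;	/* stored in ckpt->cp_pack_start_sum in the real code */

	/* the leading 2 covers the checkpoint block written at both ends of the pack */
	return 2 + cp_payload_blks + data_sum_blocks + orphan_blocks +
	       (is_umount ? node_sum_blocks : 0);
}
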
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45abd60e2bff..c1fb6dd10911 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -417,7 +417,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
417 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 417 if (unlikely(dn.data_blkaddr == NEW_ADDR))
418 return ERR_PTR(-EINVAL); 418 return ERR_PTR(-EINVAL);
419 419
420 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 420 page = grab_cache_page(mapping, index);
421 if (!page) 421 if (!page)
422 return ERR_PTR(-ENOMEM); 422 return ERR_PTR(-ENOMEM);
423 423
@@ -455,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
455 int err; 455 int err;
456 456
457repeat: 457repeat:
458 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 458 page = grab_cache_page(mapping, index);
459 if (!page) 459 if (!page)
460 return ERR_PTR(-ENOMEM); 460 return ERR_PTR(-ENOMEM);
461 461
@@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
652 goto put_out; 652 goto put_out;
653 } 653 }
654 654
655 end_offset = IS_INODE(dn.node_page) ? 655 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
656 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
657 bh_result->b_size = (((size_t)1) << blkbits); 656 bh_result->b_size = (((size_t)1) << blkbits);
658 dn.ofs_in_node++; 657 dn.ofs_in_node++;
659 pgofs++; 658 pgofs++;
@@ -675,8 +674,7 @@ get_next:
675 if (dn.data_blkaddr == NEW_ADDR) 674 if (dn.data_blkaddr == NEW_ADDR)
676 goto put_out; 675 goto put_out;
677 676
678 end_offset = IS_INODE(dn.node_page) ? 677 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
679 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
680 } 678 }
681 679
682 if (maxblocks > (bh_result->b_size >> blkbits)) { 680 if (maxblocks > (bh_result->b_size >> blkbits)) {
@@ -710,11 +708,19 @@ out:
710 return err; 708 return err;
711} 709}
712 710
711int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
712 u64 start, u64 len)
713{
714 return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);
715}
716
713static int f2fs_read_data_page(struct file *file, struct page *page) 717static int f2fs_read_data_page(struct file *file, struct page *page)
714{ 718{
715 struct inode *inode = page->mapping->host; 719 struct inode *inode = page->mapping->host;
716 int ret; 720 int ret;
717 721
722 trace_f2fs_readpage(page, DATA);
723
718 /* If the file has inline data, try to read it directlly */ 724 /* If the file has inline data, try to read it directlly */
719 if (f2fs_has_inline_data(inode)) 725 if (f2fs_has_inline_data(inode))
720 ret = f2fs_read_inline_data(inode, page); 726 ret = f2fs_read_inline_data(inode, page);
@@ -790,6 +796,8 @@ static int f2fs_write_data_page(struct page *page,
790 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, 796 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
791 }; 797 };
792 798
799 trace_f2fs_writepage(page, DATA);
800
793 if (page->index < end_index) 801 if (page->index < end_index)
794 goto write; 802 goto write;
795 803
@@ -798,10 +806,8 @@ static int f2fs_write_data_page(struct page *page,
798 * this page does not have to be written to disk. 806 * this page does not have to be written to disk.
799 */ 807 */
800 offset = i_size & (PAGE_CACHE_SIZE - 1); 808 offset = i_size & (PAGE_CACHE_SIZE - 1);
801 if ((page->index >= end_index + 1) || !offset) { 809 if ((page->index >= end_index + 1) || !offset)
802 inode_dec_dirty_dents(inode);
803 goto out; 810 goto out;
804 }
805 811
806 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 812 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
807write: 813write:
@@ -810,7 +816,6 @@ write:
810 816
811 /* Dentry blocks are controlled by checkpoint */ 817 /* Dentry blocks are controlled by checkpoint */
812 if (S_ISDIR(inode->i_mode)) { 818 if (S_ISDIR(inode->i_mode)) {
813 inode_dec_dirty_dents(inode);
814 err = do_write_data_page(page, &fio); 819 err = do_write_data_page(page, &fio);
815 goto done; 820 goto done;
816 } 821 }
@@ -832,15 +837,16 @@ done:
832 837
833 clear_cold_data(page); 838 clear_cold_data(page);
834out: 839out:
840 inode_dec_dirty_dents(inode);
835 unlock_page(page); 841 unlock_page(page);
836 if (need_balance_fs) 842 if (need_balance_fs)
837 f2fs_balance_fs(sbi); 843 f2fs_balance_fs(sbi);
844 if (wbc->for_reclaim)
845 f2fs_submit_merged_bio(sbi, DATA, WRITE);
838 return 0; 846 return 0;
839 847
840redirty_out: 848redirty_out:
841 wbc->pages_skipped++; 849 redirty_page_for_writepage(wbc, page);
842 account_page_redirty(page);
843 set_page_dirty(page);
844 return AOP_WRITEPAGE_ACTIVATE; 850 return AOP_WRITEPAGE_ACTIVATE;
845} 851}
846 852
@@ -862,12 +868,15 @@ static int f2fs_write_data_pages(struct address_space *mapping,
862 int ret; 868 int ret;
863 long diff; 869 long diff;
864 870
871 trace_f2fs_writepages(mapping->host, wbc, DATA);
872
865 /* deal with chardevs and other special file */ 873 /* deal with chardevs and other special file */
866 if (!mapping->a_ops->writepage) 874 if (!mapping->a_ops->writepage)
867 return 0; 875 return 0;
868 876
869 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 877 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
870 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA)) 878 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
879 available_free_memory(sbi, DIRTY_DENTS))
871 goto skip_write; 880 goto skip_write;
872 881
873 diff = nr_pages_to_write(sbi, DATA, wbc); 882 diff = nr_pages_to_write(sbi, DATA, wbc);
@@ -903,6 +912,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
903 struct dnode_of_data dn; 912 struct dnode_of_data dn;
904 int err = 0; 913 int err = 0;
905 914
915 trace_f2fs_write_begin(inode, pos, len, flags);
916
906 f2fs_balance_fs(sbi); 917 f2fs_balance_fs(sbi);
907repeat: 918repeat:
908 err = f2fs_convert_inline_data(inode, pos + len); 919 err = f2fs_convert_inline_data(inode, pos + len);
@@ -912,6 +923,10 @@ repeat:
912 page = grab_cache_page_write_begin(mapping, index, flags); 923 page = grab_cache_page_write_begin(mapping, index, flags);
913 if (!page) 924 if (!page)
914 return -ENOMEM; 925 return -ENOMEM;
926
927 /* to avoid latency during memory pressure */
928 unlock_page(page);
929
915 *pagep = page; 930 *pagep = page;
916 931
917 if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) 932 if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
@@ -923,10 +938,18 @@ repeat:
923 f2fs_unlock_op(sbi); 938 f2fs_unlock_op(sbi);
924 939
925 if (err) { 940 if (err) {
926 f2fs_put_page(page, 1); 941 f2fs_put_page(page, 0);
927 return err; 942 return err;
928 } 943 }
929inline_data: 944inline_data:
945 lock_page(page);
946 if (unlikely(page->mapping != mapping)) {
947 f2fs_put_page(page, 1);
948 goto repeat;
949 }
950
951 f2fs_wait_on_page_writeback(page, DATA);
952
930 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 953 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
931 return 0; 954 return 0;
932 955
@@ -978,6 +1001,8 @@ static int f2fs_write_end(struct file *file,
978{ 1001{
979 struct inode *inode = page->mapping->host; 1002 struct inode *inode = page->mapping->host;
980 1003
1004 trace_f2fs_write_end(inode, pos, len, copied);
1005
981 SetPageUptodate(page); 1006 SetPageUptodate(page);
982 set_page_dirty(page); 1007 set_page_dirty(page);
983 1008
@@ -1022,6 +1047,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1022 if (check_direct_IO(inode, rw, iov, offset, nr_segs)) 1047 if (check_direct_IO(inode, rw, iov, offset, nr_segs))
1023 return 0; 1048 return 0;
1024 1049
1050 /* clear fsync mark to recover these blocks */
1051 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1052
1025 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 1053 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1026 get_data_block); 1054 get_data_block);
1027} 1055}
@@ -1061,6 +1089,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
1061 1089
1062static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 1090static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1063{ 1091{
1092 struct inode *inode = mapping->host;
1093
1094 if (f2fs_has_inline_data(inode))
1095 return 0;
1096
1064 return generic_block_bmap(mapping, block, get_data_block); 1097 return generic_block_bmap(mapping, block, get_data_block);
1065} 1098}
1066 1099
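
With f2fs_fiemap() wired to generic_block_fiemap() above, extent maps become visible through the standard FIEMAP ioctl (the same path filefrag -v uses). A minimal userspace sketch, assuming a 32-extent buffer is enough for the file being inspected:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	unsigned int i;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* fixed header plus room for up to 32 extents (arbitrary choice) */
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("extent %u: logical %llu physical %llu length %llu\n",
		       i,
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length);

	free(fm);
	close(fd);
	return 0;
}
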
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 972fd0ef230f..966acb039e3b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode)
23 23
24static unsigned int dir_buckets(unsigned int level, int dir_level) 24static unsigned int dir_buckets(unsigned int level, int dir_level)
25{ 25{
26 if (level < MAX_DIR_HASH_DEPTH / 2) 26 if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
27 return 1 << (level + dir_level); 27 return 1 << (level + dir_level);
28 else 28 else
29 return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); 29 return MAX_DIR_BUCKETS;
30} 30}
31 31
32static unsigned int bucket_blocks(unsigned int level) 32static unsigned int bucket_blocks(unsigned int level)
@@ -268,6 +268,8 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
268{ 268{
269 struct f2fs_inode *ri; 269 struct f2fs_inode *ri;
270 270
271 f2fs_wait_on_page_writeback(ipage, NODE);
272
271 /* copy name info. to this inode page */ 273 /* copy name info. to this inode page */
272 ri = F2FS_INODE(ipage); 274 ri = F2FS_INODE(ipage);
273 ri->i_namelen = cpu_to_le32(name->len); 275 ri->i_namelen = cpu_to_le32(name->len);
@@ -637,11 +639,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
637 struct f2fs_dentry_block *dentry_blk = NULL; 639 struct f2fs_dentry_block *dentry_blk = NULL;
638 struct f2fs_dir_entry *de = NULL; 640 struct f2fs_dir_entry *de = NULL;
639 struct page *dentry_page = NULL; 641 struct page *dentry_page = NULL;
642 struct file_ra_state *ra = &file->f_ra;
640 unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); 643 unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
641 unsigned char d_type = DT_UNKNOWN; 644 unsigned char d_type = DT_UNKNOWN;
642 645
643 bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); 646 bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
644 647
648 /* readahead for multi pages of dir */
649 if (npages - n > 1 && !ra_has_index(ra, n))
650 page_cache_sync_readahead(inode->i_mapping, ra, file, n,
651 min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
652
645 for (; n < npages; n++) { 653 for (; n < npages; n++) {
646 dentry_page = get_lock_data_page(inode, n); 654 dentry_page = get_lock_data_page(inode, n);
647 if (IS_ERR(dentry_page)) 655 if (IS_ERR(dentry_page))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2ecac8312359..e51c732b0dd9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -182,6 +182,8 @@ enum {
182 182
183#define F2FS_LINK_MAX 32000 /* maximum link count per file */ 183#define F2FS_LINK_MAX 32000 /* maximum link count per file */
184 184
185#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
186
185/* for in-memory extent cache entry */ 187/* for in-memory extent cache entry */
186#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ 188#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
187 189
@@ -218,6 +220,7 @@ struct f2fs_inode_info {
218 nid_t i_xattr_nid; /* node id that contains xattrs */ 220 nid_t i_xattr_nid; /* node id that contains xattrs */
219 unsigned long long xattr_ver; /* cp version of xattr modification */ 221 unsigned long long xattr_ver; /* cp version of xattr modification */
220 struct extent_info ext; /* in-memory extent cache entry */ 222 struct extent_info ext; /* in-memory extent cache entry */
223 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
221}; 224};
222 225
223static inline void get_extent_info(struct extent_info *ext, 226static inline void get_extent_info(struct extent_info *ext,
@@ -243,6 +246,7 @@ static inline void set_raw_extent(struct extent_info *ext,
243struct f2fs_nm_info { 246struct f2fs_nm_info {
244 block_t nat_blkaddr; /* base disk address of NAT */ 247 block_t nat_blkaddr; /* base disk address of NAT */
245 nid_t max_nid; /* maximum possible node ids */ 248 nid_t max_nid; /* maximum possible node ids */
249 nid_t available_nids; /* maximum available node ids */
246 nid_t next_scan_nid; /* the next nid to be scanned */ 250 nid_t next_scan_nid; /* the next nid to be scanned */
247 unsigned int ram_thresh; /* control the memory footprint */ 251 unsigned int ram_thresh; /* control the memory footprint */
248 252
@@ -323,6 +327,15 @@ struct flush_cmd {
323 int ret; 327 int ret;
324}; 328};
325 329
330struct flush_cmd_control {
331 struct task_struct *f2fs_issue_flush; /* flush thread */
332 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
333 struct flush_cmd *issue_list; /* list for command issue */
334 struct flush_cmd *dispatch_list; /* list for command dispatch */
335 spinlock_t issue_lock; /* for issue list lock */
336 struct flush_cmd *issue_tail; /* list tail of issue list */
337};
338
326struct f2fs_sm_info { 339struct f2fs_sm_info {
327 struct sit_info *sit_info; /* whole segment information */ 340 struct sit_info *sit_info; /* whole segment information */
328 struct free_segmap_info *free_info; /* free segment information */ 341 struct free_segmap_info *free_info; /* free segment information */
@@ -353,12 +366,8 @@ struct f2fs_sm_info {
353 unsigned int min_ipu_util; /* in-place-update threshold */ 366 unsigned int min_ipu_util; /* in-place-update threshold */
354 367
355 /* for flush command control */ 368 /* for flush command control */
356 struct task_struct *f2fs_issue_flush; /* flush thread */ 369 struct flush_cmd_control *cmd_control_info;
357 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ 370
358 struct flush_cmd *issue_list; /* list for command issue */
359 struct flush_cmd *dispatch_list; /* list for command dispatch */
360 spinlock_t issue_lock; /* for issue list lock */
361 struct flush_cmd *issue_tail; /* list tail of issue list */
362}; 371};
363 372
364/* 373/*
@@ -755,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
755static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) 764static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
756{ 765{
757 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 766 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
758 int offset = (flag == NAT_BITMAP) ? 767 int offset;
768
769 if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
770 if (flag == NAT_BITMAP)
771 return &ckpt->sit_nat_version_bitmap;
772 else
773 return ((unsigned char *)ckpt + F2FS_BLKSIZE);
774 } else {
775 offset = (flag == NAT_BITMAP) ?
759 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; 776 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
760 return &ckpt->sit_nat_version_bitmap + offset; 777 return &ckpt->sit_nat_version_bitmap + offset;
778 }
761} 779}
762 780
763static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) 781static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
@@ -958,6 +976,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
958enum { 976enum {
959 FI_NEW_INODE, /* indicate newly allocated inode */ 977 FI_NEW_INODE, /* indicate newly allocated inode */
960 FI_DIRTY_INODE, /* indicate inode is dirty or not */ 978 FI_DIRTY_INODE, /* indicate inode is dirty or not */
979 FI_DIRTY_DIR, /* indicate directory has dirty pages */
961 FI_INC_LINK, /* need to increment i_nlink */ 980 FI_INC_LINK, /* need to increment i_nlink */
962 FI_ACL_MODE, /* indicate acl mode */ 981 FI_ACL_MODE, /* indicate acl mode */
963 FI_NO_ALLOC, /* should not allocate any blocks */ 982 FI_NO_ALLOC, /* should not allocate any blocks */
@@ -1071,6 +1090,12 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1071 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1090 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1072 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1091 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
1073 1092
1093/* get offset of first page in next direct node */
1094#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
1095 ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
1096 (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
1097 ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
1098
1074/* 1099/*
1075 * file.c 1100 * file.c
1076 */ 1101 */
@@ -1140,8 +1165,10 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
1140struct dnode_of_data; 1165struct dnode_of_data;
1141struct node_info; 1166struct node_info;
1142 1167
1168bool available_free_memory(struct f2fs_sb_info *, int);
1143int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1169int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1144bool fsync_mark_done(struct f2fs_sb_info *, nid_t); 1170bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
1171void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
1145void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1172void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1146int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1173int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1147int truncate_inode_blocks(struct inode *, pgoff_t); 1174int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1176,9 +1203,12 @@ void destroy_node_manager_caches(void);
1176void f2fs_balance_fs(struct f2fs_sb_info *); 1203void f2fs_balance_fs(struct f2fs_sb_info *);
1177void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1204void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1178int f2fs_issue_flush(struct f2fs_sb_info *); 1205int f2fs_issue_flush(struct f2fs_sb_info *);
1206int create_flush_cmd_control(struct f2fs_sb_info *);
1207void destroy_flush_cmd_control(struct f2fs_sb_info *);
1179void invalidate_blocks(struct f2fs_sb_info *, block_t); 1208void invalidate_blocks(struct f2fs_sb_info *, block_t);
1180void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1209void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1181void clear_prefree_segments(struct f2fs_sb_info *); 1210void clear_prefree_segments(struct f2fs_sb_info *);
1211void discard_next_dnode(struct f2fs_sb_info *);
1182int npages_for_summary_flush(struct f2fs_sb_info *); 1212int npages_for_summary_flush(struct f2fs_sb_info *);
1183void allocate_new_segments(struct f2fs_sb_info *); 1213void allocate_new_segments(struct f2fs_sb_info *);
1184struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1214struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1221,7 +1251,6 @@ int get_valid_checkpoint(struct f2fs_sb_info *);
1221void set_dirty_dir_page(struct inode *, struct page *); 1251void set_dirty_dir_page(struct inode *, struct page *);
1222void add_dirty_dir_inode(struct inode *); 1252void add_dirty_dir_inode(struct inode *);
1223void remove_dirty_dir_inode(struct inode *); 1253void remove_dirty_dir_inode(struct inode *);
1224struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t);
1225void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1254void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1226void write_checkpoint(struct f2fs_sb_info *, bool); 1255void write_checkpoint(struct f2fs_sb_info *, bool);
1227void init_orphan_info(struct f2fs_sb_info *); 1256void init_orphan_info(struct f2fs_sb_info *);
@@ -1242,6 +1271,7 @@ struct page *find_data_page(struct inode *, pgoff_t, bool);
1242struct page *get_lock_data_page(struct inode *, pgoff_t); 1271struct page *get_lock_data_page(struct inode *, pgoff_t);
1243struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1272struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1244int do_write_data_page(struct page *, struct f2fs_io_info *); 1273int do_write_data_page(struct page *, struct f2fs_io_info *);
1274int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1245 1275
1246/* 1276/*
1247 * gc.c 1277 * gc.c
@@ -1391,5 +1421,6 @@ bool f2fs_may_inline(struct inode *);
1391int f2fs_read_inline_data(struct inode *, struct page *); 1421int f2fs_read_inline_data(struct inode *, struct page *);
1392int f2fs_convert_inline_data(struct inode *, pgoff_t); 1422int f2fs_convert_inline_data(struct inode *, pgoff_t);
1393int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); 1423int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
1424void truncate_inline_data(struct inode *, u64);
1394int recover_inline_data(struct inode *, struct page *); 1425int recover_inline_data(struct inode *, struct page *);
1395#endif 1426#endif
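
Among the f2fs.h additions above, PGOFS_OF_NEXT_DNODE() is what the new SEEK_DATA/SEEK_HOLE code in file.c uses to jump past a missing direct node. A commented mirror of the macro, as a sketch with plain parameters standing in for the ADDRS_PER_INODE()/ADDRS_PER_BLOCK helpers:

/*
 * Sketch: the same arithmetic as PGOFS_OF_NEXT_DNODE(pgofs, fi), written
 * as a function.  addrs_per_inode/addrs_per_block stand in for the
 * per-file ADDRS_PER_INODE(fi) and ADDRS_PER_BLOCK values.
 */
static unsigned long pgofs_of_next_dnode(unsigned long pgofs,
					 unsigned long addrs_per_inode,
					 unsigned long addrs_per_block)
{
	/* still within the inode's own data pointers: the first direct
	 * node starts right after them */
	if (pgofs < addrs_per_inode)
		return addrs_per_inode;

	/* otherwise advance to the first page offset of the next direct node */
	return (pgofs - addrs_per_inode + addrs_per_block) /
	       addrs_per_block * addrs_per_block + addrs_per_inode;
}
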
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60e7d5448a1d..9c49c593d8eb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -19,6 +19,7 @@
19#include <linux/compat.h> 19#include <linux/compat.h>
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/mount.h> 21#include <linux/mount.h>
22#include <linux/pagevec.h>
22 23
23#include "f2fs.h" 24#include "f2fs.h"
24#include "node.h" 25#include "node.h"
@@ -194,6 +195,132 @@ out:
194 return ret; 195 return ret;
195} 196}
196 197
198static pgoff_t __get_first_dirty_index(struct address_space *mapping,
199 pgoff_t pgofs, int whence)
200{
201 struct pagevec pvec;
202 int nr_pages;
203
204 if (whence != SEEK_DATA)
205 return 0;
206
207 /* find first dirty page index */
208 pagevec_init(&pvec, 0);
209 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
210 pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
211 pagevec_release(&pvec);
212 return pgofs;
213}
214
215static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
216 int whence)
217{
218 switch (whence) {
219 case SEEK_DATA:
220 if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
221 (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
222 return true;
223 break;
224 case SEEK_HOLE:
225 if (blkaddr == NULL_ADDR)
226 return true;
227 break;
228 }
229 return false;
230}
231
232static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
233{
234 struct inode *inode = file->f_mapping->host;
235 loff_t maxbytes = inode->i_sb->s_maxbytes;
236 struct dnode_of_data dn;
237 pgoff_t pgofs, end_offset, dirty;
238 loff_t data_ofs = offset;
239 loff_t isize;
240 int err = 0;
241
242 mutex_lock(&inode->i_mutex);
243
244 isize = i_size_read(inode);
245 if (offset >= isize)
246 goto fail;
247
248 /* handle inline data case */
249 if (f2fs_has_inline_data(inode)) {
250 if (whence == SEEK_HOLE)
251 data_ofs = isize;
252 goto found;
253 }
254
255 pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
256
257 dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
258
259 for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
260 set_new_dnode(&dn, inode, NULL, NULL, 0);
261 err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
262 if (err && err != -ENOENT) {
263 goto fail;
264 } else if (err == -ENOENT) {
265 /* direct node is not exist */
266 if (whence == SEEK_DATA) {
267 pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
268 F2FS_I(inode));
269 continue;
270 } else {
271 goto found;
272 }
273 }
274
275 end_offset = IS_INODE(dn.node_page) ?
276 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
277
278 /* find data/hole in dnode block */
279 for (; dn.ofs_in_node < end_offset;
280 dn.ofs_in_node++, pgofs++,
281 data_ofs = pgofs << PAGE_CACHE_SHIFT) {
282 block_t blkaddr;
283 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
284
285 if (__found_offset(blkaddr, dirty, pgofs, whence)) {
286 f2fs_put_dnode(&dn);
287 goto found;
288 }
289 }
290 f2fs_put_dnode(&dn);
291 }
292
293 if (whence == SEEK_DATA)
294 goto fail;
295found:
296 if (whence == SEEK_HOLE && data_ofs > isize)
297 data_ofs = isize;
298 mutex_unlock(&inode->i_mutex);
299 return vfs_setpos(file, data_ofs, maxbytes);
300fail:
301 mutex_unlock(&inode->i_mutex);
302 return -ENXIO;
303}
304
305static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
306{
307 struct inode *inode = file->f_mapping->host;
308 loff_t maxbytes = inode->i_sb->s_maxbytes;
309
310 switch (whence) {
311 case SEEK_SET:
312 case SEEK_CUR:
313 case SEEK_END:
314 return generic_file_llseek_size(file, offset, whence,
315 maxbytes, i_size_read(inode));
316 case SEEK_DATA:
317 case SEEK_HOLE:
318 return f2fs_seek_block(file, offset, whence);
319 }
320
321 return -EINVAL;
322}
323
197static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) 324static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
198{ 325{
199 file_accessed(file); 326 file_accessed(file);
@@ -242,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
242 unsigned offset = from & (PAGE_CACHE_SIZE - 1); 369 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
243 struct page *page; 370 struct page *page;
244 371
372 if (f2fs_has_inline_data(inode))
373 return truncate_inline_data(inode, from);
374
245 if (!offset) 375 if (!offset)
246 return; 376 return;
247 377
@@ -288,10 +418,7 @@ int truncate_blocks(struct inode *inode, u64 from)
288 return err; 418 return err;
289 } 419 }
290 420
291 if (IS_INODE(dn.node_page)) 421 count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
292 count = ADDRS_PER_INODE(F2FS_I(inode));
293 else
294 count = ADDRS_PER_BLOCK;
295 422
296 count -= dn.ofs_in_node; 423 count -= dn.ofs_in_node;
297 f2fs_bug_on(count < 0); 424 f2fs_bug_on(count < 0);
@@ -413,6 +540,7 @@ const struct inode_operations f2fs_file_inode_operations = {
413 .listxattr = f2fs_listxattr, 540 .listxattr = f2fs_listxattr,
414 .removexattr = generic_removexattr, 541 .removexattr = generic_removexattr,
415#endif 542#endif
543 .fiemap = f2fs_fiemap,
416}; 544};
417 545
418static void fill_zero(struct inode *inode, pgoff_t index, 546static void fill_zero(struct inode *inode, pgoff_t index,
@@ -555,6 +683,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
555 i_size_read(inode) < new_size) { 683 i_size_read(inode) < new_size) {
556 i_size_write(inode, new_size); 684 i_size_write(inode, new_size);
557 mark_inode_dirty(inode); 685 mark_inode_dirty(inode);
686 f2fs_write_inode(inode, NULL);
558 } 687 }
559 688
560 return ret; 689 return ret;
@@ -678,7 +807,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
678#endif 807#endif
679 808
680const struct file_operations f2fs_file_operations = { 809const struct file_operations f2fs_file_operations = {
681 .llseek = generic_file_llseek, 810 .llseek = f2fs_llseek,
682 .read = do_sync_read, 811 .read = do_sync_read,
683 .write = do_sync_write, 812 .write = do_sync_write,
684 .aio_read = generic_file_aio_read, 813 .aio_read = generic_file_aio_read,
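
The new f2fs_llseek() above makes SEEK_DATA/SEEK_HOLE usable on f2fs files. A minimal userspace sketch (both are standard whence values; lseek() returns -1 with ENXIO when nothing is found at or after the offset):

#define _GNU_SOURCE		/* for SEEK_DATA / SEEK_HOLE */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data, hole;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	data = lseek(fd, 0, SEEK_DATA);	/* first data byte at or after offset 0 */
	hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at or after offset 0 (EOF for a fully written file) */
	printf("first data at %lld, first hole at %lld\n",
	       (long long)data, (long long)hole);

	close(fd);
	return 0;
}
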
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 383db1fabcf4..1bba5228c197 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -81,8 +81,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
81 81
82 f2fs_lock_op(sbi); 82 f2fs_lock_op(sbi);
83 ipage = get_node_page(sbi, inode->i_ino); 83 ipage = get_node_page(sbi, inode->i_ino);
84 if (IS_ERR(ipage)) 84 if (IS_ERR(ipage)) {
85 return PTR_ERR(ipage); 85 err = PTR_ERR(ipage);
86 goto out;
87 }
86 88
87 /* 89 /*
88 * i_addr[0] is not used for inline data, 90 * i_addr[0] is not used for inline data,
@@ -90,11 +92,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
90 */ 92 */
91 set_new_dnode(&dn, inode, ipage, NULL, 0); 93 set_new_dnode(&dn, inode, ipage, NULL, 0);
92 err = f2fs_reserve_block(&dn, 0); 94 err = f2fs_reserve_block(&dn, 0);
93 if (err) { 95 if (err)
94 f2fs_unlock_op(sbi); 96 goto out;
95 return err;
96 }
97 97
98 f2fs_wait_on_page_writeback(page, DATA);
98 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 99 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
99 100
100 /* Copy the whole inline data block */ 101 /* Copy the whole inline data block */
@@ -118,6 +119,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
118 119
119 sync_inode_page(&dn); 120 sync_inode_page(&dn);
120 f2fs_put_dnode(&dn); 121 f2fs_put_dnode(&dn);
122out:
121 f2fs_unlock_op(sbi); 123 f2fs_unlock_op(sbi);
122 return err; 124 return err;
123} 125}
@@ -132,7 +134,7 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
132 else if (to_size <= MAX_INLINE_DATA) 134 else if (to_size <= MAX_INLINE_DATA)
133 return 0; 135 return 0;
134 136
135 page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); 137 page = grab_cache_page(inode->i_mapping, 0);
136 if (!page) 138 if (!page)
137 return -ENOMEM; 139 return -ENOMEM;
138 140
@@ -155,6 +157,7 @@ int f2fs_write_inline_data(struct inode *inode,
155 return err; 157 return err;
156 ipage = dn.inode_page; 158 ipage = dn.inode_page;
157 159
160 f2fs_wait_on_page_writeback(ipage, NODE);
158 zero_user_segment(ipage, INLINE_DATA_OFFSET, 161 zero_user_segment(ipage, INLINE_DATA_OFFSET,
159 INLINE_DATA_OFFSET + MAX_INLINE_DATA); 162 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
160 src_addr = kmap(page); 163 src_addr = kmap(page);
@@ -175,6 +178,26 @@ int f2fs_write_inline_data(struct inode *inode,
175 return 0; 178 return 0;
176} 179}
177 180
181void truncate_inline_data(struct inode *inode, u64 from)
182{
183 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
184 struct page *ipage;
185
186 if (from >= MAX_INLINE_DATA)
187 return;
188
189 ipage = get_node_page(sbi, inode->i_ino);
190 if (IS_ERR(ipage))
191 return;
192
193 f2fs_wait_on_page_writeback(ipage, NODE);
194
195 zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
196 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
197 set_page_dirty(ipage);
198 f2fs_put_page(ipage, 1);
199}
200
178int recover_inline_data(struct inode *inode, struct page *npage) 201int recover_inline_data(struct inode *inode, struct page *npage)
179{ 202{
180 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 203 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -199,6 +222,8 @@ process_inline:
199 ipage = get_node_page(sbi, inode->i_ino); 222 ipage = get_node_page(sbi, inode->i_ino);
200 f2fs_bug_on(IS_ERR(ipage)); 223 f2fs_bug_on(IS_ERR(ipage));
201 224
225 f2fs_wait_on_page_writeback(ipage, NODE);
226
202 src_addr = inline_data_addr(npage); 227 src_addr = inline_data_addr(npage);
203 dst_addr = inline_data_addr(ipage); 228 dst_addr = inline_data_addr(ipage);
204 memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 229 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -210,6 +235,7 @@ process_inline:
210 if (f2fs_has_inline_data(inode)) { 235 if (f2fs_has_inline_data(inode)) {
211 ipage = get_node_page(sbi, inode->i_ino); 236 ipage = get_node_page(sbi, inode->i_ino);
212 f2fs_bug_on(IS_ERR(ipage)); 237 f2fs_bug_on(IS_ERR(ipage));
238 f2fs_wait_on_page_writeback(ipage, NODE);
213 zero_user_segment(ipage, INLINE_DATA_OFFSET, 239 zero_user_segment(ipage, INLINE_DATA_OFFSET,
214 INLINE_DATA_OFFSET + MAX_INLINE_DATA); 240 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
215 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 241 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ee829d360468..adc622c6bdce 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,6 +12,7 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/bitops.h>
15 16
16#include "f2fs.h" 17#include "f2fs.h"
17#include "node.h" 18#include "node.h"
@@ -21,20 +22,20 @@
21void f2fs_set_inode_flags(struct inode *inode) 22void f2fs_set_inode_flags(struct inode *inode)
22{ 23{
23 unsigned int flags = F2FS_I(inode)->i_flags; 24 unsigned int flags = F2FS_I(inode)->i_flags;
24 25 unsigned int new_fl = 0;
25 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
26 S_NOATIME | S_DIRSYNC);
27 26
28 if (flags & FS_SYNC_FL) 27 if (flags & FS_SYNC_FL)
29 inode->i_flags |= S_SYNC; 28 new_fl |= S_SYNC;
30 if (flags & FS_APPEND_FL) 29 if (flags & FS_APPEND_FL)
31 inode->i_flags |= S_APPEND; 30 new_fl |= S_APPEND;
32 if (flags & FS_IMMUTABLE_FL) 31 if (flags & FS_IMMUTABLE_FL)
33 inode->i_flags |= S_IMMUTABLE; 32 new_fl |= S_IMMUTABLE;
34 if (flags & FS_NOATIME_FL) 33 if (flags & FS_NOATIME_FL)
35 inode->i_flags |= S_NOATIME; 34 new_fl |= S_NOATIME;
36 if (flags & FS_DIRSYNC_FL) 35 if (flags & FS_DIRSYNC_FL)
37 inode->i_flags |= S_DIRSYNC; 36 new_fl |= S_DIRSYNC;
37 set_mask_bits(&inode->i_flags,
38 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
38} 39}
39 40
40static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 41static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -294,4 +295,5 @@ void f2fs_evict_inode(struct inode *inode)
294 sb_end_intwrite(inode->i_sb); 295 sb_end_intwrite(inode->i_sb);
295no_delete: 296no_delete:
296 clear_inode(inode); 297 clear_inode(inode);
298 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
297} 299}
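
The inode.c change above replaces the clear-then-set update of i_flags with a single set_mask_bits() call. As a rough, non-atomic sketch of what that call computes (the generic helper in <linux/bitops.h> performs the same update inside a cmpxchg loop so a racing i_flags update is not lost):

/* Non-atomic sketch only; set_mask_bits() does this within a cmpxchg loop. */
static void set_mask_bits_sketch(unsigned long *ptr, unsigned long mask,
				 unsigned long bits)
{
	/* clear every bit covered by mask, then OR in the requested bits */
	*ptr = (*ptr & ~mask) | bits;
}
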
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a9409d19dfd4..9138c32aa698 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -41,18 +41,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
41 } 41 }
42 f2fs_unlock_op(sbi); 42 f2fs_unlock_op(sbi);
43 43
44 inode->i_uid = current_fsuid(); 44 inode_init_owner(inode, dir, mode);
45
46 if (dir->i_mode & S_ISGID) {
47 inode->i_gid = dir->i_gid;
48 if (S_ISDIR(mode))
49 mode |= S_ISGID;
50 } else {
51 inode->i_gid = current_fsgid();
52 }
53 45
54 inode->i_ino = ino; 46 inode->i_ino = ino;
55 inode->i_mode = mode;
56 inode->i_blocks = 0; 47 inode->i_blocks = 0;
57 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 48 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
58 inode->i_generation = sbi->s_next_generation++; 49 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 57caa6eaf47b..9dfb9a042fd2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -26,20 +26,26 @@
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28 28
29static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) 29bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 30{
31 struct f2fs_nm_info *nm_i = NM_I(sbi);
31 struct sysinfo val; 32 struct sysinfo val;
32 unsigned long mem_size = 0; 33 unsigned long mem_size = 0;
34 bool res = false;
33 35
34 si_meminfo(&val); 36 si_meminfo(&val);
35 if (type == FREE_NIDS) 37 /* give 25%, 25%, 50% memory for each components respectively */
36 mem_size = nm_i->fcnt * sizeof(struct free_nid); 38 if (type == FREE_NIDS) {
37 else if (type == NAT_ENTRIES) 39 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
38 mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); 40 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
39 mem_size >>= 12; 41 } else if (type == NAT_ENTRIES) {
40 42 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
41 /* give 50:50 memory for free nids and nat caches respectively */ 43 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
42 return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); 44 } else if (type == DIRTY_DENTS) {
45 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
46 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
47 }
48 return res;
43} 49}
44 50
45static void clear_node_page_dirty(struct page *page) 51static void clear_node_page_dirty(struct page *page)
@@ -147,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
147 return fsync_done; 153 return fsync_done;
148} 154}
149 155
156void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
157{
158 struct f2fs_nm_info *nm_i = NM_I(sbi);
159 struct nat_entry *e;
160
161 write_lock(&nm_i->nat_tree_lock);
162 e = __lookup_nat_cache(nm_i, nid);
163 if (e)
164 e->fsync_done = false;
165 write_unlock(&nm_i->nat_tree_lock);
166}
167
150static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 168static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
151{ 169{
152 struct nat_entry *new; 170 struct nat_entry *new;
@@ -179,9 +197,7 @@ retry:
179 write_unlock(&nm_i->nat_tree_lock); 197 write_unlock(&nm_i->nat_tree_lock);
180 goto retry; 198 goto retry;
181 } 199 }
182 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); 200 node_info_from_raw_nat(&e->ni, ne);
183 nat_set_ino(e, le32_to_cpu(ne->ino));
184 nat_set_version(e, ne->version);
185 } 201 }
186 write_unlock(&nm_i->nat_tree_lock); 202 write_unlock(&nm_i->nat_tree_lock);
187} 203}
@@ -243,7 +259,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
243{ 259{
244 struct f2fs_nm_info *nm_i = NM_I(sbi); 260 struct f2fs_nm_info *nm_i = NM_I(sbi);
245 261
246 if (available_free_memory(nm_i, NAT_ENTRIES)) 262 if (available_free_memory(sbi, NAT_ENTRIES))
247 return 0; 263 return 0;
248 264
249 write_lock(&nm_i->nat_tree_lock); 265 write_lock(&nm_i->nat_tree_lock);
@@ -849,8 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
849 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 865 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
850 return ERR_PTR(-EPERM); 866 return ERR_PTR(-EPERM);
851 867
852 page = grab_cache_page_write_begin(NODE_MAPPING(sbi), 868 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
853 dn->nid, AOP_FLAG_NOFS);
854 if (!page) 869 if (!page)
855 return ERR_PTR(-ENOMEM); 870 return ERR_PTR(-ENOMEM);
856 871
@@ -867,6 +882,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
867 new_ni.ino = dn->inode->i_ino; 882 new_ni.ino = dn->inode->i_ino;
868 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 883 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
869 884
885 f2fs_wait_on_page_writeback(page, NODE);
870 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 886 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
871 set_cold_node(dn->inode, page); 887 set_cold_node(dn->inode, page);
872 SetPageUptodate(page); 888 SetPageUptodate(page);
@@ -946,8 +962,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
946 struct page *page; 962 struct page *page;
947 int err; 963 int err;
948repeat: 964repeat:
949 page = grab_cache_page_write_begin(NODE_MAPPING(sbi), 965 page = grab_cache_page(NODE_MAPPING(sbi), nid);
950 nid, AOP_FLAG_NOFS);
951 if (!page) 966 if (!page)
952 return ERR_PTR(-ENOMEM); 967 return ERR_PTR(-ENOMEM);
953 968
@@ -1194,6 +1209,8 @@ static int f2fs_write_node_page(struct page *page,
1194 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, 1209 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1195 }; 1210 };
1196 1211
1212 trace_f2fs_writepage(page, NODE);
1213
1197 if (unlikely(sbi->por_doing)) 1214 if (unlikely(sbi->por_doing))
1198 goto redirty_out; 1215 goto redirty_out;
1199 1216
@@ -1225,10 +1242,7 @@ static int f2fs_write_node_page(struct page *page,
1225 return 0; 1242 return 0;
1226 1243
1227redirty_out: 1244redirty_out:
1228 dec_page_count(sbi, F2FS_DIRTY_NODES); 1245 redirty_page_for_writepage(wbc, page);
1229 wbc->pages_skipped++;
1230 account_page_redirty(page);
1231 set_page_dirty(page);
1232 return AOP_WRITEPAGE_ACTIVATE; 1246 return AOP_WRITEPAGE_ACTIVATE;
1233} 1247}
1234 1248
@@ -1238,6 +1252,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1238 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1252 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1239 long diff; 1253 long diff;
1240 1254
1255 trace_f2fs_writepages(mapping->host, wbc, NODE);
1256
1241 /* balancing f2fs's metadata in background */ 1257 /* balancing f2fs's metadata in background */
1242 f2fs_balance_fs_bg(sbi); 1258 f2fs_balance_fs_bg(sbi);
1243 1259
@@ -1313,13 +1329,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1313 radix_tree_delete(&nm_i->free_nid_root, i->nid); 1329 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1314} 1330}
1315 1331
1316static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) 1332static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1317{ 1333{
1334 struct f2fs_nm_info *nm_i = NM_I(sbi);
1318 struct free_nid *i; 1335 struct free_nid *i;
1319 struct nat_entry *ne; 1336 struct nat_entry *ne;
1320 bool allocated = false; 1337 bool allocated = false;
1321 1338
1322 if (!available_free_memory(nm_i, FREE_NIDS)) 1339 if (!available_free_memory(sbi, FREE_NIDS))
1323 return -1; 1340 return -1;
1324 1341
1325 /* 0 nid should not be used */ 1342 /* 0 nid should not be used */
@@ -1372,9 +1389,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1372 kmem_cache_free(free_nid_slab, i); 1389 kmem_cache_free(free_nid_slab, i);
1373} 1390}
1374 1391
1375static void scan_nat_page(struct f2fs_nm_info *nm_i, 1392static void scan_nat_page(struct f2fs_sb_info *sbi,
1376 struct page *nat_page, nid_t start_nid) 1393 struct page *nat_page, nid_t start_nid)
1377{ 1394{
1395 struct f2fs_nm_info *nm_i = NM_I(sbi);
1378 struct f2fs_nat_block *nat_blk = page_address(nat_page); 1396 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1379 block_t blk_addr; 1397 block_t blk_addr;
1380 int i; 1398 int i;
@@ -1389,7 +1407,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
1389 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1407 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1390 f2fs_bug_on(blk_addr == NEW_ADDR); 1408 f2fs_bug_on(blk_addr == NEW_ADDR);
1391 if (blk_addr == NULL_ADDR) { 1409 if (blk_addr == NULL_ADDR) {
1392 if (add_free_nid(nm_i, start_nid, true) < 0) 1410 if (add_free_nid(sbi, start_nid, true) < 0)
1393 break; 1411 break;
1394 } 1412 }
1395 } 1413 }
@@ -1413,7 +1431,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1413 while (1) { 1431 while (1) {
1414 struct page *page = get_current_nat_page(sbi, nid); 1432 struct page *page = get_current_nat_page(sbi, nid);
1415 1433
1416 scan_nat_page(nm_i, page, nid); 1434 scan_nat_page(sbi, page, nid);
1417 f2fs_put_page(page, 1); 1435 f2fs_put_page(page, 1);
1418 1436
1419 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1437 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -1433,7 +1451,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1433 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); 1451 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1434 nid = le32_to_cpu(nid_in_journal(sum, i)); 1452 nid = le32_to_cpu(nid_in_journal(sum, i));
1435 if (addr == NULL_ADDR) 1453 if (addr == NULL_ADDR)
1436 add_free_nid(nm_i, nid, true); 1454 add_free_nid(sbi, nid, true);
1437 else 1455 else
1438 remove_free_nid(nm_i, nid); 1456 remove_free_nid(nm_i, nid);
1439 } 1457 }
@@ -1450,7 +1468,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1450 struct f2fs_nm_info *nm_i = NM_I(sbi); 1468 struct f2fs_nm_info *nm_i = NM_I(sbi);
1451 struct free_nid *i = NULL; 1469 struct free_nid *i = NULL;
1452retry: 1470retry:
1453 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) 1471 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
1454 return false; 1472 return false;
1455 1473
1456 spin_lock(&nm_i->free_nid_list_lock); 1474 spin_lock(&nm_i->free_nid_list_lock);
@@ -1510,7 +1528,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1510 spin_lock(&nm_i->free_nid_list_lock); 1528 spin_lock(&nm_i->free_nid_list_lock);
1511 i = __lookup_free_nid_list(nm_i, nid); 1529 i = __lookup_free_nid_list(nm_i, nid);
1512 f2fs_bug_on(!i || i->state != NID_ALLOC); 1530 f2fs_bug_on(!i || i->state != NID_ALLOC);
1513 if (!available_free_memory(nm_i, FREE_NIDS)) { 1531 if (!available_free_memory(sbi, FREE_NIDS)) {
1514 __del_from_free_nid_list(nm_i, i); 1532 __del_from_free_nid_list(nm_i, i);
1515 need_free = true; 1533 need_free = true;
1516 } else { 1534 } else {
@@ -1532,7 +1550,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1532 clear_node_page_dirty(page); 1550 clear_node_page_dirty(page);
1533} 1551}
1534 1552
1535void recover_inline_xattr(struct inode *inode, struct page *page) 1553static void recover_inline_xattr(struct inode *inode, struct page *page)
1536{ 1554{
1537 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1555 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1538 void *src_addr, *dst_addr; 1556 void *src_addr, *dst_addr;
@@ -1557,6 +1575,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
1557 src_addr = inline_xattr_addr(page); 1575 src_addr = inline_xattr_addr(page);
1558 inline_size = inline_xattr_size(inode); 1576 inline_size = inline_xattr_size(inode);
1559 1577
1578 f2fs_wait_on_page_writeback(ipage, NODE);
1560 memcpy(dst_addr, src_addr, inline_size); 1579 memcpy(dst_addr, src_addr, inline_size);
1561 1580
1562 update_inode(inode, ipage); 1581 update_inode(inode, ipage);
@@ -1612,6 +1631,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1612 struct node_info old_ni, new_ni; 1631 struct node_info old_ni, new_ni;
1613 struct page *ipage; 1632 struct page *ipage;
1614 1633
1634 get_node_info(sbi, ino, &old_ni);
1635
1636 if (unlikely(old_ni.blk_addr != NULL_ADDR))
1637 return -EINVAL;
1638
1615 ipage = grab_cache_page(NODE_MAPPING(sbi), ino); 1639 ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
1616 if (!ipage) 1640 if (!ipage)
1617 return -ENOMEM; 1641 return -ENOMEM;
@@ -1619,7 +1643,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1619 /* Should not use this inode from free nid list */ 1643 /* Should not use this inode from free nid list */
1620 remove_free_nid(NM_I(sbi), ino); 1644 remove_free_nid(NM_I(sbi), ino);
1621 1645
1622 get_node_info(sbi, ino, &old_ni);
1623 SetPageUptodate(ipage); 1646 SetPageUptodate(ipage);
1624 fill_node_footer(ipage, ino, ino, 0, true); 1647 fill_node_footer(ipage, ino, ino, 0, true);
1625 1648
@@ -1645,35 +1668,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1645 1668
1646/* 1669/*
1647 * ra_sum_pages() merges contiguous pages into one bio and submits it. 1670 * ra_sum_pages() merges contiguous pages into one bio and submits it.
1648 * these pre-read pages are linked in the pages list. 1671 * these pre-read pages are allocated in bd_inode's mapping tree.
1649 */ 1672 */
1650static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, 1673static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1651 int start, int nrpages) 1674 int start, int nrpages)
1652{ 1675{
1653 struct page *page; 1676 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1654 int page_idx = start; 1677 struct address_space *mapping = inode->i_mapping;
1678 int i, page_idx = start;
1655 struct f2fs_io_info fio = { 1679 struct f2fs_io_info fio = {
1656 .type = META, 1680 .type = META,
1657 .rw = READ_SYNC | REQ_META | REQ_PRIO 1681 .rw = READ_SYNC | REQ_META | REQ_PRIO
1658 }; 1682 };
1659 1683
1660 for (; page_idx < start + nrpages; page_idx++) { 1684 for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
1661 /* alloc temporal page for read node summary info*/ 1685 /* alloc page in bd_inode for reading node summary info */
1662 page = alloc_page(GFP_F2FS_ZERO); 1686 pages[i] = grab_cache_page(mapping, page_idx);
1663 if (!page) 1687 if (!pages[i])
1664 break; 1688 break;
1665 1689 f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
1666 lock_page(page);
1667 page->index = page_idx;
1668 list_add_tail(&page->lru, pages);
1669 } 1690 }
1670 1691
1671 list_for_each_entry(page, pages, lru)
1672 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1673
1674 f2fs_submit_merged_bio(sbi, META, READ); 1692 f2fs_submit_merged_bio(sbi, META, READ);
1675 1693 return i;
1676 return page_idx - start;
1677} 1694}
1678 1695
1679int restore_node_summary(struct f2fs_sb_info *sbi, 1696int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1681,11 +1698,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1681{ 1698{
1682 struct f2fs_node *rn; 1699 struct f2fs_node *rn;
1683 struct f2fs_summary *sum_entry; 1700 struct f2fs_summary *sum_entry;
1684 struct page *page, *tmp; 1701 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1685 block_t addr; 1702 block_t addr;
1686 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1703 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1687 int i, last_offset, nrpages, err = 0; 1704 struct page *pages[bio_blocks];
1688 LIST_HEAD(page_list); 1705 int i, idx, last_offset, nrpages, err = 0;
1689 1706
1690 /* scan the node segment */ 1707 /* scan the node segment */
1691 last_offset = sbi->blocks_per_seg; 1708 last_offset = sbi->blocks_per_seg;
@@ -1696,29 +1713,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1696 nrpages = min(last_offset - i, bio_blocks); 1713 nrpages = min(last_offset - i, bio_blocks);
1697 1714
1698 /* read ahead node pages */ 1715 /* read ahead node pages */
1699 nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); 1716 nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
1700 if (!nrpages) 1717 if (!nrpages)
1701 return -ENOMEM; 1718 return -ENOMEM;
1702 1719
1703 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1720 for (idx = 0; idx < nrpages; idx++) {
1704 if (err) 1721 if (err)
1705 goto skip; 1722 goto skip;
1706 1723
1707 lock_page(page); 1724 lock_page(pages[idx]);
1708 if (unlikely(!PageUptodate(page))) { 1725 if (unlikely(!PageUptodate(pages[idx]))) {
1709 err = -EIO; 1726 err = -EIO;
1710 } else { 1727 } else {
1711 rn = F2FS_NODE(page); 1728 rn = F2FS_NODE(pages[idx]);
1712 sum_entry->nid = rn->footer.nid; 1729 sum_entry->nid = rn->footer.nid;
1713 sum_entry->version = 0; 1730 sum_entry->version = 0;
1714 sum_entry->ofs_in_node = 0; 1731 sum_entry->ofs_in_node = 0;
1715 sum_entry++; 1732 sum_entry++;
1716 } 1733 }
1717 unlock_page(page); 1734 unlock_page(pages[idx]);
1718skip: 1735skip:
1719 list_del(&page->lru); 1736 page_cache_release(pages[idx]);
1720 __free_pages(page, 0);
1721 } 1737 }
1738
1739 invalidate_mapping_pages(inode->i_mapping, addr,
1740 addr + nrpages);
1722 } 1741 }
1723 return err; 1742 return err;
1724} 1743}
@@ -1756,9 +1775,7 @@ retry:
1756 write_unlock(&nm_i->nat_tree_lock); 1775 write_unlock(&nm_i->nat_tree_lock);
1757 goto retry; 1776 goto retry;
1758 } 1777 }
1759 nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); 1778 node_info_from_raw_nat(&ne->ni, &raw_ne);
1760 nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
1761 nat_set_version(ne, raw_ne.version);
1762 __set_nat_cache_dirty(nm_i, ne); 1779 __set_nat_cache_dirty(nm_i, ne);
1763 write_unlock(&nm_i->nat_tree_lock); 1780 write_unlock(&nm_i->nat_tree_lock);
1764 } 1781 }
@@ -1791,7 +1808,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1791 nid_t nid; 1808 nid_t nid;
1792 struct f2fs_nat_entry raw_ne; 1809 struct f2fs_nat_entry raw_ne;
1793 int offset = -1; 1810 int offset = -1;
1794 block_t new_blkaddr;
1795 1811
1796 if (nat_get_blkaddr(ne) == NEW_ADDR) 1812 if (nat_get_blkaddr(ne) == NEW_ADDR)
1797 continue; 1813 continue;
@@ -1827,11 +1843,7 @@ to_nat_page:
1827 f2fs_bug_on(!nat_blk); 1843 f2fs_bug_on(!nat_blk);
1828 raw_ne = nat_blk->entries[nid - start_nid]; 1844 raw_ne = nat_blk->entries[nid - start_nid];
1829flush_now: 1845flush_now:
1830 new_blkaddr = nat_get_blkaddr(ne); 1846 raw_nat_from_node_info(&raw_ne, &ne->ni);
1831
1832 raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
1833 raw_ne.block_addr = cpu_to_le32(new_blkaddr);
1834 raw_ne.version = nat_get_version(ne);
1835 1847
1836 if (offset < 0) { 1848 if (offset < 0) {
1837 nat_blk->entries[nid - start_nid] = raw_ne; 1849 nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1841,7 +1853,7 @@ flush_now:
1841 } 1853 }
1842 1854
1843 if (nat_get_blkaddr(ne) == NULL_ADDR && 1855 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1844 add_free_nid(NM_I(sbi), nid, false) <= 0) { 1856 add_free_nid(sbi, nid, false) <= 0) {
1845 write_lock(&nm_i->nat_tree_lock); 1857 write_lock(&nm_i->nat_tree_lock);
1846 __del_from_nat_cache(nm_i, ne); 1858 __del_from_nat_cache(nm_i, ne);
1847 write_unlock(&nm_i->nat_tree_lock); 1859 write_unlock(&nm_i->nat_tree_lock);
@@ -1869,8 +1881,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1869 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 1881 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1870 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 1882 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1871 1883
1884 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1885
1872 /* not used nids: 0, node, meta, (and root counted as valid node) */ 1886 /* not used nids: 0, node, meta, (and root counted as valid node) */
1873 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; 1887 nm_i->available_nids = nm_i->max_nid - 3;
1874 nm_i->fcnt = 0; 1888 nm_i->fcnt = 0;
1875 nm_i->nat_cnt = 0; 1889 nm_i->nat_cnt = 0;
1876 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 1890 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
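The restore_node_summary() rework above drops the temporary page list: ra_sum_pages() now grabs pages straight from the block device inode's mapping, returns how many it got, and the caller walks a plain array before invalidating that mapping range. The userspace program below is only an analogue of that batching shape (read a contiguous run of blocks in one pass, then harvest one field per block); the block size, the footer offset and the harvest() helper are illustrative assumptions, not the f2fs on-disk layout.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define BLKSZ 4096
#define BATCH 8   /* analogue of MAX_BIO_BLOCKS() */

/* Illustrative stand-in for pulling footer.nid out of a node block. */
static unsigned harvest(const unsigned char *blk)
{
	uint32_t nid;
	memcpy(&nid, blk + BLKSZ - sizeof(nid), sizeof(nid));
	return (unsigned)nid;
}

int main(int argc, char **argv)
{
	unsigned char (*pages)[BLKSZ];
	off_t addr = 0;
	int fd, i, got = 0;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	pages = calloc(BATCH, BLKSZ);
	if (!pages)
		return 1;

	/* "read ahead" one batch, then walk the array, mirroring
	 * ra_sum_pages() plus the per-page loop in restore_node_summary() */
	for (i = 0; i < BATCH; i++) {
		if (pread(fd, pages[i], BLKSZ, (addr + i) * BLKSZ) != BLKSZ)
			break;
		got++;
	}
	for (i = 0; i < got; i++)
		printf("block %lld: nid-like field %u\n",
		       (long long)(addr + i), harvest(pages[i]));

	free(pages);
	close(fd);
	return 0;
}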
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5decc1a375f0..7281112cd1c8 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -59,12 +59,12 @@ struct nat_entry {
59 do { \ 59 do { \
60 ne->checkpointed = false; \ 60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ 61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0); 62 } while (0)
63#define __clear_nat_cache_dirty(nm_i, ne) \ 63#define __clear_nat_cache_dirty(nm_i, ne) \
64 do { \ 64 do { \
65 ne->checkpointed = true; \ 65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \ 66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0); 67 } while (0)
68#define inc_node_version(version) (++version) 68#define inc_node_version(version) (++version)
69 69
70static inline void node_info_from_raw_nat(struct node_info *ni, 70static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -75,9 +75,18 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
75 ni->version = raw_ne->version; 75 ni->version = raw_ne->version;
76} 76}
77 77
78enum nid_type { 78static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
79 struct node_info *ni)
80{
81 raw_ne->ino = cpu_to_le32(ni->ino);
82 raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
83 raw_ne->version = ni->version;
84}
85
86enum mem_type {
79 FREE_NIDS, /* indicates the free nid list */ 87 FREE_NIDS, /* indicates the free nid list */
80 NAT_ENTRIES /* indicates the cached nat entry */ 88 NAT_ENTRIES, /* indicates the cached nat entry */
89 DIRTY_DENTS /* indicates dirty dentry pages */
81}; 90};
82 91
83/* 92/*
@@ -263,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
263{ 272{
264 struct f2fs_node *rn = F2FS_NODE(p); 273 struct f2fs_node *rn = F2FS_NODE(p);
265 274
266 wait_on_page_writeback(p); 275 f2fs_wait_on_page_writeback(p, NODE);
267 276
268 if (i) 277 if (i)
269 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); 278 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
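Besides dropping the stray semicolons after the do { } while (0) macros, the node.h hunk adds raw_nat_from_node_info() as the write-side counterpart of node_info_from_raw_nat(), which is what lets flush_nat_entries() above replace its open-coded field copies. A minimal userspace sketch of that symmetric conversion is shown below; the struct layouts are simplified stand-ins, and glibc's <endian.h> helpers stand in for the kernel's cpu_to_le32/le32_to_cpu.

#include <stdio.h>
#include <stdint.h>
#include <endian.h>	/* htole32/le32toh as stand-ins for cpu_to_le32/le32_to_cpu */

/* Simplified stand-ins for f2fs_nat_entry / node_info (not the real layout). */
struct raw_nat { uint32_t ino_le; uint32_t blkaddr_le; uint8_t version; };
struct ninfo   { uint32_t ino;    uint32_t blk_addr;   uint8_t version; };

static void ninfo_from_raw(struct ninfo *ni, const struct raw_nat *raw)
{
	ni->ino = le32toh(raw->ino_le);
	ni->blk_addr = le32toh(raw->blkaddr_le);
	ni->version = raw->version;
}

static void raw_from_ninfo(struct raw_nat *raw, const struct ninfo *ni)
{
	raw->ino_le = htole32(ni->ino);
	raw->blkaddr_le = htole32(ni->blk_addr);
	raw->version = ni->version;
}

int main(void)
{
	struct ninfo ni = { .ino = 3, .blk_addr = 0x1234, .version = 1 }, back;
	struct raw_nat raw;

	raw_from_ninfo(&raw, &ni);	/* what the NAT write path now does */
	ninfo_from_raw(&back, &raw);	/* what the NAT read path does */
	printf("round trip: ino=%u blk=0x%x ver=%u\n",
	       back.ino, (unsigned)back.blk_addr, back.version);
	return 0;
}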
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b1ae89f0f44e..a112368a4a86 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -46,15 +46,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
46 struct inode *dir, *einode; 46 struct inode *dir, *einode;
47 int err = 0; 47 int err = 0;
48 48
49 dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); 49 dir = f2fs_iget(inode->i_sb, pino);
50 if (!dir) { 50 if (IS_ERR(dir)) {
51 dir = f2fs_iget(inode->i_sb, pino); 51 err = PTR_ERR(dir);
52 if (IS_ERR(dir)) { 52 goto out;
53 err = PTR_ERR(dir);
54 goto out;
55 }
56 set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
57 add_dirty_dir_inode(dir);
58 } 53 }
59 54
60 name.len = le32_to_cpu(raw_inode->i_namelen); 55 name.len = le32_to_cpu(raw_inode->i_namelen);
@@ -63,7 +58,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
63 if (unlikely(name.len > F2FS_NAME_LEN)) { 58 if (unlikely(name.len > F2FS_NAME_LEN)) {
64 WARN_ON(1); 59 WARN_ON(1);
65 err = -ENAMETOOLONG; 60 err = -ENAMETOOLONG;
66 goto out; 61 goto out_err;
67 } 62 }
68retry: 63retry:
69 de = f2fs_find_entry(dir, &name, &page); 64 de = f2fs_find_entry(dir, &name, &page);
@@ -73,7 +68,8 @@ retry:
73 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 68 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
74 if (IS_ERR(einode)) { 69 if (IS_ERR(einode)) {
75 WARN_ON(1); 70 WARN_ON(1);
76 if (PTR_ERR(einode) == -ENOENT) 71 err = PTR_ERR(einode);
72 if (err == -ENOENT)
77 err = -EEXIST; 73 err = -EEXIST;
78 goto out_unmap_put; 74 goto out_unmap_put;
79 } 75 }
@@ -87,11 +83,23 @@ retry:
87 goto retry; 83 goto retry;
88 } 84 }
89 err = __f2fs_add_link(dir, &name, inode); 85 err = __f2fs_add_link(dir, &name, inode);
86 if (err)
87 goto out_err;
88
89 if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
90 iput(dir);
91 } else {
92 add_dirty_dir_inode(dir);
93 set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
94 }
95
90 goto out; 96 goto out;
91 97
92out_unmap_put: 98out_unmap_put:
93 kunmap(page); 99 kunmap(page);
94 f2fs_put_page(page, 0); 100 f2fs_put_page(page, 0);
101out_err:
102 iput(dir);
95out: 103out:
96 f2fs_msg(inode->i_sb, KERN_NOTICE, 104 f2fs_msg(inode->i_sb, KERN_NOTICE,
97 "%s: ino = %x, name = %s, dir = %lx, err = %d", 105 "%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -299,10 +307,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
299 goto out; 307 goto out;
300 308
301 start = start_bidx_of_node(ofs_of_node(page), fi); 309 start = start_bidx_of_node(ofs_of_node(page), fi);
302 if (IS_INODE(page)) 310 end = start + ADDRS_PER_PAGE(page, fi);
303 end = start + ADDRS_PER_INODE(fi);
304 else
305 end = start + ADDRS_PER_BLOCK;
306 311
307 f2fs_lock_op(sbi); 312 f2fs_lock_op(sbi);
308 313
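In the recovery.c hunk, recover_dentry() now always takes its own reference on the parent directory via f2fs_iget() and only decides what to do with that reference after __f2fs_add_link() has succeeded: drop it immediately if the directory already carries FI_DELAY_IPUT, otherwise hand it over to the dirty-dir list. The sketch below is a generic userspace rendering of that acquire/try/release-or-hand-off shape; the dir type and the helper names are assumptions made purely for illustration.

#include <stdio.h>
#include <stdbool.h>

struct dir { int refcount; bool delayed_put; };

static struct dir *dir_get(struct dir *d) { d->refcount++; return d; }
static void dir_put(struct dir *d) { d->refcount--; }
static void hand_off_to_dirty_list(struct dir *d) { d->delayed_put = true; }
static int add_link(struct dir *d) { (void)d; return 0; /* pretend success */ }

static int recover_one(struct dir *parent)
{
	struct dir *dir = dir_get(parent);	/* analogue of f2fs_iget() */
	int err;

	err = add_link(dir);			/* analogue of __f2fs_add_link() */
	if (err)
		goto out_err;

	if (dir->delayed_put)
		dir_put(dir);			/* a long-lived ref already exists */
	else
		hand_off_to_dirty_list(dir);	/* keep the ref, drop it later */
	return 0;

out_err:
	dir_put(dir);				/* error path always drops the ref */
	return err;
}

int main(void)
{
	struct dir d = { .refcount = 1, .delayed_put = false };
	printf("recover_one: %d, refcount now %d\n", recover_one(&d), d.refcount);
	return 0;
}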
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 085f548be7a3..f25f0e07e26f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -25,7 +25,6 @@
25#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
26 26
27static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *flush_cmd_slab;
29 28
30/* 29/*
31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 30 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -200,20 +199,20 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
200static int issue_flush_thread(void *data) 199static int issue_flush_thread(void *data)
201{ 200{
202 struct f2fs_sb_info *sbi = data; 201 struct f2fs_sb_info *sbi = data;
203 struct f2fs_sm_info *sm_i = SM_I(sbi); 202 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
204 wait_queue_head_t *q = &sm_i->flush_wait_queue; 203 wait_queue_head_t *q = &fcc->flush_wait_queue;
205repeat: 204repeat:
206 if (kthread_should_stop()) 205 if (kthread_should_stop())
207 return 0; 206 return 0;
208 207
209 spin_lock(&sm_i->issue_lock); 208 spin_lock(&fcc->issue_lock);
210 if (sm_i->issue_list) { 209 if (fcc->issue_list) {
211 sm_i->dispatch_list = sm_i->issue_list; 210 fcc->dispatch_list = fcc->issue_list;
212 sm_i->issue_list = sm_i->issue_tail = NULL; 211 fcc->issue_list = fcc->issue_tail = NULL;
213 } 212 }
214 spin_unlock(&sm_i->issue_lock); 213 spin_unlock(&fcc->issue_lock);
215 214
216 if (sm_i->dispatch_list) { 215 if (fcc->dispatch_list) {
217 struct bio *bio = bio_alloc(GFP_NOIO, 0); 216 struct bio *bio = bio_alloc(GFP_NOIO, 0);
218 struct flush_cmd *cmd, *next; 217 struct flush_cmd *cmd, *next;
219 int ret; 218 int ret;
@@ -221,47 +220,79 @@ repeat:
221 bio->bi_bdev = sbi->sb->s_bdev; 220 bio->bi_bdev = sbi->sb->s_bdev;
222 ret = submit_bio_wait(WRITE_FLUSH, bio); 221 ret = submit_bio_wait(WRITE_FLUSH, bio);
223 222
224 for (cmd = sm_i->dispatch_list; cmd; cmd = next) { 223 for (cmd = fcc->dispatch_list; cmd; cmd = next) {
225 cmd->ret = ret; 224 cmd->ret = ret;
226 next = cmd->next; 225 next = cmd->next;
227 complete(&cmd->wait); 226 complete(&cmd->wait);
228 } 227 }
229 sm_i->dispatch_list = NULL; 228 bio_put(bio);
229 fcc->dispatch_list = NULL;
230 } 230 }
231 231
232 wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list); 232 wait_event_interruptible(*q,
233 kthread_should_stop() || fcc->issue_list);
233 goto repeat; 234 goto repeat;
234} 235}
235 236
236int f2fs_issue_flush(struct f2fs_sb_info *sbi) 237int f2fs_issue_flush(struct f2fs_sb_info *sbi)
237{ 238{
238 struct f2fs_sm_info *sm_i = SM_I(sbi); 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
239 struct flush_cmd *cmd; 240 struct flush_cmd cmd;
240 int ret;
241 241
242 if (!test_opt(sbi, FLUSH_MERGE)) 242 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244 244
245 cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC); 245 init_completion(&cmd.wait);
246 cmd->next = NULL; 246 cmd.next = NULL;
247 cmd->ret = 0;
248 init_completion(&cmd->wait);
249 247
250 spin_lock(&sm_i->issue_lock); 248 spin_lock(&fcc->issue_lock);
251 if (sm_i->issue_list) 249 if (fcc->issue_list)
252 sm_i->issue_tail->next = cmd; 250 fcc->issue_tail->next = &cmd;
253 else 251 else
254 sm_i->issue_list = cmd; 252 fcc->issue_list = &cmd;
255 sm_i->issue_tail = cmd; 253 fcc->issue_tail = &cmd;
256 spin_unlock(&sm_i->issue_lock); 254 spin_unlock(&fcc->issue_lock);
257 255
258 if (!sm_i->dispatch_list) 256 if (!fcc->dispatch_list)
259 wake_up(&sm_i->flush_wait_queue); 257 wake_up(&fcc->flush_wait_queue);
260 258
261 wait_for_completion(&cmd->wait); 259 wait_for_completion(&cmd.wait);
262 ret = cmd->ret; 260
263 kmem_cache_free(flush_cmd_slab, cmd); 261 return cmd.ret;
264 return ret; 262}
263
264int create_flush_cmd_control(struct f2fs_sb_info *sbi)
265{
266 dev_t dev = sbi->sb->s_bdev->bd_dev;
267 struct flush_cmd_control *fcc;
268 int err = 0;
269
270 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
271 if (!fcc)
272 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue);
275 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
276 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
277 if (IS_ERR(fcc->f2fs_issue_flush)) {
278 err = PTR_ERR(fcc->f2fs_issue_flush);
279 kfree(fcc);
280 return err;
281 }
282 sbi->sm_info->cmd_control_info = fcc;
283
284 return err;
285}
286
287void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
288{
289 struct flush_cmd_control *fcc =
290 sbi->sm_info->cmd_control_info;
291
292 if (fcc && fcc->f2fs_issue_flush)
293 kthread_stop(fcc->f2fs_issue_flush);
294 kfree(fcc);
295 sbi->sm_info->cmd_control_info = NULL;
265} 296}
266 297
267static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 298static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -336,13 +367,26 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
336 mutex_unlock(&dirty_i->seglist_lock); 367 mutex_unlock(&dirty_i->seglist_lock);
337} 368}
338 369
339static void f2fs_issue_discard(struct f2fs_sb_info *sbi, 370static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
340 block_t blkstart, block_t blklen) 371 block_t blkstart, block_t blklen)
341{ 372{
342 sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); 373 sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
343 sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); 374 sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
344 blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
345 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 375 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
376 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
377}
378
379void discard_next_dnode(struct f2fs_sb_info *sbi)
380{
381 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
382 block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
383
384 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
385 struct page *page = grab_meta_page(sbi, blkaddr);
386 /* zero-filled page */
387 set_page_dirty(page);
388 f2fs_put_page(page, 1);
389 }
346} 390}
347 391
348static void add_discard_addrs(struct f2fs_sb_info *sbi, 392static void add_discard_addrs(struct f2fs_sb_info *sbi,
@@ -1832,7 +1876,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1832{ 1876{
1833 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1877 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1834 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1878 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1835 dev_t dev = sbi->sb->s_bdev->bd_dev;
1836 struct f2fs_sm_info *sm_info; 1879 struct f2fs_sm_info *sm_info;
1837 int err; 1880 int err;
1838 1881
@@ -1860,14 +1903,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1860 sm_info->nr_discards = 0; 1903 sm_info->nr_discards = 0;
1861 sm_info->max_discards = 0; 1904 sm_info->max_discards = 0;
1862 1905
1863 if (test_opt(sbi, FLUSH_MERGE)) { 1906 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
1864 spin_lock_init(&sm_info->issue_lock); 1907 err = create_flush_cmd_control(sbi);
1865 init_waitqueue_head(&sm_info->flush_wait_queue); 1908 if (err)
1866 1909 return err;
1867 sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
1868 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
1869 if (IS_ERR(sm_info->f2fs_issue_flush))
1870 return PTR_ERR(sm_info->f2fs_issue_flush);
1871 } 1910 }
1872 1911
1873 err = build_sit_info(sbi); 1912 err = build_sit_info(sbi);
@@ -1976,10 +2015,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
1976void destroy_segment_manager(struct f2fs_sb_info *sbi) 2015void destroy_segment_manager(struct f2fs_sb_info *sbi)
1977{ 2016{
1978 struct f2fs_sm_info *sm_info = SM_I(sbi); 2017 struct f2fs_sm_info *sm_info = SM_I(sbi);
2018
1979 if (!sm_info) 2019 if (!sm_info)
1980 return; 2020 return;
1981 if (sm_info->f2fs_issue_flush) 2021 destroy_flush_cmd_control(sbi);
1982 kthread_stop(sm_info->f2fs_issue_flush);
1983 destroy_dirty_segmap(sbi); 2022 destroy_dirty_segmap(sbi);
1984 destroy_curseg(sbi); 2023 destroy_curseg(sbi);
1985 destroy_free_segmap(sbi); 2024 destroy_free_segmap(sbi);
@@ -1994,17 +2033,10 @@ int __init create_segment_manager_caches(void)
1994 sizeof(struct discard_entry)); 2033 sizeof(struct discard_entry));
1995 if (!discard_entry_slab) 2034 if (!discard_entry_slab)
1996 return -ENOMEM; 2035 return -ENOMEM;
1997 flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
1998 sizeof(struct flush_cmd));
1999 if (!flush_cmd_slab) {
2000 kmem_cache_destroy(discard_entry_slab);
2001 return -ENOMEM;
2002 }
2003 return 0; 2036 return 0;
2004} 2037}
2005 2038
2006void destroy_segment_manager_caches(void) 2039void destroy_segment_manager_caches(void)
2007{ 2040{
2008 kmem_cache_destroy(discard_entry_slab); 2041 kmem_cache_destroy(discard_entry_slab);
2009 kmem_cache_destroy(flush_cmd_slab);
2010} 2042}
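The flush-merge rework above moves all of the merge state out of f2fs_sm_info into a dedicated flush_cmd_control and, more importantly, lets f2fs_issue_flush() keep its flush_cmd on the stack: the submitter waits on the command's completion before returning, so the stack frame is guaranteed to outlive the worker's use of it and the flush_cmd slab cache can go away. A minimal userspace analogue of that pattern, written with pthreads, is sketched below; the single global controller, the fake issue_one_flush() and the prepend-only issue list are assumptions made to keep the example self-contained, not f2fs code.

#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

struct flush_cmd {
	struct flush_cmd *next;
	int ret;
	bool done;
};

static struct {
	pthread_mutex_t lock;
	pthread_cond_t kick;	/* wakes the worker */
	pthread_cond_t done;	/* wakes waiting submitters */
	struct flush_cmd *issue_list;
	bool stop;
} fcc = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
	  PTHREAD_COND_INITIALIZER, NULL, false };

static int issue_one_flush(void) { usleep(1000); return 0; } /* pretend flush */

static void *flush_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&fcc.lock);
	while (!fcc.stop) {
		struct flush_cmd *batch = fcc.issue_list;

		if (!batch) {
			pthread_cond_wait(&fcc.kick, &fcc.lock);
			continue;
		}
		fcc.issue_list = NULL;
		pthread_mutex_unlock(&fcc.lock);

		int ret = issue_one_flush();	/* one flush serves the whole batch */

		pthread_mutex_lock(&fcc.lock);
		for (struct flush_cmd *c = batch; c; c = c->next) {
			c->ret = ret;
			c->done = true;
		}
		pthread_cond_broadcast(&fcc.done);
	}
	pthread_mutex_unlock(&fcc.lock);
	return NULL;
}

static int issue_flush(void)
{
	struct flush_cmd cmd = { .next = NULL, .ret = 0, .done = false }; /* on stack */

	pthread_mutex_lock(&fcc.lock);
	cmd.next = fcc.issue_list;
	fcc.issue_list = &cmd;
	pthread_cond_signal(&fcc.kick);
	while (!cmd.done)		/* safe: we do not return until done */
		pthread_cond_wait(&fcc.done, &fcc.lock);
	pthread_mutex_unlock(&fcc.lock);
	return cmd.ret;
}

int main(void)
{
	pthread_t worker;

	pthread_create(&worker, NULL, flush_worker, NULL);
	printf("flush returned %d\n", issue_flush());

	pthread_mutex_lock(&fcc.lock);
	fcc.stop = true;
	pthread_cond_signal(&fcc.kick);
	pthread_mutex_unlock(&fcc.lock);
	pthread_join(worker, NULL);
	return 0;
}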
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c756923a7302..b2b18637cb9e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -514,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
514{ 514{
515 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); 515 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
516 516
517 if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) 517 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
518 seq_printf(seq, ",background_gc=%s", "on"); 518 seq_printf(seq, ",background_gc=%s", "on");
519 else 519 else
520 seq_printf(seq, ",background_gc=%s", "off"); 520 seq_printf(seq, ",background_gc=%s", "off");
@@ -542,7 +542,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
542 seq_puts(seq, ",disable_ext_identify"); 542 seq_puts(seq, ",disable_ext_identify");
543 if (test_opt(sbi, INLINE_DATA)) 543 if (test_opt(sbi, INLINE_DATA))
544 seq_puts(seq, ",inline_data"); 544 seq_puts(seq, ",inline_data");
545 if (test_opt(sbi, FLUSH_MERGE)) 545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge"); 546 seq_puts(seq, ",flush_merge");
547 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 547 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
548 548
@@ -594,6 +594,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
594 struct f2fs_sb_info *sbi = F2FS_SB(sb); 594 struct f2fs_sb_info *sbi = F2FS_SB(sb);
595 struct f2fs_mount_info org_mount_opt; 595 struct f2fs_mount_info org_mount_opt;
596 int err, active_logs; 596 int err, active_logs;
597 bool need_restart_gc = false;
598 bool need_stop_gc = false;
597 599
598 sync_filesystem(sb); 600 sync_filesystem(sb);
599 601
@@ -611,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
611 613
612 /* 614 /*
613 * Previous and new state of filesystem is RO, 615 * Previous and new state of filesystem is RO,
614 * so no point in checking GC conditions. 616 * so skip checking GC and FLUSH_MERGE conditions.
615 */ 617 */
616 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 618 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
617 goto skip; 619 goto skip;
@@ -625,18 +627,40 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
625 if (sbi->gc_thread) { 627 if (sbi->gc_thread) {
626 stop_gc_thread(sbi); 628 stop_gc_thread(sbi);
627 f2fs_sync_fs(sb, 1); 629 f2fs_sync_fs(sb, 1);
630 need_restart_gc = true;
628 } 631 }
629 } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { 632 } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
630 err = start_gc_thread(sbi); 633 err = start_gc_thread(sbi);
631 if (err) 634 if (err)
632 goto restore_opts; 635 goto restore_opts;
636 need_stop_gc = true;
637 }
638
639 /*
640 * We stop issue flush thread if FS is mounted as RO
641 * or if flush_merge is not passed in mount option.
642 */
643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
644 destroy_flush_cmd_control(sbi);
645 } else if (test_opt(sbi, FLUSH_MERGE) &&
646 !sbi->sm_info->cmd_control_info) {
647 err = create_flush_cmd_control(sbi);
648 if (err)
649 goto restore_gc;
633 } 650 }
634skip: 651skip:
635 /* Update the POSIXACL Flag */ 652 /* Update the POSIXACL Flag */
636 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 653 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
637 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); 654 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
638 return 0; 655 return 0;
639 656restore_gc:
657 if (need_restart_gc) {
658 if (start_gc_thread(sbi))
659 f2fs_msg(sbi->sb, KERN_WARNING,
660 "background gc thread is stop");
661 } else if (need_stop_gc) {
662 stop_gc_thread(sbi);
663 }
640restore_opts: 664restore_opts:
641 sbi->mount_opt = org_mount_opt; 665 sbi->mount_opt = org_mount_opt;
642 sbi->active_logs = active_logs; 666 sbi->active_logs = active_logs;
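The remount path above now records which state transitions it actually performed (need_restart_gc / need_stop_gc) so that a later failure while setting up the flush-merge thread can undo exactly those transitions before restoring the saved mount options. The standalone sketch below shows the same goto-based unwind pattern with stubbed helpers; the helper names and the "flush setup always fails" stub are invented for illustration only.

#include <stdio.h>
#include <stdbool.h>

static int start_gc(void)    { puts("gc started");  return 0; }
static void stop_gc(void)    { puts("gc stopped"); }
static int start_flush(void) { puts("flush-merge start failed"); return -1; }

static int remount(bool want_gc, bool want_flush)
{
	bool need_restart_gc = false, need_stop_gc = false;
	bool gc_running = true;	/* pretend the old mount had background GC on */
	int err = 0;

	if (!want_gc && gc_running) {
		stop_gc();
		need_restart_gc = true;		/* undo = restart it */
	} else if (want_gc && !gc_running) {
		err = start_gc();
		if (err)
			goto restore_opts;
		need_stop_gc = true;		/* undo = stop it again */
	}

	if (want_flush) {
		err = start_flush();
		if (err)
			goto restore_gc;
	}
	return 0;

restore_gc:
	if (need_restart_gc)
		start_gc();
	else if (need_stop_gc)
		stop_gc();
restore_opts:
	/* f2fs_remount() restores the saved mount options here */
	return err;
}

int main(void)
{
	printf("remount -> %d\n", remount(false, true));
	return 0;
}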
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 503c2451131e..8bea941ee309 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -26,7 +26,7 @@
26#include "xattr.h" 26#include "xattr.h"
27 27
28static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, 28static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
29 size_t list_size, const char *name, size_t name_len, int type) 29 size_t list_size, const char *name, size_t len, int type)
30{ 30{
31 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); 31 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
32 int total_len, prefix_len = 0; 32 int total_len, prefix_len = 0;
@@ -53,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
53 return -EINVAL; 53 return -EINVAL;
54 } 54 }
55 55
56 total_len = prefix_len + name_len + 1; 56 total_len = prefix_len + len + 1;
57 if (list && total_len <= list_size) { 57 if (list && total_len <= list_size) {
58 memcpy(list, prefix, prefix_len); 58 memcpy(list, prefix, prefix_len);
59 memcpy(list + prefix_len, name, name_len); 59 memcpy(list + prefix_len, name, len);
60 list[prefix_len + name_len] = '\0'; 60 list[prefix_len + len] = '\0';
61 } 61 }
62 return total_len; 62 return total_len;
63} 63}
@@ -108,11 +108,12 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
108 if (strcmp(name, "") == 0) 108 if (strcmp(name, "") == 0)
109 return -EINVAL; 109 return -EINVAL;
110 110
111 return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); 111 return f2fs_setxattr(dentry->d_inode, type, name,
112 value, size, NULL, flags);
112} 113}
113 114
114static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, 115static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
115 size_t list_size, const char *name, size_t name_len, int type) 116 size_t list_size, const char *name, size_t len, int type)
116{ 117{
117 const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; 118 const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
118 size_t size; 119 size_t size;
@@ -155,9 +156,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
155} 156}
156 157
157#ifdef CONFIG_F2FS_FS_SECURITY 158#ifdef CONFIG_F2FS_FS_SECURITY
158static int __f2fs_setxattr(struct inode *inode, int name_index,
159 const char *name, const void *value, size_t value_len,
160 struct page *ipage);
161static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, 159static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
162 void *page) 160 void *page)
163{ 161{
@@ -165,9 +163,9 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
165 int err = 0; 163 int err = 0;
166 164
167 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 165 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
168 err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, 166 err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
169 xattr->name, xattr->value, 167 xattr->name, xattr->value,
170 xattr->value_len, (struct page *)page); 168 xattr->value_len, (struct page *)page, 0);
171 if (err < 0) 169 if (err < 0)
172 break; 170 break;
173 } 171 }
@@ -241,26 +239,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = {
241 NULL, 239 NULL,
242}; 240};
243 241
244static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) 242static inline const struct xattr_handler *f2fs_xattr_handler(int index)
245{ 243{
246 const struct xattr_handler *handler = NULL; 244 const struct xattr_handler *handler = NULL;
247 245
248 if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) 246 if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
249 handler = f2fs_xattr_handler_map[name_index]; 247 handler = f2fs_xattr_handler_map[index];
250 return handler; 248 return handler;
251} 249}
252 250
253static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, 251static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
254 size_t name_len, const char *name) 252 size_t len, const char *name)
255{ 253{
256 struct f2fs_xattr_entry *entry; 254 struct f2fs_xattr_entry *entry;
257 255
258 list_for_each_xattr(entry, base_addr) { 256 list_for_each_xattr(entry, base_addr) {
259 if (entry->e_name_index != name_index) 257 if (entry->e_name_index != index)
260 continue; 258 continue;
261 if (entry->e_name_len != name_len) 259 if (entry->e_name_len != len)
262 continue; 260 continue;
263 if (!memcmp(entry->e_name, name, name_len)) 261 if (!memcmp(entry->e_name, name, len))
264 break; 262 break;
265 } 263 }
266 return entry; 264 return entry;
@@ -347,6 +345,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
347 345
348 if (ipage) { 346 if (ipage) {
349 inline_addr = inline_xattr_addr(ipage); 347 inline_addr = inline_xattr_addr(ipage);
348 f2fs_wait_on_page_writeback(ipage, NODE);
350 } else { 349 } else {
351 page = get_node_page(sbi, inode->i_ino); 350 page = get_node_page(sbi, inode->i_ino);
352 if (IS_ERR(page)) { 351 if (IS_ERR(page)) {
@@ -354,6 +353,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
354 return PTR_ERR(page); 353 return PTR_ERR(page);
355 } 354 }
356 inline_addr = inline_xattr_addr(page); 355 inline_addr = inline_xattr_addr(page);
356 f2fs_wait_on_page_writeback(page, NODE);
357 } 357 }
358 memcpy(inline_addr, txattr_addr, inline_size); 358 memcpy(inline_addr, txattr_addr, inline_size);
359 f2fs_put_page(page, 1); 359 f2fs_put_page(page, 1);
@@ -374,6 +374,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
374 return PTR_ERR(xpage); 374 return PTR_ERR(xpage);
375 } 375 }
376 f2fs_bug_on(new_nid); 376 f2fs_bug_on(new_nid);
377 f2fs_wait_on_page_writeback(xpage, NODE);
377 } else { 378 } else {
378 struct dnode_of_data dn; 379 struct dnode_of_data dn;
379 set_new_dnode(&dn, inode, NULL, NULL, new_nid); 380 set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -396,42 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
396 return 0; 397 return 0;
397} 398}
398 399
399int f2fs_getxattr(struct inode *inode, int name_index, const char *name, 400int f2fs_getxattr(struct inode *inode, int index, const char *name,
400 void *buffer, size_t buffer_size) 401 void *buffer, size_t buffer_size)
401{ 402{
402 struct f2fs_xattr_entry *entry; 403 struct f2fs_xattr_entry *entry;
403 void *base_addr; 404 void *base_addr;
404 int error = 0; 405 int error = 0;
405 size_t value_len, name_len; 406 size_t size, len;
406 407
407 if (name == NULL) 408 if (name == NULL)
408 return -EINVAL; 409 return -EINVAL;
409 name_len = strlen(name); 410
410 if (name_len > F2FS_NAME_LEN) 411 len = strlen(name);
412 if (len > F2FS_NAME_LEN)
411 return -ERANGE; 413 return -ERANGE;
412 414
413 base_addr = read_all_xattrs(inode, NULL); 415 base_addr = read_all_xattrs(inode, NULL);
414 if (!base_addr) 416 if (!base_addr)
415 return -ENOMEM; 417 return -ENOMEM;
416 418
417 entry = __find_xattr(base_addr, name_index, name_len, name); 419 entry = __find_xattr(base_addr, index, len, name);
418 if (IS_XATTR_LAST_ENTRY(entry)) { 420 if (IS_XATTR_LAST_ENTRY(entry)) {
419 error = -ENODATA; 421 error = -ENODATA;
420 goto cleanup; 422 goto cleanup;
421 } 423 }
422 424
423 value_len = le16_to_cpu(entry->e_value_size); 425 size = le16_to_cpu(entry->e_value_size);
424 426
425 if (buffer && value_len > buffer_size) { 427 if (buffer && size > buffer_size) {
426 error = -ERANGE; 428 error = -ERANGE;
427 goto cleanup; 429 goto cleanup;
428 } 430 }
429 431
430 if (buffer) { 432 if (buffer) {
431 char *pval = entry->e_name + entry->e_name_len; 433 char *pval = entry->e_name + entry->e_name_len;
432 memcpy(buffer, pval, value_len); 434 memcpy(buffer, pval, size);
433 } 435 }
434 error = value_len; 436 error = size;
435 437
436cleanup: 438cleanup:
437 kzfree(base_addr); 439 kzfree(base_addr);
@@ -475,15 +477,15 @@ cleanup:
475 return error; 477 return error;
476} 478}
477 479
478static int __f2fs_setxattr(struct inode *inode, int name_index, 480static int __f2fs_setxattr(struct inode *inode, int index,
479 const char *name, const void *value, size_t value_len, 481 const char *name, const void *value, size_t size,
480 struct page *ipage) 482 struct page *ipage, int flags)
481{ 483{
482 struct f2fs_inode_info *fi = F2FS_I(inode); 484 struct f2fs_inode_info *fi = F2FS_I(inode);
483 struct f2fs_xattr_entry *here, *last; 485 struct f2fs_xattr_entry *here, *last;
484 void *base_addr; 486 void *base_addr;
485 int found, newsize; 487 int found, newsize;
486 size_t name_len; 488 size_t len;
487 __u32 new_hsize; 489 __u32 new_hsize;
488 int error = -ENOMEM; 490 int error = -ENOMEM;
489 491
@@ -491,11 +493,11 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
491 return -EINVAL; 493 return -EINVAL;
492 494
493 if (value == NULL) 495 if (value == NULL)
494 value_len = 0; 496 size = 0;
495 497
496 name_len = strlen(name); 498 len = strlen(name);
497 499
498 if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) 500 if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
499 return -ERANGE; 501 return -ERANGE;
500 502
501 base_addr = read_all_xattrs(inode, ipage); 503 base_addr = read_all_xattrs(inode, ipage);
@@ -503,16 +505,23 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
503 goto exit; 505 goto exit;
504 506
505 /* find entry with wanted name. */ 507 /* find entry with wanted name. */
506 here = __find_xattr(base_addr, name_index, name_len, name); 508 here = __find_xattr(base_addr, index, len, name);
507 509
508 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; 510 found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
509 last = here;
510 511
512 if ((flags & XATTR_REPLACE) && !found) {
513 error = -ENODATA;
514 goto exit;
515 } else if ((flags & XATTR_CREATE) && found) {
516 error = -EEXIST;
517 goto exit;
518 }
519
520 last = here;
511 while (!IS_XATTR_LAST_ENTRY(last)) 521 while (!IS_XATTR_LAST_ENTRY(last))
512 last = XATTR_NEXT_ENTRY(last); 522 last = XATTR_NEXT_ENTRY(last);
513 523
514 newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + 524 newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
515 name_len + value_len);
516 525
517 /* 1. Check space */ 526 /* 1. Check space */
518 if (value) { 527 if (value) {
@@ -555,12 +564,12 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
555 * We just write new entry. 564 * We just write new entry.
556 */ 565 */
557 memset(last, 0, newsize); 566 memset(last, 0, newsize);
558 last->e_name_index = name_index; 567 last->e_name_index = index;
559 last->e_name_len = name_len; 568 last->e_name_len = len;
560 memcpy(last->e_name, name, name_len); 569 memcpy(last->e_name, name, len);
561 pval = last->e_name + name_len; 570 pval = last->e_name + len;
562 memcpy(pval, value, value_len); 571 memcpy(pval, value, size);
563 last->e_value_size = cpu_to_le16(value_len); 572 last->e_value_size = cpu_to_le16(size);
564 new_hsize += newsize; 573 new_hsize += newsize;
565 } 574 }
566 575
@@ -583,18 +592,23 @@ exit:
583 return error; 592 return error;
584} 593}
585 594
586int f2fs_setxattr(struct inode *inode, int name_index, const char *name, 595int f2fs_setxattr(struct inode *inode, int index, const char *name,
587 const void *value, size_t value_len, struct page *ipage) 596 const void *value, size_t size,
597 struct page *ipage, int flags)
588{ 598{
589 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 599 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
590 int err; 600 int err;
591 601
602 /* this case is only from init_inode_metadata */
603 if (ipage)
604 return __f2fs_setxattr(inode, index, name, value,
605 size, ipage, flags);
592 f2fs_balance_fs(sbi); 606 f2fs_balance_fs(sbi);
593 607
594 f2fs_lock_op(sbi); 608 f2fs_lock_op(sbi);
595 /* protect xattr_ver */ 609 /* protect xattr_ver */
596 down_write(&F2FS_I(inode)->i_sem); 610 down_write(&F2FS_I(inode)->i_sem);
597 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); 611 err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
598 up_write(&F2FS_I(inode)->i_sem); 612 up_write(&F2FS_I(inode)->i_sem);
599 f2fs_unlock_op(sbi); 613 f2fs_unlock_op(sbi);
600 614
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index b21d9ebdeff3..34ab7dbcf5e3 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -114,18 +114,18 @@ extern const struct xattr_handler f2fs_xattr_security_handler;
114extern const struct xattr_handler *f2fs_xattr_handlers[]; 114extern const struct xattr_handler *f2fs_xattr_handlers[];
115 115
116extern int f2fs_setxattr(struct inode *, int, const char *, 116extern int f2fs_setxattr(struct inode *, int, const char *,
117 const void *, size_t, struct page *); 117 const void *, size_t, struct page *, int);
118extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); 118extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
119extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); 119extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
120#else 120#else
121 121
122#define f2fs_xattr_handlers NULL 122#define f2fs_xattr_handlers NULL
123static inline int f2fs_setxattr(struct inode *inode, int name_index, 123static inline int f2fs_setxattr(struct inode *inode, int index,
124 const char *name, const void *value, size_t value_len) 124 const char *name, const void *value, size_t size, int flags)
125{ 125{
126 return -EOPNOTSUPP; 126 return -EOPNOTSUPP;
127} 127}
128static inline int f2fs_getxattr(struct inode *inode, int name_index, 128static inline int f2fs_getxattr(struct inode *inode, int index,
129 const char *name, void *buffer, size_t buffer_size) 129 const char *name, void *buffer, size_t buffer_size)
130{ 130{
131 return -EOPNOTSUPP; 131 return -EOPNOTSUPP;
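With the flags argument plumbed through f2fs_setxattr(), __f2fs_setxattr() now honours the standard semantics of XATTR_CREATE and XATTR_REPLACE: -EEXIST when creating an attribute that already exists, -ENODATA when replacing one that does not. The userspace snippet below exercises exactly those two cases through setxattr(2); the file path and attribute name are arbitrary examples, and the file is assumed to live on a filesystem with xattr support.

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "testfile";	/* any file on an xattr-capable fs */
	const char *name = "user.demo";
	const char *val  = "hello";

	/* XATTR_REPLACE on a missing attribute -> ENODATA */
	if (setxattr(path, name, val, strlen(val), XATTR_REPLACE) < 0)
		printf("replace missing: %s\n", strerror(errno));

	/* First creation succeeds... */
	if (setxattr(path, name, val, strlen(val), XATTR_CREATE) < 0)
		printf("create: %s\n", strerror(errno));

	/* ...but XATTR_CREATE on an existing attribute -> EEXIST */
	if (setxattr(path, name, val, strlen(val), XATTR_CREATE) < 0)
		printf("create again: %s\n", strerror(errno));

	return 0;
}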
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index df53e1753a76..6ff0b0b42d47 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -19,6 +19,7 @@
19#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ 19#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */
20#define F2FS_BLKSIZE 4096 /* support only 4KB block */ 20#define F2FS_BLKSIZE 4096 /* support only 4KB block */
21#define F2FS_MAX_EXTENSION 64 /* # of extension entries */ 21#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
22#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
22 23
23#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ 24#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
24#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ 25#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
@@ -75,6 +76,7 @@ struct f2fs_super_block {
75 __le16 volume_name[512]; /* volume name */ 76 __le16 volume_name[512]; /* volume name */
76 __le32 extension_count; /* # of extensions below */ 77 __le32 extension_count; /* # of extensions below */
77 __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ 78 __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
79 __le32 cp_payload;
78} __packed; 80} __packed;
79 81
80/* 82/*
@@ -146,6 +148,9 @@ struct f2fs_extent {
146#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ 148#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
147#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ 149#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
148 150
151#define ADDRS_PER_PAGE(page, fi) \
152 (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
153
149#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) 154#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
150#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) 155#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
151#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) 156#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3)
@@ -391,6 +396,9 @@ typedef __le32 f2fs_hash_t;
391/* MAX level for dir lookup */ 396/* MAX level for dir lookup */
392#define MAX_DIR_HASH_DEPTH 63 397#define MAX_DIR_HASH_DEPTH 63
393 398
399/* MAX buckets in one level of dir */
400#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
401
394#define SIZE_OF_DIR_ENTRY 11 /* by byte */ 402#define SIZE_OF_DIR_ENTRY 11 /* by byte */
395#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ 403#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
396 BITS_PER_BYTE) 404 BITS_PER_BYTE)
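Two of the new definitions above are plain arithmetic and easy to sanity-check in isolation: F2FS_BLK_ALIGN() rounds a byte count up to whole 4KB blocks, and MAX_DIR_BUCKETS caps the bucket count of one directory hash level at 2^((MAX_DIR_HASH_DEPTH / 2) - 1). The small program below simply evaluates both macros as they appear in the hunk; it is a sanity check, not f2fs code.

#include <stdio.h>

#define F2FS_BLKSIZE		4096
#define F2FS_BLK_ALIGN(x)	(((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)

#define MAX_DIR_HASH_DEPTH	63
#define MAX_DIR_BUCKETS		(1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))

int main(void)
{
	/* 1 byte, a full block and one byte more */
	printf("F2FS_BLK_ALIGN(1)    = %d\n", F2FS_BLK_ALIGN(1));	/* 1 */
	printf("F2FS_BLK_ALIGN(4096) = %d\n", F2FS_BLK_ALIGN(4096));	/* 1 */
	printf("F2FS_BLK_ALIGN(4097) = %d\n", F2FS_BLK_ALIGN(4097));	/* 2 */
	printf("MAX_DIR_BUCKETS      = %d\n", MAX_DIR_BUCKETS);	/* 1 << 30 */
	return 0;
}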
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 67f38faac589..b983990b4a9f 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -659,6 +659,66 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
659 TP_CONDITION(bio) 659 TP_CONDITION(bio)
660); 660);
661 661
662TRACE_EVENT(f2fs_write_begin,
663
664 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
665 unsigned int flags),
666
667 TP_ARGS(inode, pos, len, flags),
668
669 TP_STRUCT__entry(
670 __field(dev_t, dev)
671 __field(ino_t, ino)
672 __field(loff_t, pos)
673 __field(unsigned int, len)
674 __field(unsigned int, flags)
675 ),
676
677 TP_fast_assign(
678 __entry->dev = inode->i_sb->s_dev;
679 __entry->ino = inode->i_ino;
680 __entry->pos = pos;
681 __entry->len = len;
682 __entry->flags = flags;
683 ),
684
685 TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u",
686 show_dev_ino(__entry),
687 (unsigned long long)__entry->pos,
688 __entry->len,
689 __entry->flags)
690);
691
692TRACE_EVENT(f2fs_write_end,
693
694 TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
695 unsigned int copied),
696
697 TP_ARGS(inode, pos, len, copied),
698
699 TP_STRUCT__entry(
700 __field(dev_t, dev)
701 __field(ino_t, ino)
702 __field(loff_t, pos)
703 __field(unsigned int, len)
704 __field(unsigned int, copied)
705 ),
706
707 TP_fast_assign(
708 __entry->dev = inode->i_sb->s_dev;
709 __entry->ino = inode->i_ino;
710 __entry->pos = pos;
711 __entry->len = len;
712 __entry->copied = copied;
713 ),
714
715 TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u",
716 show_dev_ino(__entry),
717 (unsigned long long)__entry->pos,
718 __entry->len,
719 __entry->copied)
720);
721
662DECLARE_EVENT_CLASS(f2fs__page, 722DECLARE_EVENT_CLASS(f2fs__page,
663 723
664 TP_PROTO(struct page *page, int type), 724 TP_PROTO(struct page *page, int type),
@@ -672,6 +732,7 @@ DECLARE_EVENT_CLASS(f2fs__page,
672 __field(int, dir) 732 __field(int, dir)
673 __field(pgoff_t, index) 733 __field(pgoff_t, index)
674 __field(int, dirty) 734 __field(int, dirty)
735 __field(int, uptodate)
675 ), 736 ),
676 737
677 TP_fast_assign( 738 TP_fast_assign(
@@ -681,14 +742,31 @@ DECLARE_EVENT_CLASS(f2fs__page,
681 __entry->dir = S_ISDIR(page->mapping->host->i_mode); 742 __entry->dir = S_ISDIR(page->mapping->host->i_mode);
682 __entry->index = page->index; 743 __entry->index = page->index;
683 __entry->dirty = PageDirty(page); 744 __entry->dirty = PageDirty(page);
745 __entry->uptodate = PageUptodate(page);
684 ), 746 ),
685 747
686 TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d", 748 TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, "
749 "dirty = %d, uptodate = %d",
687 show_dev_ino(__entry), 750 show_dev_ino(__entry),
688 show_block_type(__entry->type), 751 show_block_type(__entry->type),
689 show_file_type(__entry->dir), 752 show_file_type(__entry->dir),
690 (unsigned long)__entry->index, 753 (unsigned long)__entry->index,
691 __entry->dirty) 754 __entry->dirty,
755 __entry->uptodate)
756);
757
758DEFINE_EVENT(f2fs__page, f2fs_writepage,
759
760 TP_PROTO(struct page *page, int type),
761
762 TP_ARGS(page, type)
763);
764
765DEFINE_EVENT(f2fs__page, f2fs_readpage,
766
767 TP_PROTO(struct page *page, int type),
768
769 TP_ARGS(page, type)
692); 770);
693 771
694DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, 772DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
@@ -705,6 +783,70 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
705 TP_ARGS(page, type) 783 TP_ARGS(page, type)
706); 784);
707 785
786TRACE_EVENT(f2fs_writepages,
787
788 TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
789
790 TP_ARGS(inode, wbc, type),
791
792 TP_STRUCT__entry(
793 __field(dev_t, dev)
794 __field(ino_t, ino)
795 __field(int, type)
796 __field(int, dir)
797 __field(long, nr_to_write)
798 __field(long, pages_skipped)
799 __field(loff_t, range_start)
800 __field(loff_t, range_end)
801 __field(pgoff_t, writeback_index)
802 __field(int, sync_mode)
803 __field(char, for_kupdate)
804 __field(char, for_background)
805 __field(char, tagged_writepages)
806 __field(char, for_reclaim)
807 __field(char, range_cyclic)
808 __field(char, for_sync)
809 ),
810
811 TP_fast_assign(
812 __entry->dev = inode->i_sb->s_dev;
813 __entry->ino = inode->i_ino;
814 __entry->type = type;
815 __entry->dir = S_ISDIR(inode->i_mode);
816 __entry->nr_to_write = wbc->nr_to_write;
817 __entry->pages_skipped = wbc->pages_skipped;
818 __entry->range_start = wbc->range_start;
819 __entry->range_end = wbc->range_end;
820 __entry->writeback_index = inode->i_mapping->writeback_index;
821 __entry->sync_mode = wbc->sync_mode;
822 __entry->for_kupdate = wbc->for_kupdate;
823 __entry->for_background = wbc->for_background;
824 __entry->tagged_writepages = wbc->tagged_writepages;
825 __entry->for_reclaim = wbc->for_reclaim;
826 __entry->range_cyclic = wbc->range_cyclic;
827 __entry->for_sync = wbc->for_sync;
828 ),
829
830 TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, "
831 "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, "
832 "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u",
833 show_dev_ino(__entry),
834 show_block_type(__entry->type),
835 show_file_type(__entry->dir),
836 __entry->nr_to_write,
837 __entry->pages_skipped,
838 __entry->range_start,
839 __entry->range_end,
840 (unsigned long)__entry->writeback_index,
841 __entry->sync_mode,
842 __entry->for_kupdate,
843 __entry->for_background,
844 __entry->tagged_writepages,
845 __entry->for_reclaim,
846 __entry->range_cyclic,
847 __entry->for_sync)
848);
849
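Once the module is loaded, the new write_begin/write_end/writepage/writepages events show up under the f2fs group in tracefs alongside the existing ones. The snippet below enables one of them and streams the trace buffer from C; the /sys/kernel/debug/tracing mount point is an assumption of the example and differs on systems that mount tracefs elsewhere.

#include <stdio.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/debug/tracing/events/f2fs/f2fs_writepages/enable";
	const char *pipe = "/sys/kernel/debug/tracing/trace_pipe";
	char line[512];
	FILE *f;

	f = fopen(enable, "w");
	if (!f) {
		perror(enable);
		return 1;
	}
	fputs("1", f);
	fclose(f);

	/* stream events; writeback on any mounted f2fs volume produces them */
	f = fopen(pipe, "r");
	if (!f) {
		perror(pipe);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}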
708TRACE_EVENT(f2fs_submit_page_mbio, 850TRACE_EVENT(f2fs_submit_page_mbio,
709 851
710 TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), 852 TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),