aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-14 00:01:44 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-14 00:01:44 -0500
commitf9a03ae123c92c1f45cd2ca88d0f6edd787be78c (patch)
treec15c8b9b5732eb36b591bf570de63f3c30d252e1
parent1289ace5b4f70f1e68ce785735b82c7e483de863 (diff)
parent447135a86659c646017b8e707c1243c186bf2dff (diff)
Merge tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This series adds two ioctls to control cached data and fragmented files. Most of the rest fixes missing error cases and bugs that we have not covered so far. Summary: Enhancements: - support an ioctl to execute online file defragmentation - support an ioctl to flush cached data - speed up shrinking of extent_cache entries - handle broken superblock - refactor dirty inode management infra - revisit f2fs_map_blocks to handle more cases - reduce global lock coverage - add detecting user's idle time Major bug fixes: - fix data race condition on cached nat entries - fix error cases of volatile and atomic writes" * tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (87 commits) f2fs: should unset atomic flag after successful commit f2fs: fix wrong memory condition check f2fs: monitor the number of background checkpoint f2fs: detect idle time depending on user behavior f2fs: introduce time and interval facility f2fs: skip releasing nodes in chindless extent tree f2fs: use atomic type for node count in extent tree f2fs: recognize encrypted data in f2fs_fiemap f2fs: clean up f2fs_balance_fs f2fs: remove redundant calls f2fs: avoid unnecessary f2fs_balance_fs calls f2fs: check the page status filled from disk f2fs: introduce __get_node_page to reuse common code f2fs: check node id earily when readaheading node page f2fs: read isize while holding i_mutex in fiemap Revert "f2fs: check the node block address of newly allocated nid" f2fs: cover more area with nat_tree_lock f2fs: introduce max_file_blocks in sbi f2fs crypto: check CONFIG_F2FS_FS_XATTR for encrypted symlink f2fs: introduce zombie list for fast shrinking extent trees ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs6
-rw-r--r--Documentation/filesystems/f2fs.txt10
-rw-r--r--fs/f2fs/checkpoint.c177
-rw-r--r--fs/f2fs/data.c377
-rw-r--r--fs/f2fs/debug.c29
-rw-r--r--fs/f2fs/dir.c38
-rw-r--r--fs/f2fs/extent_cache.c122
-rw-r--r--fs/f2fs/f2fs.h142
-rw-r--r--fs/f2fs/file.c344
-rw-r--r--fs/f2fs/gc.c9
-rw-r--r--fs/f2fs/gc.h8
-rw-r--r--fs/f2fs/inline.c9
-rw-r--r--fs/f2fs/inode.c28
-rw-r--r--fs/f2fs/namei.c59
-rw-r--r--fs/f2fs/node.c170
-rw-r--r--fs/f2fs/node.h6
-rw-r--r--fs/f2fs/recovery.c40
-rw-r--r--fs/f2fs/segment.c122
-rw-r--r--fs/f2fs/shrinker.c3
-rw-r--r--fs/f2fs/super.c253
-rw-r--r--fs/f2fs/xattr.c3
-rw-r--r--include/linux/f2fs_fs.h3
-rw-r--r--include/trace/events/f2fs.h38
23 files changed, 1266 insertions, 730 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 0345f2d1c727..e5200f354abf 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
87Description: 87Description:
88 Controls the checkpoint timing. 88 Controls the checkpoint timing.
89 89
90What: /sys/fs/f2fs/<disk>/idle_interval
91Date: January 2016
92Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
93Description:
94 Controls the idle timing.
95
90What: /sys/fs/f2fs/<disk>/ra_nid_pages 96What: /sys/fs/f2fs/<disk>/ra_nid_pages
91Date: October 2015 97Date: October 2015
92Contact: "Chao Yu" <chao2.yu@samsung.com> 98Contact: "Chao Yu" <chao2.yu@samsung.com>
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index b102b436563e..e1c9f0849da6 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
102 collection, triggered in background when I/O subsystem is 102 collection, triggered in background when I/O subsystem is
103 idle. If background_gc=on, it will turn on the garbage 103 idle. If background_gc=on, it will turn on the garbage
104 collection and if background_gc=off, garbage collection 104 collection and if background_gc=off, garbage collection
105 will be truned off. If background_gc=sync, it will turn 105 will be turned off. If background_gc=sync, it will turn
106 on synchronous garbage collection running in background. 106 on synchronous garbage collection running in background.
107 Default value for this option is on. So garbage 107 Default value for this option is on. So garbage
108 collection is on by default. 108 collection is on by default.
@@ -145,10 +145,12 @@ extent_cache Enable an extent cache based on rb-tree, it can cache
145 as many as extent which map between contiguous logical 145 as many as extent which map between contiguous logical
146 address and physical address per inode, resulting in 146 address and physical address per inode, resulting in
147 increasing the cache hit ratio. Set by default. 147 increasing the cache hit ratio. Set by default.
148noextent_cache Diable an extent cache based on rb-tree explicitly, see 148noextent_cache Disable an extent cache based on rb-tree explicitly, see
149 the above extent_cache mount option. 149 the above extent_cache mount option.
150noinline_data Disable the inline data feature, inline data feature is 150noinline_data Disable the inline data feature, inline data feature is
151 enabled by default. 151 enabled by default.
152data_flush Enable data flushing before checkpoint in order to
153 persist data of regular and symlink.
152 154
153================================================================================ 155================================================================================
154DEBUGFS ENTRIES 156DEBUGFS ENTRIES
@@ -192,7 +194,7 @@ Files in /sys/fs/f2fs/<devname>
192 policy for garbage collection. Setting gc_idle = 0 194 policy for garbage collection. Setting gc_idle = 0
193 (default) will disable this option. Setting 195 (default) will disable this option. Setting
194 gc_idle = 1 will select the Cost Benefit approach 196 gc_idle = 1 will select the Cost Benefit approach
195 & setting gc_idle = 2 will select the greedy aproach. 197 & setting gc_idle = 2 will select the greedy approach.
196 198
197 reclaim_segments This parameter controls the number of prefree 199 reclaim_segments This parameter controls the number of prefree
198 segments to be reclaimed. If the number of prefree 200 segments to be reclaimed. If the number of prefree
@@ -298,7 +300,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
298file. Each file is dump_ssa and dump_sit. 300file. Each file is dump_ssa and dump_sit.
299 301
300The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. 302The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
301It shows on-disk inode information reconized by a given inode number, and is 303It shows on-disk inode information recognized by a given inode number, and is
302able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and 304able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
303./dump_sit respectively. 305./dump_sit respectively.
304 306
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f661d80474be..3842af954cd5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
237 dec_page_count(sbi, F2FS_DIRTY_META); 237 dec_page_count(sbi, F2FS_DIRTY_META);
238 unlock_page(page); 238 unlock_page(page);
239 239
240 if (wbc->for_reclaim) 240 if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
241 f2fs_submit_merged_bio(sbi, META, WRITE); 241 f2fs_submit_merged_bio(sbi, META, WRITE);
242 return 0; 242 return 0;
243 243
@@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
410 spin_unlock(&im->ino_lock); 410 spin_unlock(&im->ino_lock);
411} 411}
412 412
413void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) 413void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
414{ 414{
415 /* add new dirty ino entry into list */ 415 /* add new dirty ino entry into list */
416 __add_ino_entry(sbi, ino, type); 416 __add_ino_entry(sbi, ino, type);
417} 417}
418 418
419void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) 419void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
420{ 420{
421 /* remove dirty ino entry from list */ 421 /* remove dirty ino entry from list */
422 __remove_ino_entry(sbi, ino, type); 422 __remove_ino_entry(sbi, ino, type);
@@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
434 return e ? true : false; 434 return e ? true : false;
435} 435}
436 436
437void release_dirty_inode(struct f2fs_sb_info *sbi) 437void release_ino_entry(struct f2fs_sb_info *sbi)
438{ 438{
439 struct ino_entry *e, *tmp; 439 struct ino_entry *e, *tmp;
440 int i; 440 int i;
@@ -722,47 +722,48 @@ fail_no_cp:
722 return -EINVAL; 722 return -EINVAL;
723} 723}
724 724
725static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) 725static void __add_dirty_inode(struct inode *inode, enum inode_type type)
726{ 726{
727 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 727 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
728 struct f2fs_inode_info *fi = F2FS_I(inode);
729 int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
728 730
729 if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) 731 if (is_inode_flag_set(fi, flag))
730 return -EEXIST; 732 return;
731 733
732 set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); 734 set_inode_flag(fi, flag);
733 F2FS_I(inode)->dirty_dir = new; 735 list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
734 list_add_tail(&new->list, &sbi->dir_inode_list); 736 stat_inc_dirty_inode(sbi, type);
735 stat_inc_dirty_dir(sbi); 737}
736 return 0; 738
739static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
740{
741 struct f2fs_inode_info *fi = F2FS_I(inode);
742 int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
743
744 if (get_dirty_pages(inode) ||
745 !is_inode_flag_set(F2FS_I(inode), flag))
746 return;
747
748 list_del_init(&fi->dirty_list);
749 clear_inode_flag(fi, flag);
750 stat_dec_dirty_inode(F2FS_I_SB(inode), type);
737} 751}
738 752
739void update_dirty_page(struct inode *inode, struct page *page) 753void update_dirty_page(struct inode *inode, struct page *page)
740{ 754{
741 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 755 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
742 struct inode_entry *new; 756 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
743 int ret = 0;
744 757
745 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 758 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
746 !S_ISLNK(inode->i_mode)) 759 !S_ISLNK(inode->i_mode))
747 return; 760 return;
748 761
749 if (!S_ISDIR(inode->i_mode)) { 762 spin_lock(&sbi->inode_lock[type]);
750 inode_inc_dirty_pages(inode); 763 __add_dirty_inode(inode, type);
751 goto out;
752 }
753
754 new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
755 new->inode = inode;
756 INIT_LIST_HEAD(&new->list);
757
758 spin_lock(&sbi->dir_inode_lock);
759 ret = __add_dirty_inode(inode, new);
760 inode_inc_dirty_pages(inode); 764 inode_inc_dirty_pages(inode);
761 spin_unlock(&sbi->dir_inode_lock); 765 spin_unlock(&sbi->inode_lock[type]);
762 766
763 if (ret)
764 kmem_cache_free(inode_entry_slab, new);
765out:
766 SetPagePrivate(page); 767 SetPagePrivate(page);
767 f2fs_trace_pid(page); 768 f2fs_trace_pid(page);
768} 769}
@@ -770,70 +771,60 @@ out:
770void add_dirty_dir_inode(struct inode *inode) 771void add_dirty_dir_inode(struct inode *inode)
771{ 772{
772 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 773 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
773 struct inode_entry *new =
774 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
775 int ret = 0;
776
777 new->inode = inode;
778 INIT_LIST_HEAD(&new->list);
779 774
780 spin_lock(&sbi->dir_inode_lock); 775 spin_lock(&sbi->inode_lock[DIR_INODE]);
781 ret = __add_dirty_inode(inode, new); 776 __add_dirty_inode(inode, DIR_INODE);
782 spin_unlock(&sbi->dir_inode_lock); 777 spin_unlock(&sbi->inode_lock[DIR_INODE]);
783
784 if (ret)
785 kmem_cache_free(inode_entry_slab, new);
786} 778}
787 779
788void remove_dirty_dir_inode(struct inode *inode) 780void remove_dirty_inode(struct inode *inode)
789{ 781{
790 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 782 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
791 struct inode_entry *entry; 783 struct f2fs_inode_info *fi = F2FS_I(inode);
792 784 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
793 if (!S_ISDIR(inode->i_mode))
794 return;
795 785
796 spin_lock(&sbi->dir_inode_lock); 786 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
797 if (get_dirty_pages(inode) || 787 !S_ISLNK(inode->i_mode))
798 !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
799 spin_unlock(&sbi->dir_inode_lock);
800 return; 788 return;
801 }
802 789
803 entry = F2FS_I(inode)->dirty_dir; 790 spin_lock(&sbi->inode_lock[type]);
804 list_del(&entry->list); 791 __remove_dirty_inode(inode, type);
805 F2FS_I(inode)->dirty_dir = NULL; 792 spin_unlock(&sbi->inode_lock[type]);
806 clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
807 stat_dec_dirty_dir(sbi);
808 spin_unlock(&sbi->dir_inode_lock);
809 kmem_cache_free(inode_entry_slab, entry);
810 793
811 /* Only from the recovery routine */ 794 /* Only from the recovery routine */
812 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 795 if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
813 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 796 clear_inode_flag(fi, FI_DELAY_IPUT);
814 iput(inode); 797 iput(inode);
815 } 798 }
816} 799}
817 800
818void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) 801int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
819{ 802{
820 struct list_head *head; 803 struct list_head *head;
821 struct inode_entry *entry;
822 struct inode *inode; 804 struct inode *inode;
805 struct f2fs_inode_info *fi;
806 bool is_dir = (type == DIR_INODE);
807
808 trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
809 get_pages(sbi, is_dir ?
810 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
823retry: 811retry:
824 if (unlikely(f2fs_cp_error(sbi))) 812 if (unlikely(f2fs_cp_error(sbi)))
825 return; 813 return -EIO;
826 814
827 spin_lock(&sbi->dir_inode_lock); 815 spin_lock(&sbi->inode_lock[type]);
828 816
829 head = &sbi->dir_inode_list; 817 head = &sbi->inode_list[type];
830 if (list_empty(head)) { 818 if (list_empty(head)) {
831 spin_unlock(&sbi->dir_inode_lock); 819 spin_unlock(&sbi->inode_lock[type]);
832 return; 820 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
821 get_pages(sbi, is_dir ?
822 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
823 return 0;
833 } 824 }
834 entry = list_entry(head->next, struct inode_entry, list); 825 fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
835 inode = igrab(entry->inode); 826 inode = igrab(&fi->vfs_inode);
836 spin_unlock(&sbi->dir_inode_lock); 827 spin_unlock(&sbi->inode_lock[type]);
837 if (inode) { 828 if (inode) {
838 filemap_fdatawrite(inode->i_mapping); 829 filemap_fdatawrite(inode->i_mapping);
839 iput(inode); 830 iput(inode);
@@ -868,11 +859,9 @@ retry_flush_dents:
868 /* write all the dirty dentry pages */ 859 /* write all the dirty dentry pages */
869 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 860 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
870 f2fs_unlock_all(sbi); 861 f2fs_unlock_all(sbi);
871 sync_dirty_dir_inodes(sbi); 862 err = sync_dirty_inodes(sbi, DIR_INODE);
872 if (unlikely(f2fs_cp_error(sbi))) { 863 if (err)
873 err = -EIO;
874 goto out; 864 goto out;
875 }
876 goto retry_flush_dents; 865 goto retry_flush_dents;
877 } 866 }
878 867
@@ -885,10 +874,9 @@ retry_flush_nodes:
885 874
886 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 875 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
887 up_write(&sbi->node_write); 876 up_write(&sbi->node_write);
888 sync_node_pages(sbi, 0, &wbc); 877 err = sync_node_pages(sbi, 0, &wbc);
889 if (unlikely(f2fs_cp_error(sbi))) { 878 if (err) {
890 f2fs_unlock_all(sbi); 879 f2fs_unlock_all(sbi);
891 err = -EIO;
892 goto out; 880 goto out;
893 } 881 }
894 goto retry_flush_nodes; 882 goto retry_flush_nodes;
@@ -919,7 +907,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
919 finish_wait(&sbi->cp_wait, &wait); 907 finish_wait(&sbi->cp_wait, &wait);
920} 908}
921 909
922static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 910static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
923{ 911{
924 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 912 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
925 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 913 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -945,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
945 while (get_pages(sbi, F2FS_DIRTY_META)) { 933 while (get_pages(sbi, F2FS_DIRTY_META)) {
946 sync_meta_pages(sbi, META, LONG_MAX); 934 sync_meta_pages(sbi, META, LONG_MAX);
947 if (unlikely(f2fs_cp_error(sbi))) 935 if (unlikely(f2fs_cp_error(sbi)))
948 return; 936 return -EIO;
949 } 937 }
950 938
951 next_free_nid(sbi, &last_nid); 939 next_free_nid(sbi, &last_nid);
@@ -1030,7 +1018,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1030 /* need to wait for end_io results */ 1018 /* need to wait for end_io results */
1031 wait_on_all_pages_writeback(sbi); 1019 wait_on_all_pages_writeback(sbi);
1032 if (unlikely(f2fs_cp_error(sbi))) 1020 if (unlikely(f2fs_cp_error(sbi)))
1033 return; 1021 return -EIO;
1034 1022
1035 /* write out checkpoint buffer at block 0 */ 1023 /* write out checkpoint buffer at block 0 */
1036 update_meta_page(sbi, ckpt, start_blk++); 1024 update_meta_page(sbi, ckpt, start_blk++);
@@ -1058,7 +1046,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1058 wait_on_all_pages_writeback(sbi); 1046 wait_on_all_pages_writeback(sbi);
1059 1047
1060 if (unlikely(f2fs_cp_error(sbi))) 1048 if (unlikely(f2fs_cp_error(sbi)))
1061 return; 1049 return -EIO;
1062 1050
1063 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); 1051 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
1064 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); 1052 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
@@ -1081,22 +1069,25 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1081 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, 1069 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
1082 discard_blk); 1070 discard_blk);
1083 1071
1084 release_dirty_inode(sbi); 1072 release_ino_entry(sbi);
1085 1073
1086 if (unlikely(f2fs_cp_error(sbi))) 1074 if (unlikely(f2fs_cp_error(sbi)))
1087 return; 1075 return -EIO;
1088 1076
1089 clear_prefree_segments(sbi, cpc); 1077 clear_prefree_segments(sbi, cpc);
1090 clear_sbi_flag(sbi, SBI_IS_DIRTY); 1078 clear_sbi_flag(sbi, SBI_IS_DIRTY);
1079
1080 return 0;
1091} 1081}
1092 1082
1093/* 1083/*
1094 * We guarantee that this checkpoint procedure will not fail. 1084 * We guarantee that this checkpoint procedure will not fail.
1095 */ 1085 */
1096void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) 1086int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1097{ 1087{
1098 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1088 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1099 unsigned long long ckpt_ver; 1089 unsigned long long ckpt_ver;
1090 int err = 0;
1100 1091
1101 mutex_lock(&sbi->cp_mutex); 1092 mutex_lock(&sbi->cp_mutex);
1102 1093
@@ -1104,14 +1095,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1104 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || 1095 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
1105 (cpc->reason == CP_DISCARD && !sbi->discard_blks))) 1096 (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
1106 goto out; 1097 goto out;
1107 if (unlikely(f2fs_cp_error(sbi))) 1098 if (unlikely(f2fs_cp_error(sbi))) {
1099 err = -EIO;
1108 goto out; 1100 goto out;
1109 if (f2fs_readonly(sbi->sb)) 1101 }
1102 if (f2fs_readonly(sbi->sb)) {
1103 err = -EROFS;
1110 goto out; 1104 goto out;
1105 }
1111 1106
1112 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); 1107 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1113 1108
1114 if (block_operations(sbi)) 1109 err = block_operations(sbi);
1110 if (err)
1115 goto out; 1111 goto out;
1116 1112
1117 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); 1113 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
@@ -1133,7 +1129,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1133 flush_sit_entries(sbi, cpc); 1129 flush_sit_entries(sbi, cpc);
1134 1130
1135 /* unlock all the fs_lock[] in do_checkpoint() */ 1131 /* unlock all the fs_lock[] in do_checkpoint() */
1136 do_checkpoint(sbi, cpc); 1132 err = do_checkpoint(sbi, cpc);
1137 1133
1138 unblock_operations(sbi); 1134 unblock_operations(sbi);
1139 stat_inc_cp_count(sbi->stat_info); 1135 stat_inc_cp_count(sbi->stat_info);
@@ -1143,10 +1139,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1143 "checkpoint: version = %llx", ckpt_ver); 1139 "checkpoint: version = %llx", ckpt_ver);
1144 1140
1145 /* do checkpoint periodically */ 1141 /* do checkpoint periodically */
1146 sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval); 1142 f2fs_update_time(sbi, CP_TIME);
1143 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1147out: 1144out:
1148 mutex_unlock(&sbi->cp_mutex); 1145 mutex_unlock(&sbi->cp_mutex);
1149 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1146 return err;
1150} 1147}
1151 1148
1152void init_ino_entry_info(struct f2fs_sb_info *sbi) 1149void init_ino_entry_info(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 972eab7ac071..ac9e7c6aac74 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
225 /* Get physical address of data block */ 225 /* Get physical address of data block */
226 addr_array = blkaddr_in_node(rn); 226 addr_array = blkaddr_in_node(rn);
227 addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 227 addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
228 set_page_dirty(node_page); 228 if (set_page_dirty(node_page))
229 dn->node_changed = true;
229} 230}
230 231
231int reserve_new_block(struct dnode_of_data *dn) 232int reserve_new_block(struct dnode_of_data *dn)
@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
412 struct page *page; 413 struct page *page;
413 struct dnode_of_data dn; 414 struct dnode_of_data dn;
414 int err; 415 int err;
415repeat: 416
416 page = f2fs_grab_cache_page(mapping, index, true); 417 page = f2fs_grab_cache_page(mapping, index, true);
417 if (!page) { 418 if (!page) {
418 /* 419 /*
@@ -441,12 +442,11 @@ repeat:
441 } else { 442 } else {
442 f2fs_put_page(page, 1); 443 f2fs_put_page(page, 1);
443 444
444 page = get_read_data_page(inode, index, READ_SYNC, true); 445 /* if ipage exists, blkaddr should be NEW_ADDR */
446 f2fs_bug_on(F2FS_I_SB(inode), ipage);
447 page = get_lock_data_page(inode, index, true);
445 if (IS_ERR(page)) 448 if (IS_ERR(page))
446 goto repeat; 449 return page;
447
448 /* wait for read completion */
449 lock_page(page);
450 } 450 }
451got_it: 451got_it:
452 if (new_i_size && i_size_read(inode) < 452 if (new_i_size && i_size_read(inode) <
@@ -494,14 +494,10 @@ alloc:
494 if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)) 494 if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
495 i_size_write(dn->inode, 495 i_size_write(dn->inode,
496 ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT)); 496 ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
497
498 /* direct IO doesn't use extent cache to maximize the performance */
499 f2fs_drop_largest_extent(dn->inode, fofs);
500
501 return 0; 497 return 0;
502} 498}
503 499
504static void __allocate_data_blocks(struct inode *inode, loff_t offset, 500static int __allocate_data_blocks(struct inode *inode, loff_t offset,
505 size_t count) 501 size_t count)
506{ 502{
507 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 503 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
510 u64 len = F2FS_BYTES_TO_BLK(count); 506 u64 len = F2FS_BYTES_TO_BLK(count);
511 bool allocated; 507 bool allocated;
512 u64 end_offset; 508 u64 end_offset;
509 int err = 0;
513 510
514 while (len) { 511 while (len) {
515 f2fs_balance_fs(sbi);
516 f2fs_lock_op(sbi); 512 f2fs_lock_op(sbi);
517 513
518 /* When reading holes, we need its node page */ 514 /* When reading holes, we need its node page */
519 set_new_dnode(&dn, inode, NULL, NULL, 0); 515 set_new_dnode(&dn, inode, NULL, NULL, 0);
520 if (get_dnode_of_data(&dn, start, ALLOC_NODE)) 516 err = get_dnode_of_data(&dn, start, ALLOC_NODE);
517 if (err)
521 goto out; 518 goto out;
522 519
523 allocated = false; 520 allocated = false;
@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
526 while (dn.ofs_in_node < end_offset && len) { 523 while (dn.ofs_in_node < end_offset && len) {
527 block_t blkaddr; 524 block_t blkaddr;
528 525
529 if (unlikely(f2fs_cp_error(sbi))) 526 if (unlikely(f2fs_cp_error(sbi))) {
527 err = -EIO;
530 goto sync_out; 528 goto sync_out;
529 }
531 530
532 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 531 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
533 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { 532 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
534 if (__allocate_data_block(&dn)) 533 err = __allocate_data_block(&dn);
534 if (err)
535 goto sync_out; 535 goto sync_out;
536 allocated = true; 536 allocated = true;
537 } 537 }
@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
545 545
546 f2fs_put_dnode(&dn); 546 f2fs_put_dnode(&dn);
547 f2fs_unlock_op(sbi); 547 f2fs_unlock_op(sbi);
548
549 f2fs_balance_fs(sbi, dn.node_changed);
548 } 550 }
549 return; 551 return err;
550 552
551sync_out: 553sync_out:
552 if (allocated) 554 if (allocated)
@@ -554,7 +556,8 @@ sync_out:
554 f2fs_put_dnode(&dn); 556 f2fs_put_dnode(&dn);
555out: 557out:
556 f2fs_unlock_op(sbi); 558 f2fs_unlock_op(sbi);
557 return; 559 f2fs_balance_fs(sbi, dn.node_changed);
560 return err;
558} 561}
559 562
560/* 563/*
@@ -566,7 +569,7 @@ out:
566 * b. do not use extent cache for better performance 569 * b. do not use extent cache for better performance
567 * c. give the block addresses to blockdev 570 * c. give the block addresses to blockdev
568 */ 571 */
569static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, 572int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
570 int create, int flag) 573 int create, int flag)
571{ 574{
572 unsigned int maxblocks = map->m_len; 575 unsigned int maxblocks = map->m_len;
@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
577 int err = 0, ofs = 1; 580 int err = 0, ofs = 1;
578 struct extent_info ei; 581 struct extent_info ei;
579 bool allocated = false; 582 bool allocated = false;
583 block_t blkaddr;
580 584
581 map->m_len = 0; 585 map->m_len = 0;
582 map->m_flags = 0; 586 map->m_flags = 0;
@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
592 } 596 }
593 597
594 if (create) 598 if (create)
595 f2fs_lock_op(F2FS_I_SB(inode)); 599 f2fs_lock_op(sbi);
596 600
597 /* When reading holes, we need its node page */ 601 /* When reading holes, we need its node page */
598 set_new_dnode(&dn, inode, NULL, NULL, 0); 602 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
640 pgofs++; 644 pgofs++;
641 645
642get_next: 646get_next:
647 if (map->m_len >= maxblocks)
648 goto sync_out;
649
643 if (dn.ofs_in_node >= end_offset) { 650 if (dn.ofs_in_node >= end_offset) {
644 if (allocated) 651 if (allocated)
645 sync_inode_page(&dn); 652 sync_inode_page(&dn);
646 allocated = false; 653 allocated = false;
647 f2fs_put_dnode(&dn); 654 f2fs_put_dnode(&dn);
648 655
656 if (create) {
657 f2fs_unlock_op(sbi);
658 f2fs_balance_fs(sbi, dn.node_changed);
659 f2fs_lock_op(sbi);
660 }
661
649 set_new_dnode(&dn, inode, NULL, NULL, 0); 662 set_new_dnode(&dn, inode, NULL, NULL, 0);
650 err = get_dnode_of_data(&dn, pgofs, mode); 663 err = get_dnode_of_data(&dn, pgofs, mode);
651 if (err) { 664 if (err) {
@@ -657,52 +670,53 @@ get_next:
657 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 670 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
658 } 671 }
659 672
660 if (maxblocks > map->m_len) { 673 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
661 block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
662 674
663 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { 675 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
664 if (create) { 676 if (create) {
665 if (unlikely(f2fs_cp_error(sbi))) { 677 if (unlikely(f2fs_cp_error(sbi))) {
666 err = -EIO; 678 err = -EIO;
667 goto sync_out; 679 goto sync_out;
668 }
669 err = __allocate_data_block(&dn);
670 if (err)
671 goto sync_out;
672 allocated = true;
673 map->m_flags |= F2FS_MAP_NEW;
674 blkaddr = dn.data_blkaddr;
675 } else {
676 /*
677 * we only merge preallocated unwritten blocks
678 * for fiemap.
679 */
680 if (flag != F2FS_GET_BLOCK_FIEMAP ||
681 blkaddr != NEW_ADDR)
682 goto sync_out;
683 } 680 }
681 err = __allocate_data_block(&dn);
682 if (err)
683 goto sync_out;
684 allocated = true;
685 map->m_flags |= F2FS_MAP_NEW;
686 blkaddr = dn.data_blkaddr;
687 } else {
688 /*
689 * we only merge preallocated unwritten blocks
690 * for fiemap.
691 */
692 if (flag != F2FS_GET_BLOCK_FIEMAP ||
693 blkaddr != NEW_ADDR)
694 goto sync_out;
684 } 695 }
696 }
685 697
686 /* Give more consecutive addresses for the readahead */ 698 /* Give more consecutive addresses for the readahead */
687 if ((map->m_pblk != NEW_ADDR && 699 if ((map->m_pblk != NEW_ADDR &&
688 blkaddr == (map->m_pblk + ofs)) || 700 blkaddr == (map->m_pblk + ofs)) ||
689 (map->m_pblk == NEW_ADDR && 701 (map->m_pblk == NEW_ADDR &&
690 blkaddr == NEW_ADDR)) { 702 blkaddr == NEW_ADDR)) {
691 ofs++; 703 ofs++;
692 dn.ofs_in_node++; 704 dn.ofs_in_node++;
693 pgofs++; 705 pgofs++;
694 map->m_len++; 706 map->m_len++;
695 goto get_next; 707 goto get_next;
696 }
697 } 708 }
709
698sync_out: 710sync_out:
699 if (allocated) 711 if (allocated)
700 sync_inode_page(&dn); 712 sync_inode_page(&dn);
701put_out: 713put_out:
702 f2fs_put_dnode(&dn); 714 f2fs_put_dnode(&dn);
703unlock_out: 715unlock_out:
704 if (create) 716 if (create) {
705 f2fs_unlock_op(F2FS_I_SB(inode)); 717 f2fs_unlock_op(sbi);
718 f2fs_balance_fs(sbi, dn.node_changed);
719 }
706out: 720out:
707 trace_f2fs_map_blocks(inode, map, err); 721 trace_f2fs_map_blocks(inode, map, err);
708 return err; 722 return err;
@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
742static int get_data_block_bmap(struct inode *inode, sector_t iblock, 756static int get_data_block_bmap(struct inode *inode, sector_t iblock,
743 struct buffer_head *bh_result, int create) 757 struct buffer_head *bh_result, int create)
744{ 758{
759 /* Block number less than F2FS MAX BLOCKS */
760 if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
761 return -EFBIG;
762
745 return __get_data_block(inode, iblock, bh_result, create, 763 return __get_data_block(inode, iblock, bh_result, create,
746 F2FS_GET_BLOCK_BMAP); 764 F2FS_GET_BLOCK_BMAP);
747} 765}
@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
761{ 779{
762 struct buffer_head map_bh; 780 struct buffer_head map_bh;
763 sector_t start_blk, last_blk; 781 sector_t start_blk, last_blk;
764 loff_t isize = i_size_read(inode); 782 loff_t isize;
765 u64 logical = 0, phys = 0, size = 0; 783 u64 logical = 0, phys = 0, size = 0;
766 u32 flags = 0; 784 u32 flags = 0;
767 bool past_eof = false, whole_file = false;
768 int ret = 0; 785 int ret = 0;
769 786
770 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); 787 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
@@ -779,16 +796,19 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
779 796
780 mutex_lock(&inode->i_mutex); 797 mutex_lock(&inode->i_mutex);
781 798
782 if (len >= isize) { 799 isize = i_size_read(inode);
783 whole_file = true; 800 if (start >= isize)
784 len = isize; 801 goto out;
785 } 802
803 if (start + len > isize)
804 len = isize - start;
786 805
787 if (logical_to_blk(inode, len) == 0) 806 if (logical_to_blk(inode, len) == 0)
788 len = blk_to_logical(inode, 1); 807 len = blk_to_logical(inode, 1);
789 808
790 start_blk = logical_to_blk(inode, start); 809 start_blk = logical_to_blk(inode, start);
791 last_blk = logical_to_blk(inode, start + len - 1); 810 last_blk = logical_to_blk(inode, start + len - 1);
811
792next: 812next:
793 memset(&map_bh, 0, sizeof(struct buffer_head)); 813 memset(&map_bh, 0, sizeof(struct buffer_head));
794 map_bh.b_size = len; 814 map_bh.b_size = len;
@@ -800,59 +820,37 @@ next:
800 820
801 /* HOLE */ 821 /* HOLE */
802 if (!buffer_mapped(&map_bh)) { 822 if (!buffer_mapped(&map_bh)) {
803 start_blk++; 823 /* Go through holes util pass the EOF */
804 824 if (blk_to_logical(inode, start_blk++) < isize)
805 if (!past_eof && blk_to_logical(inode, start_blk) >= isize) 825 goto prep_next;
806 past_eof = 1; 826 /* Found a hole beyond isize means no more extents.
807 827 * Note that the premise is that filesystems don't
808 if (past_eof && size) { 828 * punch holes beyond isize and keep size unchanged.
809 flags |= FIEMAP_EXTENT_LAST; 829 */
810 ret = fiemap_fill_next_extent(fieinfo, logical, 830 flags |= FIEMAP_EXTENT_LAST;
811 phys, size, flags); 831 }
812 } else if (size) {
813 ret = fiemap_fill_next_extent(fieinfo, logical,
814 phys, size, flags);
815 size = 0;
816 }
817 832
818 /* if we have holes up to/past EOF then we're done */ 833 if (size) {
819 if (start_blk > last_blk || past_eof || ret) 834 if (f2fs_encrypted_inode(inode))
820 goto out; 835 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
821 } else {
822 if (start_blk > last_blk && !whole_file) {
823 ret = fiemap_fill_next_extent(fieinfo, logical,
824 phys, size, flags);
825 goto out;
826 }
827 836
828 /* 837 ret = fiemap_fill_next_extent(fieinfo, logical,
829 * if size != 0 then we know we already have an extent 838 phys, size, flags);
830 * to add, so add it. 839 }
831 */
832 if (size) {
833 ret = fiemap_fill_next_extent(fieinfo, logical,
834 phys, size, flags);
835 if (ret)
836 goto out;
837 }
838 840
839 logical = blk_to_logical(inode, start_blk); 841 if (start_blk > last_blk || ret)
840 phys = blk_to_logical(inode, map_bh.b_blocknr); 842 goto out;
841 size = map_bh.b_size;
842 flags = 0;
843 if (buffer_unwritten(&map_bh))
844 flags = FIEMAP_EXTENT_UNWRITTEN;
845 843
846 start_blk += logical_to_blk(inode, size); 844 logical = blk_to_logical(inode, start_blk);
845 phys = blk_to_logical(inode, map_bh.b_blocknr);
846 size = map_bh.b_size;
847 flags = 0;
848 if (buffer_unwritten(&map_bh))
849 flags = FIEMAP_EXTENT_UNWRITTEN;
847 850
848 /* 851 start_blk += logical_to_blk(inode, size);
849 * If we are past the EOF, then we need to make sure as 852
850 * soon as we find a hole that the last extent we found 853prep_next:
851 * is marked with FIEMAP_EXTENT_LAST
852 */
853 if (!past_eof && logical + size >= isize)
854 past_eof = true;
855 }
856 cond_resched(); 854 cond_resched();
857 if (fatal_signal_pending(current)) 855 if (fatal_signal_pending(current))
858 ret = -EINTR; 856 ret = -EINTR;
@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
1083 */ 1081 */
1084 if (unlikely(fio->blk_addr != NEW_ADDR && 1082 if (unlikely(fio->blk_addr != NEW_ADDR &&
1085 !is_cold_data(page) && 1083 !is_cold_data(page) &&
1084 !IS_ATOMIC_WRITTEN_PAGE(page) &&
1086 need_inplace_update(inode))) { 1085 need_inplace_update(inode))) {
1087 rewrite_data_page(fio); 1086 rewrite_data_page(fio);
1088 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1087 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
@@ -1179,10 +1178,11 @@ out:
1179 if (err) 1178 if (err)
1180 ClearPageUptodate(page); 1179 ClearPageUptodate(page);
1181 unlock_page(page); 1180 unlock_page(page);
1182 if (need_balance_fs) 1181 f2fs_balance_fs(sbi, need_balance_fs);
1183 f2fs_balance_fs(sbi); 1182 if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
1184 if (wbc->for_reclaim)
1185 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1183 f2fs_submit_merged_bio(sbi, DATA, WRITE);
1184 remove_dirty_inode(inode);
1185 }
1186 return 0; 1186 return 0;
1187 1187
1188redirty_out: 1188redirty_out:
@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1354 available_free_memory(sbi, DIRTY_DENTS)) 1354 available_free_memory(sbi, DIRTY_DENTS))
1355 goto skip_write; 1355 goto skip_write;
1356 1356
1357 /* skip writing during file defragment */
1358 if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
1359 goto skip_write;
1360
1357 /* during POR, we don't need to trigger writepage at all. */ 1361 /* during POR, we don't need to trigger writepage at all. */
1358 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 1362 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1359 goto skip_write; 1363 goto skip_write;
@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
1369 if (locked) 1373 if (locked)
1370 mutex_unlock(&sbi->writepages); 1374 mutex_unlock(&sbi->writepages);
1371 1375
1372 remove_dirty_dir_inode(inode); 1376 remove_dirty_inode(inode);
1373 1377
1374 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1378 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1375 return ret; 1379 return ret;
@@ -1382,13 +1386,85 @@ skip_write:
1382static void f2fs_write_failed(struct address_space *mapping, loff_t to) 1386static void f2fs_write_failed(struct address_space *mapping, loff_t to)
1383{ 1387{
1384 struct inode *inode = mapping->host; 1388 struct inode *inode = mapping->host;
1389 loff_t i_size = i_size_read(inode);
1385 1390
1386 if (to > inode->i_size) { 1391 if (to > i_size) {
1387 truncate_pagecache(inode, inode->i_size); 1392 truncate_pagecache(inode, i_size);
1388 truncate_blocks(inode, inode->i_size, true); 1393 truncate_blocks(inode, i_size, true);
1389 } 1394 }
1390} 1395}
1391 1396
1397static int prepare_write_begin(struct f2fs_sb_info *sbi,
1398 struct page *page, loff_t pos, unsigned len,
1399 block_t *blk_addr, bool *node_changed)
1400{
1401 struct inode *inode = page->mapping->host;
1402 pgoff_t index = page->index;
1403 struct dnode_of_data dn;
1404 struct page *ipage;
1405 bool locked = false;
1406 struct extent_info ei;
1407 int err = 0;
1408
1409 if (f2fs_has_inline_data(inode) ||
1410 (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
1411 f2fs_lock_op(sbi);
1412 locked = true;
1413 }
1414restart:
1415 /* check inline_data */
1416 ipage = get_node_page(sbi, inode->i_ino);
1417 if (IS_ERR(ipage)) {
1418 err = PTR_ERR(ipage);
1419 goto unlock_out;
1420 }
1421
1422 set_new_dnode(&dn, inode, ipage, ipage, 0);
1423
1424 if (f2fs_has_inline_data(inode)) {
1425 if (pos + len <= MAX_INLINE_DATA) {
1426 read_inline_data(page, ipage);
1427 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
1428 sync_inode_page(&dn);
1429 } else {
1430 err = f2fs_convert_inline_page(&dn, page);
1431 if (err)
1432 goto out;
1433 if (dn.data_blkaddr == NULL_ADDR)
1434 err = f2fs_get_block(&dn, index);
1435 }
1436 } else if (locked) {
1437 err = f2fs_get_block(&dn, index);
1438 } else {
1439 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1440 dn.data_blkaddr = ei.blk + index - ei.fofs;
1441 } else {
1442 bool restart = false;
1443
1444 /* hole case */
1445 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
1446 if (err || (!err && dn.data_blkaddr == NULL_ADDR))
1447 restart = true;
1448 if (restart) {
1449 f2fs_put_dnode(&dn);
1450 f2fs_lock_op(sbi);
1451 locked = true;
1452 goto restart;
1453 }
1454 }
1455 }
1456
1457 /* convert_inline_page can make node_changed */
1458 *blk_addr = dn.data_blkaddr;
1459 *node_changed = dn.node_changed;
1460out:
1461 f2fs_put_dnode(&dn);
1462unlock_out:
1463 if (locked)
1464 f2fs_unlock_op(sbi);
1465 return err;
1466}
1467
1392static int f2fs_write_begin(struct file *file, struct address_space *mapping, 1468static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1393 loff_t pos, unsigned len, unsigned flags, 1469 loff_t pos, unsigned len, unsigned flags,
1394 struct page **pagep, void **fsdata) 1470 struct page **pagep, void **fsdata)
@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1396 struct inode *inode = mapping->host; 1472 struct inode *inode = mapping->host;
1397 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1473 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1398 struct page *page = NULL; 1474 struct page *page = NULL;
1399 struct page *ipage;
1400 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 1475 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
1401 struct dnode_of_data dn; 1476 bool need_balance = false;
1477 block_t blkaddr = NULL_ADDR;
1402 int err = 0; 1478 int err = 0;
1403 1479
1404 trace_f2fs_write_begin(inode, pos, len, flags); 1480 trace_f2fs_write_begin(inode, pos, len, flags);
1405 1481
1406 f2fs_balance_fs(sbi);
1407
1408 /* 1482 /*
1409 * We should check this at this moment to avoid deadlock on inode page 1483 * We should check this at this moment to avoid deadlock on inode page
1410 * and #0 page. The locking rule for inline_data conversion should be: 1484 * and #0 page. The locking rule for inline_data conversion should be:
@@ -1424,41 +1498,27 @@ repeat:
1424 1498
1425 *pagep = page; 1499 *pagep = page;
1426 1500
1427 f2fs_lock_op(sbi); 1501 err = prepare_write_begin(sbi, page, pos, len,
1428 1502 &blkaddr, &need_balance);
1429 /* check inline_data */ 1503 if (err)
1430 ipage = get_node_page(sbi, inode->i_ino); 1504 goto fail;
1431 if (IS_ERR(ipage)) {
1432 err = PTR_ERR(ipage);
1433 goto unlock_fail;
1434 }
1435
1436 set_new_dnode(&dn, inode, ipage, ipage, 0);
1437 1505
1438 if (f2fs_has_inline_data(inode)) { 1506 if (need_balance && has_not_enough_free_secs(sbi, 0)) {
1439 if (pos + len <= MAX_INLINE_DATA) { 1507 unlock_page(page);
1440 read_inline_data(page, ipage); 1508 f2fs_balance_fs(sbi, true);
1441 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); 1509 lock_page(page);
1442 sync_inode_page(&dn); 1510 if (page->mapping != mapping) {
1443 goto put_next; 1511 /* The page got truncated from under us */
1512 f2fs_put_page(page, 1);
1513 goto repeat;
1444 } 1514 }
1445 err = f2fs_convert_inline_page(&dn, page);
1446 if (err)
1447 goto put_fail;
1448 } 1515 }
1449 1516
1450 err = f2fs_get_block(&dn, index);
1451 if (err)
1452 goto put_fail;
1453put_next:
1454 f2fs_put_dnode(&dn);
1455 f2fs_unlock_op(sbi);
1456
1457 f2fs_wait_on_page_writeback(page, DATA); 1517 f2fs_wait_on_page_writeback(page, DATA);
1458 1518
1459 /* wait for GCed encrypted page writeback */ 1519 /* wait for GCed encrypted page writeback */
1460 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 1520 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
1461 f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); 1521 f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
1462 1522
1463 if (len == PAGE_CACHE_SIZE) 1523 if (len == PAGE_CACHE_SIZE)
1464 goto out_update; 1524 goto out_update;
@@ -1474,14 +1534,14 @@ put_next:
1474 goto out_update; 1534 goto out_update;
1475 } 1535 }
1476 1536
1477 if (dn.data_blkaddr == NEW_ADDR) { 1537 if (blkaddr == NEW_ADDR) {
1478 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 1538 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
1479 } else { 1539 } else {
1480 struct f2fs_io_info fio = { 1540 struct f2fs_io_info fio = {
1481 .sbi = sbi, 1541 .sbi = sbi,
1482 .type = DATA, 1542 .type = DATA,
1483 .rw = READ_SYNC, 1543 .rw = READ_SYNC,
1484 .blk_addr = dn.data_blkaddr, 1544 .blk_addr = blkaddr,
1485 .page = page, 1545 .page = page,
1486 .encrypted_page = NULL, 1546 .encrypted_page = NULL,
1487 }; 1547 };
@@ -1512,10 +1572,6 @@ out_clear:
1512 clear_cold_data(page); 1572 clear_cold_data(page);
1513 return 0; 1573 return 0;
1514 1574
1515put_fail:
1516 f2fs_put_dnode(&dn);
1517unlock_fail:
1518 f2fs_unlock_op(sbi);
1519fail: 1575fail:
1520 f2fs_put_page(page, 1); 1576 f2fs_put_page(page, 1);
1521 f2fs_write_failed(mapping, pos + len); 1577 f2fs_write_failed(mapping, pos + len);
@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
1540 } 1596 }
1541 1597
1542 f2fs_put_page(page, 1); 1598 f2fs_put_page(page, 1);
1599 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1543 return copied; 1600 return copied;
1544} 1601}
1545 1602
@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1567 int err; 1624 int err;
1568 1625
1569 /* we don't need to use inline_data strictly */ 1626 /* we don't need to use inline_data strictly */
1570 if (f2fs_has_inline_data(inode)) { 1627 err = f2fs_convert_inline_inode(inode);
1571 err = f2fs_convert_inline_inode(inode); 1628 if (err)
1572 if (err) 1629 return err;
1573 return err;
1574 }
1575 1630
1576 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 1631 if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
1577 return 0; 1632 return 0;
@@ -1583,11 +1638,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1583 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1638 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1584 1639
1585 if (iov_iter_rw(iter) == WRITE) { 1640 if (iov_iter_rw(iter) == WRITE) {
1586 __allocate_data_blocks(inode, offset, count); 1641 err = __allocate_data_blocks(inode, offset, count);
1587 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 1642 if (err)
1588 err = -EIO;
1589 goto out; 1643 goto out;
1590 }
1591 } 1644 }
1592 1645
1593 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); 1646 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ad1b18a7705b..4fb6ef88a34f 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -38,12 +38,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
38 si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); 38 si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
39 si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; 39 si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
40 si->total_ext = atomic64_read(&sbi->total_hit_ext); 40 si->total_ext = atomic64_read(&sbi->total_hit_ext);
41 si->ext_tree = sbi->total_ext_tree; 41 si->ext_tree = atomic_read(&sbi->total_ext_tree);
42 si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
42 si->ext_node = atomic_read(&sbi->total_ext_node); 43 si->ext_node = atomic_read(&sbi->total_ext_node);
43 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 44 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
44 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); 45 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
45 si->ndirty_dirs = sbi->n_dirty_dirs;
46 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); 46 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
47 si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
48 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
49 si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
47 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 50 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
48 si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); 51 si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
49 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 52 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
@@ -105,7 +108,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
105 108
106 bimodal = 0; 109 bimodal = 0;
107 total_vblocks = 0; 110 total_vblocks = 0;
108 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 111 blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
109 hblks_per_sec = blks_per_sec / 2; 112 hblks_per_sec = blks_per_sec / 2;
110 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { 113 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
111 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 114 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
@@ -189,10 +192,10 @@ get_cache:
189 si->cache_mem += NM_I(sbi)->dirty_nat_cnt * 192 si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
190 sizeof(struct nat_entry_set); 193 sizeof(struct nat_entry_set);
191 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 194 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
192 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
193 for (i = 0; i <= UPDATE_INO; i++) 195 for (i = 0; i <= UPDATE_INO; i++)
194 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 196 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
195 si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); 197 si->cache_mem += atomic_read(&sbi->total_ext_tree) *
198 sizeof(struct extent_tree);
196 si->cache_mem += atomic_read(&sbi->total_ext_node) * 199 si->cache_mem += atomic_read(&sbi->total_ext_node) *
197 sizeof(struct extent_node); 200 sizeof(struct extent_node);
198 201
@@ -267,7 +270,8 @@ static int stat_show(struct seq_file *s, void *v)
267 si->dirty_count); 270 si->dirty_count);
268 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", 271 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
269 si->prefree_count, si->free_segs, si->free_secs); 272 si->prefree_count, si->free_segs, si->free_secs);
270 seq_printf(s, "CP calls: %d\n", si->cp_count); 273 seq_printf(s, "CP calls: %d (BG: %d)\n",
274 si->cp_count, si->bg_cp_count);
271 seq_printf(s, "GC calls: %d (BG: %d)\n", 275 seq_printf(s, "GC calls: %d (BG: %d)\n",
272 si->call_count, si->bg_gc); 276 si->call_count, si->bg_gc);
273 seq_printf(s, " - data segments : %d (%d)\n", 277 seq_printf(s, " - data segments : %d (%d)\n",
@@ -288,8 +292,8 @@ static int stat_show(struct seq_file *s, void *v)
288 !si->total_ext ? 0 : 292 !si->total_ext ? 0 :
289 div64_u64(si->hit_total * 100, si->total_ext), 293 div64_u64(si->hit_total * 100, si->total_ext),
290 si->hit_total, si->total_ext); 294 si->hit_total, si->total_ext);
291 seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", 295 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
292 si->ext_tree, si->ext_node); 296 si->ext_tree, si->zombie_tree, si->ext_node);
293 seq_puts(s, "\nBalancing F2FS Async:\n"); 297 seq_puts(s, "\nBalancing F2FS Async:\n");
294 seq_printf(s, " - inmem: %4d, wb: %4d\n", 298 seq_printf(s, " - inmem: %4d, wb: %4d\n",
295 si->inmem_pages, si->wb_pages); 299 si->inmem_pages, si->wb_pages);
@@ -297,6 +301,8 @@ static int stat_show(struct seq_file *s, void *v)
297 si->ndirty_node, si->node_pages); 301 si->ndirty_node, si->node_pages);
298 seq_printf(s, " - dents: %4d in dirs:%4d\n", 302 seq_printf(s, " - dents: %4d in dirs:%4d\n",
299 si->ndirty_dent, si->ndirty_dirs); 303 si->ndirty_dent, si->ndirty_dirs);
304 seq_printf(s, " - datas: %4d in files:%4d\n",
305 si->ndirty_data, si->ndirty_files);
300 seq_printf(s, " - meta: %4d in %4d\n", 306 seq_printf(s, " - meta: %4d in %4d\n",
301 si->ndirty_meta, si->meta_pages); 307 si->ndirty_meta, si->meta_pages);
302 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", 308 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
@@ -404,20 +410,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
404 kfree(si); 410 kfree(si);
405} 411}
406 412
407void __init f2fs_create_root_stats(void) 413int __init f2fs_create_root_stats(void)
408{ 414{
409 struct dentry *file; 415 struct dentry *file;
410 416
411 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 417 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
412 if (!f2fs_debugfs_root) 418 if (!f2fs_debugfs_root)
413 return; 419 return -ENOMEM;
414 420
415 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 421 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
416 NULL, &stat_fops); 422 NULL, &stat_fops);
417 if (!file) { 423 if (!file) {
418 debugfs_remove(f2fs_debugfs_root); 424 debugfs_remove(f2fs_debugfs_root);
419 f2fs_debugfs_root = NULL; 425 f2fs_debugfs_root = NULL;
426 return -ENOMEM;
420 } 427 }
428
429 return 0;
421} 430}
422 431
423void f2fs_destroy_root_stats(void) 432void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7c1678ba8f92..faa7495e2d7e 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
172 172
173 namehash = f2fs_dentry_hash(&name); 173 namehash = f2fs_dentry_hash(&name);
174 174
175 f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
176
177 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); 175 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
178 nblock = bucket_blocks(level); 176 nblock = bucket_blocks(level);
179 177
@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
238 goto out; 236 goto out;
239 237
240 max_depth = F2FS_I(dir)->i_current_depth; 238 max_depth = F2FS_I(dir)->i_current_depth;
239 if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
240 f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
241 "Corrupted max_depth of %lu: %u",
242 dir->i_ino, max_depth);
243 max_depth = MAX_DIR_HASH_DEPTH;
244 F2FS_I(dir)->i_current_depth = max_depth;
245 mark_inode_dirty(dir);
246 }
241 247
242 for (level = 0; level < max_depth; level++) { 248 for (level = 0; level < max_depth; level++) {
243 de = find_in_level(dir, level, &fname, res_page); 249 de = find_in_level(dir, level, &fname, res_page);
@@ -444,7 +450,7 @@ error:
444 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ 450 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
445 truncate_inode_pages(&inode->i_data, 0); 451 truncate_inode_pages(&inode->i_data, 0);
446 truncate_blocks(inode, 0, false); 452 truncate_blocks(inode, 0, false);
447 remove_dirty_dir_inode(inode); 453 remove_dirty_inode(inode);
448 remove_inode_page(inode); 454 remove_inode_page(inode);
449 return ERR_PTR(err); 455 return ERR_PTR(err);
450} 456}
@@ -630,6 +636,7 @@ fail:
630 f2fs_put_page(dentry_page, 1); 636 f2fs_put_page(dentry_page, 1);
631out: 637out:
632 f2fs_fname_free_filename(&fname); 638 f2fs_fname_free_filename(&fname);
639 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
633 return err; 640 return err;
634} 641}
635 642
@@ -651,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
651 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); 658 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
652fail: 659fail:
653 up_write(&F2FS_I(inode)->i_sem); 660 up_write(&F2FS_I(inode)->i_sem);
661 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
654 return err; 662 return err;
655} 663}
656 664
@@ -695,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
695 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 703 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
696 int i; 704 int i;
697 705
706 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
707
698 if (f2fs_has_inline_dentry(dir)) 708 if (f2fs_has_inline_dentry(dir))
699 return f2fs_delete_inline_entry(dentry, page, dir, inode); 709 return f2fs_delete_inline_entry(dentry, page, dir, inode);
700 710
@@ -855,25 +865,27 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
855 865
856 for (; n < npages; n++) { 866 for (; n < npages; n++) {
857 dentry_page = get_lock_data_page(inode, n, false); 867 dentry_page = get_lock_data_page(inode, n, false);
858 if (IS_ERR(dentry_page)) 868 if (IS_ERR(dentry_page)) {
859 continue; 869 err = PTR_ERR(dentry_page);
870 if (err == -ENOENT)
871 continue;
872 else
873 goto out;
874 }
860 875
861 dentry_blk = kmap(dentry_page); 876 dentry_blk = kmap(dentry_page);
862 877
863 make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); 878 make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
864 879
865 if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) 880 if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
866 goto stop; 881 kunmap(dentry_page);
882 f2fs_put_page(dentry_page, 1);
883 break;
884 }
867 885
868 ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; 886 ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
869 kunmap(dentry_page); 887 kunmap(dentry_page);
870 f2fs_put_page(dentry_page, 1); 888 f2fs_put_page(dentry_page, 1);
871 dentry_page = NULL;
872 }
873stop:
874 if (dentry_page && !IS_ERR(dentry_page)) {
875 kunmap(dentry_page);
876 f2fs_put_page(dentry_page, 1);
877 } 889 }
878out: 890out:
879 f2fs_fname_crypto_free_buffer(&fstr); 891 f2fs_fname_crypto_free_buffer(&fstr);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 7ddba812e11b..ccd5c636d3fe 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
36 36
37 rb_link_node(&en->rb_node, parent, p); 37 rb_link_node(&en->rb_node, parent, p);
38 rb_insert_color(&en->rb_node, &et->root); 38 rb_insert_color(&en->rb_node, &et->root);
39 et->count++; 39 atomic_inc(&et->node_cnt);
40 atomic_inc(&sbi->total_ext_node); 40 atomic_inc(&sbi->total_ext_node);
41 return en; 41 return en;
42} 42}
@@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
45 struct extent_tree *et, struct extent_node *en) 45 struct extent_tree *et, struct extent_node *en)
46{ 46{
47 rb_erase(&en->rb_node, &et->root); 47 rb_erase(&en->rb_node, &et->root);
48 et->count--; 48 atomic_dec(&et->node_cnt);
49 atomic_dec(&sbi->total_ext_node); 49 atomic_dec(&sbi->total_ext_node);
50 50
51 if (et->cached_en == en) 51 if (et->cached_en == en)
@@ -68,11 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
68 et->root = RB_ROOT; 68 et->root = RB_ROOT;
69 et->cached_en = NULL; 69 et->cached_en = NULL;
70 rwlock_init(&et->lock); 70 rwlock_init(&et->lock);
71 atomic_set(&et->refcount, 0); 71 INIT_LIST_HEAD(&et->list);
72 et->count = 0; 72 atomic_set(&et->node_cnt, 0);
73 sbi->total_ext_tree++; 73 atomic_inc(&sbi->total_ext_tree);
74 } else {
75 atomic_dec(&sbi->total_zombie_tree);
76 list_del_init(&et->list);
74 } 77 }
75 atomic_inc(&et->refcount);
76 up_write(&sbi->extent_tree_lock); 78 up_write(&sbi->extent_tree_lock);
77 79
78 /* never died until evict_inode */ 80 /* never died until evict_inode */
@@ -131,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
131{ 133{
132 struct rb_node *node, *next; 134 struct rb_node *node, *next;
133 struct extent_node *en; 135 struct extent_node *en;
134 unsigned int count = et->count; 136 unsigned int count = atomic_read(&et->node_cnt);
135 137
136 node = rb_first(&et->root); 138 node = rb_first(&et->root);
137 while (node) { 139 while (node) {
@@ -152,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
152 node = next; 154 node = next;
153 } 155 }
154 156
155 return count - et->count; 157 return count - atomic_read(&et->node_cnt);
156} 158}
157 159
158static void __drop_largest_extent(struct inode *inode, 160static void __drop_largest_extent(struct inode *inode,
@@ -164,34 +166,33 @@ static void __drop_largest_extent(struct inode *inode,
164 largest->len = 0; 166 largest->len = 0;
165} 167}
166 168
167void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs) 169/* return true, if inode page is changed */
168{ 170bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
169 if (!f2fs_may_extent_tree(inode))
170 return;
171
172 __drop_largest_extent(inode, fofs, 1);
173}
174
175void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
176{ 171{
177 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 172 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
178 struct extent_tree *et; 173 struct extent_tree *et;
179 struct extent_node *en; 174 struct extent_node *en;
180 struct extent_info ei; 175 struct extent_info ei;
181 176
182 if (!f2fs_may_extent_tree(inode)) 177 if (!f2fs_may_extent_tree(inode)) {
183 return; 178 /* drop largest extent */
179 if (i_ext && i_ext->len) {
180 i_ext->len = 0;
181 return true;
182 }
183 return false;
184 }
184 185
185 et = __grab_extent_tree(inode); 186 et = __grab_extent_tree(inode);
186 187
187 if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) 188 if (!i_ext || !i_ext->len)
188 return; 189 return false;
189 190
190 set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 191 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
191 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); 192 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
192 193
193 write_lock(&et->lock); 194 write_lock(&et->lock);
194 if (et->count) 195 if (atomic_read(&et->node_cnt))
195 goto out; 196 goto out;
196 197
197 en = __init_extent_tree(sbi, et, &ei); 198 en = __init_extent_tree(sbi, et, &ei);
@@ -202,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
202 } 203 }
203out: 204out:
204 write_unlock(&et->lock); 205 write_unlock(&et->lock);
206 return false;
205} 207}
206 208
207static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 209static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
@@ -549,45 +551,44 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
549unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 551unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
550{ 552{
551 struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; 553 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
554 struct extent_tree *et, *next;
552 struct extent_node *en, *tmp; 555 struct extent_node *en, *tmp;
553 unsigned long ino = F2FS_ROOT_INO(sbi); 556 unsigned long ino = F2FS_ROOT_INO(sbi);
554 struct radix_tree_root *root = &sbi->extent_tree_root;
555 unsigned int found; 557 unsigned int found;
556 unsigned int node_cnt = 0, tree_cnt = 0; 558 unsigned int node_cnt = 0, tree_cnt = 0;
557 int remained; 559 int remained;
560 bool do_free = false;
558 561
559 if (!test_opt(sbi, EXTENT_CACHE)) 562 if (!test_opt(sbi, EXTENT_CACHE))
560 return 0; 563 return 0;
561 564
565 if (!atomic_read(&sbi->total_zombie_tree))
566 goto free_node;
567
562 if (!down_write_trylock(&sbi->extent_tree_lock)) 568 if (!down_write_trylock(&sbi->extent_tree_lock))
563 goto out; 569 goto out;
564 570
565 /* 1. remove unreferenced extent tree */ 571 /* 1. remove unreferenced extent tree */
566 while ((found = radix_tree_gang_lookup(root, 572 list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
567 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 573 if (atomic_read(&et->node_cnt)) {
568 unsigned i; 574 write_lock(&et->lock);
569 575 node_cnt += __free_extent_tree(sbi, et, true);
570 ino = treevec[found - 1]->ino + 1; 576 write_unlock(&et->lock);
571 for (i = 0; i < found; i++) { 577 }
572 struct extent_tree *et = treevec[i];
573
574 if (!atomic_read(&et->refcount)) {
575 write_lock(&et->lock);
576 node_cnt += __free_extent_tree(sbi, et, true);
577 write_unlock(&et->lock);
578 578
579 radix_tree_delete(root, et->ino); 579 list_del_init(&et->list);
580 kmem_cache_free(extent_tree_slab, et); 580 radix_tree_delete(&sbi->extent_tree_root, et->ino);
581 sbi->total_ext_tree--; 581 kmem_cache_free(extent_tree_slab, et);
582 tree_cnt++; 582 atomic_dec(&sbi->total_ext_tree);
583 atomic_dec(&sbi->total_zombie_tree);
584 tree_cnt++;
583 585
584 if (node_cnt + tree_cnt >= nr_shrink) 586 if (node_cnt + tree_cnt >= nr_shrink)
585 goto unlock_out; 587 goto unlock_out;
586 }
587 }
588 } 588 }
589 up_write(&sbi->extent_tree_lock); 589 up_write(&sbi->extent_tree_lock);
590 590
591free_node:
591 /* 2. remove LRU extent entries */ 592 /* 2. remove LRU extent entries */
592 if (!down_write_trylock(&sbi->extent_tree_lock)) 593 if (!down_write_trylock(&sbi->extent_tree_lock))
593 goto out; 594 goto out;
@@ -599,15 +600,19 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
599 if (!remained--) 600 if (!remained--)
600 break; 601 break;
601 list_del_init(&en->list); 602 list_del_init(&en->list);
603 do_free = true;
602 } 604 }
603 spin_unlock(&sbi->extent_lock); 605 spin_unlock(&sbi->extent_lock);
604 606
607 if (do_free == false)
608 goto unlock_out;
609
605 /* 610 /*
606 * reset ino for searching victims from beginning of global extent tree. 611 * reset ino for searching victims from beginning of global extent tree.
607 */ 612 */
608 ino = F2FS_ROOT_INO(sbi); 613 ino = F2FS_ROOT_INO(sbi);
609 614
610 while ((found = radix_tree_gang_lookup(root, 615 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
611 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { 616 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
612 unsigned i; 617 unsigned i;
613 618
@@ -615,9 +620,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
615 for (i = 0; i < found; i++) { 620 for (i = 0; i < found; i++) {
616 struct extent_tree *et = treevec[i]; 621 struct extent_tree *et = treevec[i];
617 622
618 write_lock(&et->lock); 623 if (!atomic_read(&et->node_cnt))
619 node_cnt += __free_extent_tree(sbi, et, false); 624 continue;
620 write_unlock(&et->lock); 625
626 if (write_trylock(&et->lock)) {
627 node_cnt += __free_extent_tree(sbi, et, false);
628 write_unlock(&et->lock);
629 }
621 630
622 if (node_cnt + tree_cnt >= nr_shrink) 631 if (node_cnt + tree_cnt >= nr_shrink)
623 goto unlock_out; 632 goto unlock_out;
@@ -637,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
637 struct extent_tree *et = F2FS_I(inode)->extent_tree; 646 struct extent_tree *et = F2FS_I(inode)->extent_tree;
638 unsigned int node_cnt = 0; 647 unsigned int node_cnt = 0;
639 648
640 if (!et) 649 if (!et || !atomic_read(&et->node_cnt))
641 return 0; 650 return 0;
642 651
643 write_lock(&et->lock); 652 write_lock(&et->lock);
@@ -656,8 +665,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
656 if (!et) 665 if (!et)
657 return; 666 return;
658 667
659 if (inode->i_nlink && !is_bad_inode(inode) && et->count) { 668 if (inode->i_nlink && !is_bad_inode(inode) &&
660 atomic_dec(&et->refcount); 669 atomic_read(&et->node_cnt)) {
670 down_write(&sbi->extent_tree_lock);
671 list_add_tail(&et->list, &sbi->zombie_list);
672 atomic_inc(&sbi->total_zombie_tree);
673 up_write(&sbi->extent_tree_lock);
661 return; 674 return;
662 } 675 }
663 676
@@ -666,11 +679,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
666 679
667 /* delete extent tree entry in radix tree */ 680 /* delete extent tree entry in radix tree */
668 down_write(&sbi->extent_tree_lock); 681 down_write(&sbi->extent_tree_lock);
669 atomic_dec(&et->refcount); 682 f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
670 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
671 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); 683 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
672 kmem_cache_free(extent_tree_slab, et); 684 kmem_cache_free(extent_tree_slab, et);
673 sbi->total_ext_tree--; 685 atomic_dec(&sbi->total_ext_tree);
674 up_write(&sbi->extent_tree_lock); 686 up_write(&sbi->extent_tree_lock);
675 687
676 F2FS_I(inode)->extent_tree = NULL; 688 F2FS_I(inode)->extent_tree = NULL;
@@ -722,7 +734,9 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
722 init_rwsem(&sbi->extent_tree_lock); 734 init_rwsem(&sbi->extent_tree_lock);
723 INIT_LIST_HEAD(&sbi->extent_list); 735 INIT_LIST_HEAD(&sbi->extent_list);
724 spin_lock_init(&sbi->extent_lock); 736 spin_lock_init(&sbi->extent_lock);
725 sbi->total_ext_tree = 0; 737 atomic_set(&sbi->total_ext_tree, 0);
738 INIT_LIST_HEAD(&sbi->zombie_list);
739 atomic_set(&sbi->total_zombie_tree, 0);
726 atomic_set(&sbi->total_ext_node, 0); 740 atomic_set(&sbi->total_ext_node, 0);
727} 741}
728 742
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ec6067c33a3f..ff79054c6cf6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,6 +21,7 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/bio.h> 23#include <linux/bio.h>
24#include <linux/blkdev.h>
24 25
25#ifdef CONFIG_F2FS_CHECK_FS 26#ifdef CONFIG_F2FS_CHECK_FS
26#define f2fs_bug_on(sbi, condition) BUG_ON(condition) 27#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -54,6 +55,7 @@
54#define F2FS_MOUNT_FASTBOOT 0x00001000 55#define F2FS_MOUNT_FASTBOOT 0x00001000
55#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 56#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
56#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 57#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
58#define F2FS_MOUNT_DATA_FLUSH 0x00008000
57 59
58#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 60#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
59#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 61#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -125,6 +127,7 @@ enum {
125#define BATCHED_TRIM_BLOCKS(sbi) \ 127#define BATCHED_TRIM_BLOCKS(sbi) \
126 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) 128 (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
127#define DEF_CP_INTERVAL 60 /* 60 secs */ 129#define DEF_CP_INTERVAL 60 /* 60 secs */
130#define DEF_IDLE_INTERVAL 120 /* 2 mins */
128 131
129struct cp_control { 132struct cp_control {
130 int reason; 133 int reason;
@@ -158,13 +161,7 @@ struct ino_entry {
158 nid_t ino; /* inode number */ 161 nid_t ino; /* inode number */
159}; 162};
160 163
161/* 164/* for the list of inodes to be GCed */
162 * for the list of directory inodes or gc inodes.
163 * NOTE: there are two slab users for this structure, if we add/modify/delete
164 * fields in structure for one of slab users, it may affect fields or size of
165 * other one, in this condition, it's better to split both of slab and related
166 * data structure.
167 */
168struct inode_entry { 165struct inode_entry {
169 struct list_head list; /* list head */ 166 struct list_head list; /* list head */
170 struct inode *inode; /* vfs inode pointer */ 167 struct inode *inode; /* vfs inode pointer */
@@ -234,6 +231,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
234#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 231#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
235#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) 232#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
236#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) 233#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
234#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
237 235
238#define F2FS_IOC_SET_ENCRYPTION_POLICY \ 236#define F2FS_IOC_SET_ENCRYPTION_POLICY \
239 _IOR('f', 19, struct f2fs_encryption_policy) 237 _IOR('f', 19, struct f2fs_encryption_policy)
@@ -256,10 +254,16 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
256/* 254/*
257 * ioctl commands in 32 bit emulation 255 * ioctl commands in 32 bit emulation
258 */ 256 */
259#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS 257#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
260#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS 258#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
259#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
261#endif 260#endif
262 261
262struct f2fs_defragment {
263 u64 start;
264 u64 len;
265};
266
263/* 267/*
264 * For INODE and NODE manager 268 * For INODE and NODE manager
265 */ 269 */
@@ -357,9 +361,9 @@ struct extent_tree {
357 struct rb_root root; /* root of extent info rb-tree */ 361 struct rb_root root; /* root of extent info rb-tree */
358 struct extent_node *cached_en; /* recently accessed extent node */ 362 struct extent_node *cached_en; /* recently accessed extent node */
359 struct extent_info largest; /* largested extent info */ 363 struct extent_info largest; /* largested extent info */
364 struct list_head list; /* to be used by sbi->zombie_list */
360 rwlock_t lock; /* protect extent info rb-tree */ 365 rwlock_t lock; /* protect extent info rb-tree */
361 atomic_t refcount; /* reference count of rb-tree */ 366 atomic_t node_cnt; /* # of extent node in rb-tree*/
362 unsigned int count; /* # of extent node in rb-tree*/
363}; 367};
364 368
365/* 369/*
@@ -434,8 +438,8 @@ struct f2fs_inode_info {
434 unsigned int clevel; /* maximum level of given file name */ 438 unsigned int clevel; /* maximum level of given file name */
435 nid_t i_xattr_nid; /* node id that contains xattrs */ 439 nid_t i_xattr_nid; /* node id that contains xattrs */
436 unsigned long long xattr_ver; /* cp version of xattr modification */ 440 unsigned long long xattr_ver; /* cp version of xattr modification */
437 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
438 441
442 struct list_head dirty_list; /* linked in global dirty list */
439 struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 443 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
440 struct mutex inmem_lock; /* lock for inmemory pages */ 444 struct mutex inmem_lock; /* lock for inmemory pages */
441 445
@@ -544,6 +548,7 @@ struct dnode_of_data {
544 nid_t nid; /* node id of the direct node block */ 548 nid_t nid; /* node id of the direct node block */
545 unsigned int ofs_in_node; /* data offset in the node page */ 549 unsigned int ofs_in_node; /* data offset in the node page */
546 bool inode_page_locked; /* inode page is locked or not */ 550 bool inode_page_locked; /* inode page is locked or not */
551 bool node_changed; /* is node block changed */
547 block_t data_blkaddr; /* block address of the node block */ 552 block_t data_blkaddr; /* block address of the node block */
548}; 553};
549 554
@@ -647,6 +652,7 @@ struct f2fs_sm_info {
647enum count_type { 652enum count_type {
648 F2FS_WRITEBACK, 653 F2FS_WRITEBACK,
649 F2FS_DIRTY_DENTS, 654 F2FS_DIRTY_DENTS,
655 F2FS_DIRTY_DATA,
650 F2FS_DIRTY_NODES, 656 F2FS_DIRTY_NODES,
651 F2FS_DIRTY_META, 657 F2FS_DIRTY_META,
652 F2FS_INMEM_PAGES, 658 F2FS_INMEM_PAGES,
@@ -695,6 +701,12 @@ struct f2fs_bio_info {
695 struct rw_semaphore io_rwsem; /* blocking op for bio */ 701 struct rw_semaphore io_rwsem; /* blocking op for bio */
696}; 702};
697 703
704enum inode_type {
705 DIR_INODE, /* for dirty dir inode */
706 FILE_INODE, /* for dirty regular/symlink inode */
707 NR_INODE_TYPE,
708};
709
698/* for inner inode cache management */ 710/* for inner inode cache management */
699struct inode_management { 711struct inode_management {
700 struct radix_tree_root ino_root; /* ino entry array */ 712 struct radix_tree_root ino_root; /* ino entry array */
@@ -711,11 +723,17 @@ enum {
711 SBI_POR_DOING, /* recovery is doing or not */ 723 SBI_POR_DOING, /* recovery is doing or not */
712}; 724};
713 725
726enum {
727 CP_TIME,
728 REQ_TIME,
729 MAX_TIME,
730};
731
714struct f2fs_sb_info { 732struct f2fs_sb_info {
715 struct super_block *sb; /* pointer to VFS super block */ 733 struct super_block *sb; /* pointer to VFS super block */
716 struct proc_dir_entry *s_proc; /* proc entry */ 734 struct proc_dir_entry *s_proc; /* proc entry */
717 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
718 struct f2fs_super_block *raw_super; /* raw super block pointer */ 735 struct f2fs_super_block *raw_super; /* raw super block pointer */
736 int valid_super_block; /* valid super block no */
719 int s_flag; /* flags for sbi */ 737 int s_flag; /* flags for sbi */
720 738
721 /* for node-related operations */ 739 /* for node-related operations */
@@ -737,23 +755,26 @@ struct f2fs_sb_info {
737 struct rw_semaphore node_write; /* locking node writes */ 755 struct rw_semaphore node_write; /* locking node writes */
738 struct mutex writepages; /* mutex for writepages() */ 756 struct mutex writepages; /* mutex for writepages() */
739 wait_queue_head_t cp_wait; 757 wait_queue_head_t cp_wait;
740 long cp_expires, cp_interval; /* next expected periodic cp */ 758 unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
759 long interval_time[MAX_TIME]; /* to store thresholds */
741 760
742 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 761 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
743 762
744 /* for orphan inode, use 0'th array */ 763 /* for orphan inode, use 0'th array */
745 unsigned int max_orphans; /* max orphan inodes */ 764 unsigned int max_orphans; /* max orphan inodes */
746 765
747 /* for directory inode management */ 766 /* for inode management */
748 struct list_head dir_inode_list; /* dir inode list */ 767 struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
749 spinlock_t dir_inode_lock; /* for dir inode list lock */ 768 spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
750 769
751 /* for extent tree cache */ 770 /* for extent tree cache */
752 struct radix_tree_root extent_tree_root;/* cache extent cache entries */ 771 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
753 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ 772 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
754 struct list_head extent_list; /* lru list for shrinker */ 773 struct list_head extent_list; /* lru list for shrinker */
755 spinlock_t extent_lock; /* locking extent lru list */ 774 spinlock_t extent_lock; /* locking extent lru list */
756 int total_ext_tree; /* extent tree count */ 775 atomic_t total_ext_tree; /* extent tree count */
776 struct list_head zombie_list; /* extent zombie tree list */
777 atomic_t total_zombie_tree; /* extent zombie tree count */
757 atomic_t total_ext_node; /* extent info count */ 778 atomic_t total_ext_node; /* extent info count */
758 779
759 /* basic filesystem units */ 780 /* basic filesystem units */
@@ -771,6 +792,7 @@ struct f2fs_sb_info {
771 unsigned int total_node_count; /* total node block count */ 792 unsigned int total_node_count; /* total node block count */
772 unsigned int total_valid_node_count; /* valid node block count */ 793 unsigned int total_valid_node_count; /* valid node block count */
773 unsigned int total_valid_inode_count; /* valid inode count */ 794 unsigned int total_valid_inode_count; /* valid inode count */
795 loff_t max_file_blocks; /* max block index of file */
774 int active_logs; /* # of active logs */ 796 int active_logs; /* # of active logs */
775 int dir_level; /* directory level */ 797 int dir_level; /* directory level */
776 798
@@ -809,7 +831,7 @@ struct f2fs_sb_info {
809 atomic_t inline_inode; /* # of inline_data inodes */ 831 atomic_t inline_inode; /* # of inline_data inodes */
810 atomic_t inline_dir; /* # of inline_dentry inodes */ 832 atomic_t inline_dir; /* # of inline_dentry inodes */
811 int bg_gc; /* background gc calls */ 833 int bg_gc; /* background gc calls */
812 unsigned int n_dirty_dirs; /* # of dir inodes */ 834 unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
813#endif 835#endif
814 unsigned int last_victim[2]; /* last victim segment # */ 836 unsigned int last_victim[2]; /* last victim segment # */
815 spinlock_t stat_lock; /* lock for stat operations */ 837 spinlock_t stat_lock; /* lock for stat operations */
@@ -824,6 +846,31 @@ struct f2fs_sb_info {
824 unsigned int shrinker_run_no; 846 unsigned int shrinker_run_no;
825}; 847};
826 848
849static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
850{
851 sbi->last_time[type] = jiffies;
852}
853
854static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
855{
856 struct timespec ts = {sbi->interval_time[type], 0};
857 unsigned long interval = timespec_to_jiffies(&ts);
858
859 return time_after(jiffies, sbi->last_time[type] + interval);
860}
861
862static inline bool is_idle(struct f2fs_sb_info *sbi)
863{
864 struct block_device *bdev = sbi->sb->s_bdev;
865 struct request_queue *q = bdev_get_queue(bdev);
866 struct request_list *rl = &q->root_rl;
867
868 if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
869 return 0;
870
871 return f2fs_time_over(sbi, REQ_TIME);
872}
873
827/* 874/*
828 * Inline functions 875 * Inline functions
829 */ 876 */
@@ -1059,8 +1106,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
1059static inline void inode_inc_dirty_pages(struct inode *inode) 1106static inline void inode_inc_dirty_pages(struct inode *inode)
1060{ 1107{
1061 atomic_inc(&F2FS_I(inode)->dirty_pages); 1108 atomic_inc(&F2FS_I(inode)->dirty_pages);
1062 if (S_ISDIR(inode->i_mode)) 1109 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1063 inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); 1110 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1064} 1111}
1065 1112
1066static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 1113static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1075,9 +1122,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
1075 return; 1122 return;
1076 1123
1077 atomic_dec(&F2FS_I(inode)->dirty_pages); 1124 atomic_dec(&F2FS_I(inode)->dirty_pages);
1078 1125 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1079 if (S_ISDIR(inode->i_mode)) 1126 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1080 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
1081} 1127}
1082 1128
1083static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 1129static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1092,8 +1138,7 @@ static inline int get_dirty_pages(struct inode *inode)
1092 1138
1093static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 1139static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
1094{ 1140{
1095 unsigned int pages_per_sec = sbi->segs_per_sec * 1141 unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
1096 (1 << sbi->log_blocks_per_seg);
1097 return ((get_pages(sbi, block_type) + pages_per_sec - 1) 1142 return ((get_pages(sbi, block_type) + pages_per_sec - 1)
1098 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; 1143 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
1099} 1144}
@@ -1416,6 +1461,8 @@ enum {
1416 FI_DROP_CACHE, /* drop dirty page cache */ 1461 FI_DROP_CACHE, /* drop dirty page cache */
1417 FI_DATA_EXIST, /* indicate data exists */ 1462 FI_DATA_EXIST, /* indicate data exists */
1418 FI_INLINE_DOTS, /* indicate inline dot dentries */ 1463 FI_INLINE_DOTS, /* indicate inline dot dentries */
1464 FI_DO_DEFRAG, /* indicate defragment is running */
1465 FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
1419}; 1466};
1420 1467
1421static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1468static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1659,8 +1706,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
1659void f2fs_set_inode_flags(struct inode *); 1706void f2fs_set_inode_flags(struct inode *);
1660struct inode *f2fs_iget(struct super_block *, unsigned long); 1707struct inode *f2fs_iget(struct super_block *, unsigned long);
1661int try_to_free_nats(struct f2fs_sb_info *, int); 1708int try_to_free_nats(struct f2fs_sb_info *, int);
1662void update_inode(struct inode *, struct page *); 1709int update_inode(struct inode *, struct page *);
1663void update_inode_page(struct inode *); 1710int update_inode_page(struct inode *);
1664int f2fs_write_inode(struct inode *, struct writeback_control *); 1711int f2fs_write_inode(struct inode *, struct writeback_control *);
1665void f2fs_evict_inode(struct inode *); 1712void f2fs_evict_inode(struct inode *);
1666void handle_failed_inode(struct inode *); 1713void handle_failed_inode(struct inode *);
@@ -1765,7 +1812,7 @@ void destroy_node_manager_caches(void);
1765 */ 1812 */
1766void register_inmem_page(struct inode *, struct page *); 1813void register_inmem_page(struct inode *, struct page *);
1767int commit_inmem_pages(struct inode *, bool); 1814int commit_inmem_pages(struct inode *, bool);
1768void f2fs_balance_fs(struct f2fs_sb_info *); 1815void f2fs_balance_fs(struct f2fs_sb_info *, bool);
1769void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1816void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1770int f2fs_issue_flush(struct f2fs_sb_info *); 1817int f2fs_issue_flush(struct f2fs_sb_info *);
1771int create_flush_cmd_control(struct f2fs_sb_info *); 1818int create_flush_cmd_control(struct f2fs_sb_info *);
@@ -1811,9 +1858,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
1811int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); 1858int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
1812void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); 1859void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
1813long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1860long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1814void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1861void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
1815void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1862void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
1816void release_dirty_inode(struct f2fs_sb_info *); 1863void release_ino_entry(struct f2fs_sb_info *);
1817bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1864bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1818int acquire_orphan_inode(struct f2fs_sb_info *); 1865int acquire_orphan_inode(struct f2fs_sb_info *);
1819void release_orphan_inode(struct f2fs_sb_info *); 1866void release_orphan_inode(struct f2fs_sb_info *);
@@ -1823,9 +1870,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *);
1823int get_valid_checkpoint(struct f2fs_sb_info *); 1870int get_valid_checkpoint(struct f2fs_sb_info *);
1824void update_dirty_page(struct inode *, struct page *); 1871void update_dirty_page(struct inode *, struct page *);
1825void add_dirty_dir_inode(struct inode *); 1872void add_dirty_dir_inode(struct inode *);
1826void remove_dirty_dir_inode(struct inode *); 1873void remove_dirty_inode(struct inode *);
1827void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1874int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
1828void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); 1875int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
1829void init_ino_entry_info(struct f2fs_sb_info *); 1876void init_ino_entry_info(struct f2fs_sb_info *);
1830int __init create_checkpoint_caches(void); 1877int __init create_checkpoint_caches(void);
1831void destroy_checkpoint_caches(void); 1878void destroy_checkpoint_caches(void);
@@ -1845,6 +1892,7 @@ struct page *find_data_page(struct inode *, pgoff_t);
1845struct page *get_lock_data_page(struct inode *, pgoff_t, bool); 1892struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
1846struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1893struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1847int do_write_data_page(struct f2fs_io_info *); 1894int do_write_data_page(struct f2fs_io_info *);
1895int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
1848int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1896int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1849void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1897void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1850int f2fs_release_page(struct page *, gfp_t); 1898int f2fs_release_page(struct page *, gfp_t);
@@ -1875,8 +1923,9 @@ struct f2fs_stat_info {
1875 int main_area_segs, main_area_sections, main_area_zones; 1923 int main_area_segs, main_area_sections, main_area_zones;
1876 unsigned long long hit_largest, hit_cached, hit_rbtree; 1924 unsigned long long hit_largest, hit_cached, hit_rbtree;
1877 unsigned long long hit_total, total_ext; 1925 unsigned long long hit_total, total_ext;
1878 int ext_tree, ext_node; 1926 int ext_tree, zombie_tree, ext_node;
1879 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1927 int ndirty_node, ndirty_meta;
1928 int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
1880 int nats, dirty_nats, sits, dirty_sits, fnids; 1929 int nats, dirty_nats, sits, dirty_sits, fnids;
1881 int total_count, utilization; 1930 int total_count, utilization;
1882 int bg_gc, inmem_pages, wb_pages; 1931 int bg_gc, inmem_pages, wb_pages;
@@ -1886,7 +1935,7 @@ struct f2fs_stat_info {
1886 int util_free, util_valid, util_invalid; 1935 int util_free, util_valid, util_invalid;
1887 int rsvd_segs, overp_segs; 1936 int rsvd_segs, overp_segs;
1888 int dirty_count, node_pages, meta_pages; 1937 int dirty_count, node_pages, meta_pages;
1889 int prefree_count, call_count, cp_count; 1938 int prefree_count, call_count, cp_count, bg_cp_count;
1890 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1939 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1891 int bg_node_segs, bg_data_segs; 1940 int bg_node_segs, bg_data_segs;
1892 int tot_blks, data_blks, node_blks; 1941 int tot_blks, data_blks, node_blks;
@@ -1907,10 +1956,11 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1907} 1956}
1908 1957
1909#define stat_inc_cp_count(si) ((si)->cp_count++) 1958#define stat_inc_cp_count(si) ((si)->cp_count++)
1959#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
1910#define stat_inc_call_count(si) ((si)->call_count++) 1960#define stat_inc_call_count(si) ((si)->call_count++)
1911#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1961#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1912#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1962#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
1913#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1963#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
1914#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) 1964#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
1915#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) 1965#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
1916#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) 1966#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
@@ -1985,14 +2035,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1985 2035
1986int f2fs_build_stats(struct f2fs_sb_info *); 2036int f2fs_build_stats(struct f2fs_sb_info *);
1987void f2fs_destroy_stats(struct f2fs_sb_info *); 2037void f2fs_destroy_stats(struct f2fs_sb_info *);
1988void __init f2fs_create_root_stats(void); 2038int __init f2fs_create_root_stats(void);
1989void f2fs_destroy_root_stats(void); 2039void f2fs_destroy_root_stats(void);
1990#else 2040#else
1991#define stat_inc_cp_count(si) 2041#define stat_inc_cp_count(si)
2042#define stat_inc_bg_cp_count(si)
1992#define stat_inc_call_count(si) 2043#define stat_inc_call_count(si)
1993#define stat_inc_bggc_count(si) 2044#define stat_inc_bggc_count(si)
1994#define stat_inc_dirty_dir(sbi) 2045#define stat_inc_dirty_inode(sbi, type)
1995#define stat_dec_dirty_dir(sbi) 2046#define stat_dec_dirty_inode(sbi, type)
1996#define stat_inc_total_hit(sb) 2047#define stat_inc_total_hit(sb)
1997#define stat_inc_rbtree_node_hit(sb) 2048#define stat_inc_rbtree_node_hit(sb)
1998#define stat_inc_largest_node_hit(sbi) 2049#define stat_inc_largest_node_hit(sbi)
@@ -2013,7 +2064,7 @@ void f2fs_destroy_root_stats(void);
2013 2064
2014static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 2065static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
2015static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 2066static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
2016static inline void __init f2fs_create_root_stats(void) { } 2067static inline int __init f2fs_create_root_stats(void) { return 0; }
2017static inline void f2fs_destroy_root_stats(void) { } 2068static inline void f2fs_destroy_root_stats(void) { }
2018#endif 2069#endif
2019 2070
@@ -2067,8 +2118,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *);
2067 * extent_cache.c 2118 * extent_cache.c
2068 */ 2119 */
2069unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); 2120unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
2070void f2fs_drop_largest_extent(struct inode *, pgoff_t); 2121bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
2071void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
2072unsigned int f2fs_destroy_extent_node(struct inode *); 2122unsigned int f2fs_destroy_extent_node(struct inode *);
2073void f2fs_destroy_extent_tree(struct inode *); 2123void f2fs_destroy_extent_tree(struct inode *);
2074bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); 2124bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a197215ad52b..18ddb1e5182a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
40 struct dnode_of_data dn; 40 struct dnode_of_data dn;
41 int err; 41 int err;
42 42
43 f2fs_balance_fs(sbi);
44
45 sb_start_pagefault(inode->i_sb); 43 sb_start_pagefault(inode->i_sb);
46 44
47 f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); 45 f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -57,6 +55,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
57 f2fs_put_dnode(&dn); 55 f2fs_put_dnode(&dn);
58 f2fs_unlock_op(sbi); 56 f2fs_unlock_op(sbi);
59 57
58 f2fs_balance_fs(sbi, dn.node_changed);
59
60 file_update_time(vma->vm_file); 60 file_update_time(vma->vm_file);
61 lock_page(page); 61 lock_page(page);
62 if (unlikely(page->mapping != inode->i_mapping || 62 if (unlikely(page->mapping != inode->i_mapping ||
@@ -96,6 +96,7 @@ mapped:
96 clear_cold_data(page); 96 clear_cold_data(page);
97out: 97out:
98 sb_end_pagefault(inode->i_sb); 98 sb_end_pagefault(inode->i_sb);
99 f2fs_update_time(sbi, REQ_TIME);
99 return block_page_mkwrite_return(err); 100 return block_page_mkwrite_return(err);
100} 101}
101 102
@@ -201,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
201 trace_f2fs_sync_file_enter(inode); 202 trace_f2fs_sync_file_enter(inode);
202 203
203 /* if fdatasync is triggered, let's do in-place-update */ 204 /* if fdatasync is triggered, let's do in-place-update */
204 if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) 205 if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
205 set_inode_flag(fi, FI_NEED_IPU); 206 set_inode_flag(fi, FI_NEED_IPU);
206 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 207 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
207 clear_inode_flag(fi, FI_NEED_IPU); 208 clear_inode_flag(fi, FI_NEED_IPU);
@@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
233 goto out; 234 goto out;
234 } 235 }
235go_write: 236go_write:
236 /* guarantee free sections for fsync */
237 f2fs_balance_fs(sbi);
238
239 /* 237 /*
240 * Both of fdatasync() and fsync() are able to be recovered from 238 * Both of fdatasync() and fsync() are able to be recovered from
241 * sudden-power-off. 239 * sudden-power-off.
@@ -261,8 +259,10 @@ sync_nodes:
261 sync_node_pages(sbi, ino, &wbc); 259 sync_node_pages(sbi, ino, &wbc);
262 260
263 /* if cp_error was enabled, we should avoid infinite loop */ 261 /* if cp_error was enabled, we should avoid infinite loop */
264 if (unlikely(f2fs_cp_error(sbi))) 262 if (unlikely(f2fs_cp_error(sbi))) {
263 ret = -EIO;
265 goto out; 264 goto out;
265 }
266 266
267 if (need_inode_block_update(sbi, ino)) { 267 if (need_inode_block_update(sbi, ino)) {
268 mark_inode_dirty_sync(inode); 268 mark_inode_dirty_sync(inode);
@@ -275,12 +275,13 @@ sync_nodes:
275 goto out; 275 goto out;
276 276
277 /* once recovery info is written, don't need to tack this */ 277 /* once recovery info is written, don't need to tack this */
278 remove_dirty_inode(sbi, ino, APPEND_INO); 278 remove_ino_entry(sbi, ino, APPEND_INO);
279 clear_inode_flag(fi, FI_APPEND_WRITE); 279 clear_inode_flag(fi, FI_APPEND_WRITE);
280flush_out: 280flush_out:
281 remove_dirty_inode(sbi, ino, UPDATE_INO); 281 remove_ino_entry(sbi, ino, UPDATE_INO);
282 clear_inode_flag(fi, FI_UPDATE_WRITE); 282 clear_inode_flag(fi, FI_UPDATE_WRITE);
283 ret = f2fs_issue_flush(sbi); 283 ret = f2fs_issue_flush(sbi);
284 f2fs_update_time(sbi, REQ_TIME);
284out: 285out:
285 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 286 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
286 f2fs_trace_ios(NULL, 1); 287 f2fs_trace_ios(NULL, 1);
@@ -418,19 +419,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
418static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) 419static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
419{ 420{
420 struct inode *inode = file_inode(file); 421 struct inode *inode = file_inode(file);
422 int err;
421 423
422 if (f2fs_encrypted_inode(inode)) { 424 if (f2fs_encrypted_inode(inode)) {
423 int err = f2fs_get_encryption_info(inode); 425 err = f2fs_get_encryption_info(inode);
424 if (err) 426 if (err)
425 return 0; 427 return 0;
426 } 428 }
427 429
428 /* we don't need to use inline_data strictly */ 430 /* we don't need to use inline_data strictly */
429 if (f2fs_has_inline_data(inode)) { 431 err = f2fs_convert_inline_inode(inode);
430 int err = f2fs_convert_inline_inode(inode); 432 if (err)
431 if (err) 433 return err;
432 return err;
433 }
434 434
435 file_accessed(file); 435 file_accessed(file);
436 vma->vm_ops = &f2fs_file_vm_ops; 436 vma->vm_ops = &f2fs_file_vm_ops;
@@ -483,11 +483,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
483 F2FS_I(dn->inode)) + ofs; 483 F2FS_I(dn->inode)) + ofs;
484 f2fs_update_extent_cache_range(dn, fofs, 0, len); 484 f2fs_update_extent_cache_range(dn, fofs, 0, len);
485 dec_valid_block_count(sbi, dn->inode, nr_free); 485 dec_valid_block_count(sbi, dn->inode, nr_free);
486 set_page_dirty(dn->node_page);
487 sync_inode_page(dn); 486 sync_inode_page(dn);
488 } 487 }
489 dn->ofs_in_node = ofs; 488 dn->ofs_in_node = ofs;
490 489
490 f2fs_update_time(sbi, REQ_TIME);
491 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, 491 trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
492 dn->ofs_in_node, nr_free); 492 dn->ofs_in_node, nr_free);
493 return nr_free; 493 return nr_free;
@@ -604,7 +604,7 @@ int f2fs_truncate(struct inode *inode, bool lock)
604 trace_f2fs_truncate(inode); 604 trace_f2fs_truncate(inode);
605 605
606 /* we should check inline_data size */ 606 /* we should check inline_data size */
607 if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { 607 if (!f2fs_may_inline_data(inode)) {
608 err = f2fs_convert_inline_inode(inode); 608 err = f2fs_convert_inline_inode(inode);
609 if (err) 609 if (err)
610 return err; 610 return err;
@@ -679,13 +679,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
679 err = f2fs_truncate(inode, true); 679 err = f2fs_truncate(inode, true);
680 if (err) 680 if (err)
681 return err; 681 return err;
682 f2fs_balance_fs(F2FS_I_SB(inode)); 682 f2fs_balance_fs(F2FS_I_SB(inode), true);
683 } else { 683 } else {
684 /* 684 /*
685 * do not trim all blocks after i_size if target size is 685 * do not trim all blocks after i_size if target size is
686 * larger than i_size. 686 * larger than i_size.
687 */ 687 */
688 truncate_setsize(inode, attr->ia_size); 688 truncate_setsize(inode, attr->ia_size);
689
690 /* should convert inline inode here */
691 if (!f2fs_may_inline_data(inode)) {
692 err = f2fs_convert_inline_inode(inode);
693 if (err)
694 return err;
695 }
689 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 696 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
690 } 697 }
691 } 698 }
@@ -727,7 +734,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
727 if (!len) 734 if (!len)
728 return 0; 735 return 0;
729 736
730 f2fs_balance_fs(sbi); 737 f2fs_balance_fs(sbi, true);
731 738
732 f2fs_lock_op(sbi); 739 f2fs_lock_op(sbi);
733 page = get_new_data_page(inode, NULL, index, false); 740 page = get_new_data_page(inode, NULL, index, false);
@@ -778,13 +785,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
778{ 785{
779 pgoff_t pg_start, pg_end; 786 pgoff_t pg_start, pg_end;
780 loff_t off_start, off_end; 787 loff_t off_start, off_end;
781 int ret = 0; 788 int ret;
782 789
783 if (f2fs_has_inline_data(inode)) { 790 ret = f2fs_convert_inline_inode(inode);
784 ret = f2fs_convert_inline_inode(inode); 791 if (ret)
785 if (ret) 792 return ret;
786 return ret;
787 }
788 793
789 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 794 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
790 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; 795 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -815,7 +820,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
815 loff_t blk_start, blk_end; 820 loff_t blk_start, blk_end;
816 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 821 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
817 822
818 f2fs_balance_fs(sbi); 823 f2fs_balance_fs(sbi, true);
819 824
820 blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT; 825 blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
821 blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT; 826 blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
@@ -918,7 +923,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
918 int ret = 0; 923 int ret = 0;
919 924
920 for (; end < nrpages; start++, end++) { 925 for (; end < nrpages; start++, end++) {
921 f2fs_balance_fs(sbi); 926 f2fs_balance_fs(sbi, true);
922 f2fs_lock_op(sbi); 927 f2fs_lock_op(sbi);
923 ret = __exchange_data_block(inode, end, start, true); 928 ret = __exchange_data_block(inode, end, start, true);
924 f2fs_unlock_op(sbi); 929 f2fs_unlock_op(sbi);
@@ -941,13 +946,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
941 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 946 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
942 return -EINVAL; 947 return -EINVAL;
943 948
944 f2fs_balance_fs(F2FS_I_SB(inode)); 949 ret = f2fs_convert_inline_inode(inode);
945 950 if (ret)
946 if (f2fs_has_inline_data(inode)) { 951 return ret;
947 ret = f2fs_convert_inline_inode(inode);
948 if (ret)
949 return ret;
950 }
951 952
952 pg_start = offset >> PAGE_CACHE_SHIFT; 953 pg_start = offset >> PAGE_CACHE_SHIFT;
953 pg_end = (offset + len) >> PAGE_CACHE_SHIFT; 954 pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
@@ -991,13 +992,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
991 if (ret) 992 if (ret)
992 return ret; 993 return ret;
993 994
994 f2fs_balance_fs(sbi); 995 ret = f2fs_convert_inline_inode(inode);
995 996 if (ret)
996 if (f2fs_has_inline_data(inode)) { 997 return ret;
997 ret = f2fs_convert_inline_inode(inode);
998 if (ret)
999 return ret;
1000 }
1001 998
1002 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); 999 ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1003 if (ret) 1000 if (ret)
@@ -1104,13 +1101,11 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1104 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 1101 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1105 return -EINVAL; 1102 return -EINVAL;
1106 1103
1107 f2fs_balance_fs(sbi); 1104 ret = f2fs_convert_inline_inode(inode);
1105 if (ret)
1106 return ret;
1108 1107
1109 if (f2fs_has_inline_data(inode)) { 1108 f2fs_balance_fs(sbi, true);
1110 ret = f2fs_convert_inline_inode(inode);
1111 if (ret)
1112 return ret;
1113 }
1114 1109
1115 ret = truncate_blocks(inode, i_size_read(inode), true); 1110 ret = truncate_blocks(inode, i_size_read(inode), true);
1116 if (ret) 1111 if (ret)
@@ -1154,17 +1149,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1154 loff_t off_start, off_end; 1149 loff_t off_start, off_end;
1155 int ret = 0; 1150 int ret = 0;
1156 1151
1157 f2fs_balance_fs(sbi);
1158
1159 ret = inode_newsize_ok(inode, (len + offset)); 1152 ret = inode_newsize_ok(inode, (len + offset));
1160 if (ret) 1153 if (ret)
1161 return ret; 1154 return ret;
1162 1155
1163 if (f2fs_has_inline_data(inode)) { 1156 ret = f2fs_convert_inline_inode(inode);
1164 ret = f2fs_convert_inline_inode(inode); 1157 if (ret)
1165 if (ret) 1158 return ret;
1166 return ret; 1159
1167 } 1160 f2fs_balance_fs(sbi, true);
1168 1161
1169 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 1162 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
1170 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; 1163 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -1246,6 +1239,7 @@ static long f2fs_fallocate(struct file *file, int mode,
1246 if (!ret) { 1239 if (!ret) {
1247 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1240 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1248 mark_inode_dirty(inode); 1241 mark_inode_dirty(inode);
1242 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1249 } 1243 }
1250 1244
1251out: 1245out:
@@ -1353,8 +1347,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1353 if (!inode_owner_or_capable(inode)) 1347 if (!inode_owner_or_capable(inode))
1354 return -EACCES; 1348 return -EACCES;
1355 1349
1356 f2fs_balance_fs(F2FS_I_SB(inode));
1357
1358 if (f2fs_is_atomic_file(inode)) 1350 if (f2fs_is_atomic_file(inode))
1359 return 0; 1351 return 0;
1360 1352
@@ -1363,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1363 return ret; 1355 return ret;
1364 1356
1365 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1357 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1358 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1359
1366 return 0; 1360 return 0;
1367} 1361}
1368 1362
@@ -1384,8 +1378,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1384 if (f2fs_is_atomic_file(inode)) { 1378 if (f2fs_is_atomic_file(inode)) {
1385 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1379 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1386 ret = commit_inmem_pages(inode, false); 1380 ret = commit_inmem_pages(inode, false);
1387 if (ret) 1381 if (ret) {
1382 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1388 goto err_out; 1383 goto err_out;
1384 }
1389 } 1385 }
1390 1386
1391 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1387 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
@@ -1410,6 +1406,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
1410 return ret; 1406 return ret;
1411 1407
1412 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1408 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1409 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1413 return 0; 1410 return 0;
1414} 1411}
1415 1412
@@ -1441,13 +1438,17 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1441 if (ret) 1438 if (ret)
1442 return ret; 1439 return ret;
1443 1440
1444 f2fs_balance_fs(F2FS_I_SB(inode)); 1441 if (f2fs_is_atomic_file(inode)) {
1445 1442 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1446 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1443 commit_inmem_pages(inode, true);
1447 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1444 }
1448 commit_inmem_pages(inode, true); 1445 if (f2fs_is_volatile_file(inode)) {
1446 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1447 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
1448 }
1449 1449
1450 mnt_drop_write_file(filp); 1450 mnt_drop_write_file(filp);
1451 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1451 return ret; 1452 return ret;
1452} 1453}
1453 1454
@@ -1487,6 +1488,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1487 default: 1488 default:
1488 return -EINVAL; 1489 return -EINVAL;
1489 } 1490 }
1491 f2fs_update_time(sbi, REQ_TIME);
1490 return 0; 1492 return 0;
1491} 1493}
1492 1494
@@ -1517,6 +1519,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1517 if (copy_to_user((struct fstrim_range __user *)arg, &range, 1519 if (copy_to_user((struct fstrim_range __user *)arg, &range,
1518 sizeof(range))) 1520 sizeof(range)))
1519 return -EFAULT; 1521 return -EFAULT;
1522 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1520 return 0; 1523 return 0;
1521} 1524}
1522 1525
@@ -1540,6 +1543,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
1540 sizeof(policy))) 1543 sizeof(policy)))
1541 return -EFAULT; 1544 return -EFAULT;
1542 1545
1546 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1543 return f2fs_process_policy(&policy, inode); 1547 return f2fs_process_policy(&policy, inode);
1544#else 1548#else
1545 return -EOPNOTSUPP; 1549 return -EOPNOTSUPP;
@@ -1586,13 +1590,13 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
1586 generate_random_uuid(sbi->raw_super->encrypt_pw_salt); 1590 generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
1587 1591
1588 err = f2fs_commit_super(sbi, false); 1592 err = f2fs_commit_super(sbi, false);
1589
1590 mnt_drop_write_file(filp);
1591 if (err) { 1593 if (err) {
1592 /* undo new data */ 1594 /* undo new data */
1593 memset(sbi->raw_super->encrypt_pw_salt, 0, 16); 1595 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
1596 mnt_drop_write_file(filp);
1594 return err; 1597 return err;
1595 } 1598 }
1599 mnt_drop_write_file(filp);
1596got_it: 1600got_it:
1597 if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 1601 if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
1598 16)) 1602 16))
@@ -1629,7 +1633,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
1629{ 1633{
1630 struct inode *inode = file_inode(filp); 1634 struct inode *inode = file_inode(filp);
1631 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1635 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1632 struct cp_control cpc;
1633 1636
1634 if (!capable(CAP_SYS_ADMIN)) 1637 if (!capable(CAP_SYS_ADMIN))
1635 return -EPERM; 1638 return -EPERM;
@@ -1637,13 +1640,196 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
1637 if (f2fs_readonly(sbi->sb)) 1640 if (f2fs_readonly(sbi->sb))
1638 return -EROFS; 1641 return -EROFS;
1639 1642
1640 cpc.reason = __get_cp_reason(sbi); 1643 return f2fs_sync_fs(sbi->sb, 1);
1644}
1641 1645
1642 mutex_lock(&sbi->gc_mutex); 1646static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
1643 write_checkpoint(sbi, &cpc); 1647 struct file *filp,
1644 mutex_unlock(&sbi->gc_mutex); 1648 struct f2fs_defragment *range)
1649{
1650 struct inode *inode = file_inode(filp);
1651 struct f2fs_map_blocks map;
1652 struct extent_info ei;
1653 pgoff_t pg_start, pg_end;
1654 unsigned int blk_per_seg = sbi->blocks_per_seg;
1655 unsigned int total = 0, sec_num;
1656 unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
1657 block_t blk_end = 0;
1658 bool fragmented = false;
1659 int err;
1645 1660
1646 return 0; 1661 /* if in-place-update policy is enabled, don't waste time here */
1662 if (need_inplace_update(inode))
1663 return -EINVAL;
1664
1665 pg_start = range->start >> PAGE_CACHE_SHIFT;
1666 pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
1667
1668 f2fs_balance_fs(sbi, true);
1669
1670 mutex_lock(&inode->i_mutex);
1671
1672 /* writeback all dirty pages in the range */
1673 err = filemap_write_and_wait_range(inode->i_mapping, range->start,
1674 range->start + range->len - 1);
1675 if (err)
1676 goto out;
1677
1678 /*
1679 * lookup mapping info in extent cache, skip defragmenting if physical
1680 * block addresses are continuous.
1681 */
1682 if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
1683 if (ei.fofs + ei.len >= pg_end)
1684 goto out;
1685 }
1686
1687 map.m_lblk = pg_start;
1688
1689 /*
1690 * lookup mapping info in dnode page cache, skip defragmenting if all
1691 * physical block addresses are continuous even if there are hole(s)
1692 * in logical blocks.
1693 */
1694 while (map.m_lblk < pg_end) {
1695 map.m_len = pg_end - map.m_lblk;
1696 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
1697 if (err)
1698 goto out;
1699
1700 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
1701 map.m_lblk++;
1702 continue;
1703 }
1704
1705 if (blk_end && blk_end != map.m_pblk) {
1706 fragmented = true;
1707 break;
1708 }
1709 blk_end = map.m_pblk + map.m_len;
1710
1711 map.m_lblk += map.m_len;
1712 }
1713
1714 if (!fragmented)
1715 goto out;
1716
1717 map.m_lblk = pg_start;
1718 map.m_len = pg_end - pg_start;
1719
1720 sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
1721
1722 /*
1723 * make sure there are enough free section for LFS allocation, this can
1724 * avoid defragment running in SSR mode when free section are allocated
1725 * intensively
1726 */
1727 if (has_not_enough_free_secs(sbi, sec_num)) {
1728 err = -EAGAIN;
1729 goto out;
1730 }
1731
1732 while (map.m_lblk < pg_end) {
1733 pgoff_t idx;
1734 int cnt = 0;
1735
1736do_map:
1737 map.m_len = pg_end - map.m_lblk;
1738 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
1739 if (err)
1740 goto clear_out;
1741
1742 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
1743 map.m_lblk++;
1744 continue;
1745 }
1746
1747 set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
1748
1749 idx = map.m_lblk;
1750 while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
1751 struct page *page;
1752
1753 page = get_lock_data_page(inode, idx, true);
1754 if (IS_ERR(page)) {
1755 err = PTR_ERR(page);
1756 goto clear_out;
1757 }
1758
1759 set_page_dirty(page);
1760 f2fs_put_page(page, 1);
1761
1762 idx++;
1763 cnt++;
1764 total++;
1765 }
1766
1767 map.m_lblk = idx;
1768
1769 if (idx < pg_end && cnt < blk_per_seg)
1770 goto do_map;
1771
1772 clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
1773
1774 err = filemap_fdatawrite(inode->i_mapping);
1775 if (err)
1776 goto out;
1777 }
1778clear_out:
1779 clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
1780out:
1781 mutex_unlock(&inode->i_mutex);
1782 if (!err)
1783 range->len = (u64)total << PAGE_CACHE_SHIFT;
1784 return err;
1785}
1786
1787static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
1788{
1789 struct inode *inode = file_inode(filp);
1790 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1791 struct f2fs_defragment range;
1792 int err;
1793
1794 if (!capable(CAP_SYS_ADMIN))
1795 return -EPERM;
1796
1797 if (!S_ISREG(inode->i_mode))
1798 return -EINVAL;
1799
1800 err = mnt_want_write_file(filp);
1801 if (err)
1802 return err;
1803
1804 if (f2fs_readonly(sbi->sb)) {
1805 err = -EROFS;
1806 goto out;
1807 }
1808
1809 if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
1810 sizeof(range))) {
1811 err = -EFAULT;
1812 goto out;
1813 }
1814
1815 /* verify alignment of offset & size */
1816 if (range.start & (F2FS_BLKSIZE - 1) ||
1817 range.len & (F2FS_BLKSIZE - 1)) {
1818 err = -EINVAL;
1819 goto out;
1820 }
1821
1822 err = f2fs_defragment_range(sbi, filp, &range);
1823 f2fs_update_time(sbi, REQ_TIME);
1824 if (err < 0)
1825 goto out;
1826
1827 if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
1828 sizeof(range)))
1829 err = -EFAULT;
1830out:
1831 mnt_drop_write_file(filp);
1832 return err;
1647} 1833}
1648 1834
1649long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 1835long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -1679,6 +1865,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1679 return f2fs_ioc_gc(filp, arg); 1865 return f2fs_ioc_gc(filp, arg);
1680 case F2FS_IOC_WRITE_CHECKPOINT: 1866 case F2FS_IOC_WRITE_CHECKPOINT:
1681 return f2fs_ioc_write_checkpoint(filp, arg); 1867 return f2fs_ioc_write_checkpoint(filp, arg);
1868 case F2FS_IOC_DEFRAGMENT:
1869 return f2fs_ioc_defragment(filp, arg);
1682 default: 1870 default:
1683 return -ENOTTY; 1871 return -ENOTTY;
1684 } 1872 }
@@ -1706,6 +1894,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1706 case F2FS_IOC32_SETFLAGS: 1894 case F2FS_IOC32_SETFLAGS:
1707 cmd = F2FS_IOC_SETFLAGS; 1895 cmd = F2FS_IOC_SETFLAGS;
1708 break; 1896 break;
1897 case F2FS_IOC32_GETVERSION:
1898 cmd = F2FS_IOC_GETVERSION;
1899 break;
1900 case F2FS_IOC_START_ATOMIC_WRITE:
1901 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
1902 case F2FS_IOC_START_VOLATILE_WRITE:
1903 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
1904 case F2FS_IOC_ABORT_VOLATILE_WRITE:
1905 case F2FS_IOC_SHUTDOWN:
1906 case F2FS_IOC_SET_ENCRYPTION_POLICY:
1907 case F2FS_IOC_GET_ENCRYPTION_PWSALT:
1908 case F2FS_IOC_GET_ENCRYPTION_POLICY:
1909 case F2FS_IOC_GARBAGE_COLLECT:
1910 case F2FS_IOC_WRITE_CHECKPOINT:
1911 case F2FS_IOC_DEFRAGMENT:
1912 break;
1709 default: 1913 default:
1710 return -ENOIOCTLCMD; 1914 return -ENOIOCTLCMD;
1711 } 1915 }
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index fedbf67a0842..f610c2a9bdde 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -16,7 +16,6 @@
16#include <linux/kthread.h> 16#include <linux/kthread.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/blkdev.h>
20 19
21#include "f2fs.h" 20#include "f2fs.h"
22#include "node.h" 21#include "node.h"
@@ -173,9 +172,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
173{ 172{
174 /* SSR allocates in a segment unit */ 173 /* SSR allocates in a segment unit */
175 if (p->alloc_mode == SSR) 174 if (p->alloc_mode == SSR)
176 return 1 << sbi->log_blocks_per_seg; 175 return sbi->blocks_per_seg;
177 if (p->gc_mode == GC_GREEDY) 176 if (p->gc_mode == GC_GREEDY)
178 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; 177 return sbi->blocks_per_seg * p->ofs_unit;
179 else if (p->gc_mode == GC_CB) 178 else if (p->gc_mode == GC_CB)
180 return UINT_MAX; 179 return UINT_MAX;
181 else /* No other gc_mode */ 180 else /* No other gc_mode */
@@ -832,8 +831,10 @@ gc_more:
832 831
833 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 832 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
834 goto stop; 833 goto stop;
835 if (unlikely(f2fs_cp_error(sbi))) 834 if (unlikely(f2fs_cp_error(sbi))) {
835 ret = -EIO;
836 goto stop; 836 goto stop;
837 }
837 838
838 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) { 839 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
839 gc_type = FG_GC; 840 gc_type = FG_GC;
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b4a65be9f7d3..a993967dcdb9 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
100 return true; 100 return true;
101 return false; 101 return false;
102} 102}
103
104static inline int is_idle(struct f2fs_sb_info *sbi)
105{
106 struct block_device *bdev = sbi->sb->s_bdev;
107 struct request_queue *q = bdev_get_queue(bdev);
108 struct request_list *rl = &q->root_rl;
109 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
110}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bda7126466c0..c3f0b7d4cfca 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -16,9 +16,6 @@
16 16
17bool f2fs_may_inline_data(struct inode *inode) 17bool f2fs_may_inline_data(struct inode *inode)
18{ 18{
19 if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
20 return false;
21
22 if (f2fs_is_atomic_file(inode)) 19 if (f2fs_is_atomic_file(inode))
23 return false; 20 return false;
24 21
@@ -177,6 +174,9 @@ int f2fs_convert_inline_inode(struct inode *inode)
177 struct page *ipage, *page; 174 struct page *ipage, *page;
178 int err = 0; 175 int err = 0;
179 176
177 if (!f2fs_has_inline_data(inode))
178 return 0;
179
180 page = grab_cache_page(inode->i_mapping, 0); 180 page = grab_cache_page(inode->i_mapping, 0);
181 if (!page) 181 if (!page)
182 return -ENOMEM; 182 return -ENOMEM;
@@ -199,6 +199,9 @@ out:
199 f2fs_unlock_op(sbi); 199 f2fs_unlock_op(sbi);
200 200
201 f2fs_put_page(page, 1); 201 f2fs_put_page(page, 1);
202
203 f2fs_balance_fs(sbi, dn.node_changed);
204
202 return err; 205 return err;
203} 206}
204 207
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 5528801a5baf..2adeff26be11 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode)
138 fi->i_pino = le32_to_cpu(ri->i_pino); 138 fi->i_pino = le32_to_cpu(ri->i_pino);
139 fi->i_dir_level = ri->i_dir_level; 139 fi->i_dir_level = ri->i_dir_level;
140 140
141 f2fs_init_extent_tree(inode, &ri->i_ext); 141 if (f2fs_init_extent_tree(inode, &ri->i_ext))
142 set_page_dirty(node_page);
142 143
143 get_inline_info(fi, ri); 144 get_inline_info(fi, ri);
144 145
@@ -222,7 +223,7 @@ bad_inode:
222 return ERR_PTR(ret); 223 return ERR_PTR(ret);
223} 224}
224 225
225void update_inode(struct inode *inode, struct page *node_page) 226int update_inode(struct inode *inode, struct page *node_page)
226{ 227{
227 struct f2fs_inode *ri; 228 struct f2fs_inode *ri;
228 229
@@ -260,15 +261,16 @@ void update_inode(struct inode *inode, struct page *node_page)
260 261
261 __set_inode_rdev(inode, ri); 262 __set_inode_rdev(inode, ri);
262 set_cold_node(inode, node_page); 263 set_cold_node(inode, node_page);
263 set_page_dirty(node_page);
264
265 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 264 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
265
266 return set_page_dirty(node_page);
266} 267}
267 268
268void update_inode_page(struct inode *inode) 269int update_inode_page(struct inode *inode)
269{ 270{
270 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 271 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
271 struct page *node_page; 272 struct page *node_page;
273 int ret = 0;
272retry: 274retry:
273 node_page = get_node_page(sbi, inode->i_ino); 275 node_page = get_node_page(sbi, inode->i_ino);
274 if (IS_ERR(node_page)) { 276 if (IS_ERR(node_page)) {
@@ -279,10 +281,11 @@ retry:
279 } else if (err != -ENOENT) { 281 } else if (err != -ENOENT) {
280 f2fs_stop_checkpoint(sbi); 282 f2fs_stop_checkpoint(sbi);
281 } 283 }
282 return; 284 return 0;
283 } 285 }
284 update_inode(inode, node_page); 286 ret = update_inode(inode, node_page);
285 f2fs_put_page(node_page, 1); 287 f2fs_put_page(node_page, 1);
288 return ret;
286} 289}
287 290
288int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 291int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -300,9 +303,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
300 * We need to balance fs here to prevent from producing dirty node pages 303 * We need to balance fs here to prevent from producing dirty node pages
301 * during the urgent cleaning time when runing out of free sections. 304 * during the urgent cleaning time when runing out of free sections.
302 */ 305 */
303 update_inode_page(inode); 306 if (update_inode_page(inode))
304 307 f2fs_balance_fs(sbi, true);
305 f2fs_balance_fs(sbi);
306 return 0; 308 return 0;
307} 309}
308 310
@@ -328,7 +330,7 @@ void f2fs_evict_inode(struct inode *inode)
328 goto out_clear; 330 goto out_clear;
329 331
330 f2fs_bug_on(sbi, get_dirty_pages(inode)); 332 f2fs_bug_on(sbi, get_dirty_pages(inode));
331 remove_dirty_dir_inode(inode); 333 remove_dirty_inode(inode);
332 334
333 f2fs_destroy_extent_tree(inode); 335 f2fs_destroy_extent_tree(inode);
334 336
@@ -358,9 +360,9 @@ no_delete:
358 if (xnid) 360 if (xnid)
359 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 361 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
360 if (is_inode_flag_set(fi, FI_APPEND_WRITE)) 362 if (is_inode_flag_set(fi, FI_APPEND_WRITE))
361 add_dirty_inode(sbi, inode->i_ino, APPEND_INO); 363 add_ino_entry(sbi, inode->i_ino, APPEND_INO);
362 if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) 364 if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
363 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 365 add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
364 if (is_inode_flag_set(fi, FI_FREE_NID)) { 366 if (is_inode_flag_set(fi, FI_FREE_NID)) {
365 if (err && err != -ENOENT) 367 if (err && err != -ENOENT)
366 alloc_nid_done(sbi, inode->i_ino); 368 alloc_nid_done(sbi, inode->i_ino);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e7587fce1b80..6f944e5eb76e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
60 if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) 60 if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
61 f2fs_set_encrypted_inode(inode); 61 f2fs_set_encrypted_inode(inode);
62 62
63 if (f2fs_may_inline_data(inode)) 63 if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
64 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 64 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
65 if (f2fs_may_inline_dentry(inode)) 65 if (f2fs_may_inline_dentry(inode))
66 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); 66 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
@@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
128 nid_t ino = 0; 128 nid_t ino = 0;
129 int err; 129 int err;
130 130
131 f2fs_balance_fs(sbi);
132
133 inode = f2fs_new_inode(dir, mode); 131 inode = f2fs_new_inode(dir, mode);
134 if (IS_ERR(inode)) 132 if (IS_ERR(inode))
135 return PTR_ERR(inode); 133 return PTR_ERR(inode);
@@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
142 inode->i_mapping->a_ops = &f2fs_dblock_aops; 140 inode->i_mapping->a_ops = &f2fs_dblock_aops;
143 ino = inode->i_ino; 141 ino = inode->i_ino;
144 142
143 f2fs_balance_fs(sbi, true);
144
145 f2fs_lock_op(sbi); 145 f2fs_lock_op(sbi);
146 err = f2fs_add_link(dentry, inode); 146 err = f2fs_add_link(dentry, inode);
147 if (err) 147 if (err)
@@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
172 !f2fs_is_child_context_consistent_with_parent(dir, inode)) 172 !f2fs_is_child_context_consistent_with_parent(dir, inode))
173 return -EPERM; 173 return -EPERM;
174 174
175 f2fs_balance_fs(sbi); 175 f2fs_balance_fs(sbi, true);
176 176
177 inode->i_ctime = CURRENT_TIME; 177 inode->i_ctime = CURRENT_TIME;
178 ihold(inode); 178 ihold(inode);
@@ -214,6 +214,15 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
214 struct page *page; 214 struct page *page;
215 int err = 0; 215 int err = 0;
216 216
217 if (f2fs_readonly(sbi->sb)) {
218 f2fs_msg(sbi->sb, KERN_INFO,
219 "skip recovering inline_dots inode (ino:%lu, pino:%u) "
220 "in readonly mountpoint", dir->i_ino, pino);
221 return 0;
222 }
223
224 f2fs_balance_fs(sbi, true);
225
217 f2fs_lock_op(sbi); 226 f2fs_lock_op(sbi);
218 227
219 de = f2fs_find_entry(dir, &dot, &page); 228 de = f2fs_find_entry(dir, &dot, &page);
@@ -288,12 +297,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
288 int err = -ENOENT; 297 int err = -ENOENT;
289 298
290 trace_f2fs_unlink_enter(dir, dentry); 299 trace_f2fs_unlink_enter(dir, dentry);
291 f2fs_balance_fs(sbi);
292 300
293 de = f2fs_find_entry(dir, &dentry->d_name, &page); 301 de = f2fs_find_entry(dir, &dentry->d_name, &page);
294 if (!de) 302 if (!de)
295 goto fail; 303 goto fail;
296 304
305 f2fs_balance_fs(sbi, true);
306
297 f2fs_lock_op(sbi); 307 f2fs_lock_op(sbi);
298 err = acquire_orphan_inode(sbi); 308 err = acquire_orphan_inode(sbi);
299 if (err) { 309 if (err) {
@@ -344,8 +354,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
344 if (len > dir->i_sb->s_blocksize) 354 if (len > dir->i_sb->s_blocksize)
345 return -ENAMETOOLONG; 355 return -ENAMETOOLONG;
346 356
347 f2fs_balance_fs(sbi);
348
349 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); 357 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
350 if (IS_ERR(inode)) 358 if (IS_ERR(inode))
351 return PTR_ERR(inode); 359 return PTR_ERR(inode);
@@ -357,6 +365,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
357 inode_nohighmem(inode); 365 inode_nohighmem(inode);
358 inode->i_mapping->a_ops = &f2fs_dblock_aops; 366 inode->i_mapping->a_ops = &f2fs_dblock_aops;
359 367
368 f2fs_balance_fs(sbi, true);
369
360 f2fs_lock_op(sbi); 370 f2fs_lock_op(sbi);
361 err = f2fs_add_link(dentry, inode); 371 err = f2fs_add_link(dentry, inode);
362 if (err) 372 if (err)
@@ -437,8 +447,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
437 struct inode *inode; 447 struct inode *inode;
438 int err; 448 int err;
439 449
440 f2fs_balance_fs(sbi);
441
442 inode = f2fs_new_inode(dir, S_IFDIR | mode); 450 inode = f2fs_new_inode(dir, S_IFDIR | mode);
443 if (IS_ERR(inode)) 451 if (IS_ERR(inode))
444 return PTR_ERR(inode); 452 return PTR_ERR(inode);
@@ -448,6 +456,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
448 inode->i_mapping->a_ops = &f2fs_dblock_aops; 456 inode->i_mapping->a_ops = &f2fs_dblock_aops;
449 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); 457 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
450 458
459 f2fs_balance_fs(sbi, true);
460
451 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 461 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
452 f2fs_lock_op(sbi); 462 f2fs_lock_op(sbi);
453 err = f2fs_add_link(dentry, inode); 463 err = f2fs_add_link(dentry, inode);
@@ -485,8 +495,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
485 struct inode *inode; 495 struct inode *inode;
486 int err = 0; 496 int err = 0;
487 497
488 f2fs_balance_fs(sbi);
489
490 inode = f2fs_new_inode(dir, mode); 498 inode = f2fs_new_inode(dir, mode);
491 if (IS_ERR(inode)) 499 if (IS_ERR(inode))
492 return PTR_ERR(inode); 500 return PTR_ERR(inode);
@@ -494,6 +502,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
494 init_special_inode(inode, inode->i_mode, rdev); 502 init_special_inode(inode, inode->i_mode, rdev);
495 inode->i_op = &f2fs_special_inode_operations; 503 inode->i_op = &f2fs_special_inode_operations;
496 504
505 f2fs_balance_fs(sbi, true);
506
497 f2fs_lock_op(sbi); 507 f2fs_lock_op(sbi);
498 err = f2fs_add_link(dentry, inode); 508 err = f2fs_add_link(dentry, inode);
499 if (err) 509 if (err)
@@ -520,9 +530,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
520 struct inode *inode; 530 struct inode *inode;
521 int err; 531 int err;
522 532
523 if (!whiteout)
524 f2fs_balance_fs(sbi);
525
526 inode = f2fs_new_inode(dir, mode); 533 inode = f2fs_new_inode(dir, mode);
527 if (IS_ERR(inode)) 534 if (IS_ERR(inode))
528 return PTR_ERR(inode); 535 return PTR_ERR(inode);
@@ -536,6 +543,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
536 inode->i_mapping->a_ops = &f2fs_dblock_aops; 543 inode->i_mapping->a_ops = &f2fs_dblock_aops;
537 } 544 }
538 545
546 f2fs_balance_fs(sbi, true);
547
539 f2fs_lock_op(sbi); 548 f2fs_lock_op(sbi);
540 err = acquire_orphan_inode(sbi); 549 err = acquire_orphan_inode(sbi);
541 if (err) 550 if (err)
@@ -608,8 +617,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
608 goto out; 617 goto out;
609 } 618 }
610 619
611 f2fs_balance_fs(sbi);
612
613 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 620 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
614 if (!old_entry) 621 if (!old_entry)
615 goto out; 622 goto out;
@@ -639,6 +646,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
639 if (!new_entry) 646 if (!new_entry)
640 goto out_whiteout; 647 goto out_whiteout;
641 648
649 f2fs_balance_fs(sbi, true);
650
642 f2fs_lock_op(sbi); 651 f2fs_lock_op(sbi);
643 652
644 err = acquire_orphan_inode(sbi); 653 err = acquire_orphan_inode(sbi);
@@ -670,6 +679,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
670 update_inode_page(old_inode); 679 update_inode_page(old_inode);
671 update_inode_page(new_inode); 680 update_inode_page(new_inode);
672 } else { 681 } else {
682 f2fs_balance_fs(sbi, true);
683
673 f2fs_lock_op(sbi); 684 f2fs_lock_op(sbi);
674 685
675 err = f2fs_add_link(new_dentry, old_inode); 686 err = f2fs_add_link(new_dentry, old_inode);
@@ -767,8 +778,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
767 new_inode))) 778 new_inode)))
768 return -EPERM; 779 return -EPERM;
769 780
770 f2fs_balance_fs(sbi);
771
772 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 781 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
773 if (!old_entry) 782 if (!old_entry)
774 goto out; 783 goto out;
@@ -811,6 +820,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
811 goto out_new_dir; 820 goto out_new_dir;
812 } 821 }
813 822
823 f2fs_balance_fs(sbi, true);
824
814 f2fs_lock_op(sbi); 825 f2fs_lock_op(sbi);
815 826
816 err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); 827 err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
@@ -933,7 +944,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
933{ 944{
934 struct page *cpage = NULL; 945 struct page *cpage = NULL;
935 char *caddr, *paddr = NULL; 946 char *caddr, *paddr = NULL;
936 struct f2fs_str cstr; 947 struct f2fs_str cstr = FSTR_INIT(NULL, 0);
937 struct f2fs_str pstr = FSTR_INIT(NULL, 0); 948 struct f2fs_str pstr = FSTR_INIT(NULL, 0);
938 struct f2fs_encrypted_symlink_data *sd; 949 struct f2fs_encrypted_symlink_data *sd;
939 loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); 950 loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
@@ -956,6 +967,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
956 /* Symlink is encrypted */ 967 /* Symlink is encrypted */
957 sd = (struct f2fs_encrypted_symlink_data *)caddr; 968 sd = (struct f2fs_encrypted_symlink_data *)caddr;
958 cstr.len = le16_to_cpu(sd->len); 969 cstr.len = le16_to_cpu(sd->len);
970
971 /* this is broken symlink case */
972 if (unlikely(cstr.len == 0)) {
973 res = -ENOENT;
974 goto errout;
975 }
959 cstr.name = kmalloc(cstr.len, GFP_NOFS); 976 cstr.name = kmalloc(cstr.len, GFP_NOFS);
960 if (!cstr.name) { 977 if (!cstr.name) {
961 res = -ENOMEM; 978 res = -ENOMEM;
@@ -964,7 +981,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
964 memcpy(cstr.name, sd->encrypted_path, cstr.len); 981 memcpy(cstr.name, sd->encrypted_path, cstr.len);
965 982
966 /* this is broken symlink case */ 983 /* this is broken symlink case */
967 if (cstr.name[0] == 0 && cstr.len == 0) { 984 if (unlikely(cstr.name[0] == 0)) {
968 res = -ENOENT; 985 res = -ENOENT;
969 goto errout; 986 goto errout;
970 } 987 }
@@ -1005,10 +1022,12 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
1005 .get_link = f2fs_encrypted_get_link, 1022 .get_link = f2fs_encrypted_get_link,
1006 .getattr = f2fs_getattr, 1023 .getattr = f2fs_getattr,
1007 .setattr = f2fs_setattr, 1024 .setattr = f2fs_setattr,
1025#ifdef CONFIG_F2FS_FS_XATTR
1008 .setxattr = generic_setxattr, 1026 .setxattr = generic_setxattr,
1009 .getxattr = generic_getxattr, 1027 .getxattr = generic_getxattr,
1010 .listxattr = f2fs_listxattr, 1028 .listxattr = f2fs_listxattr,
1011 .removexattr = generic_removexattr, 1029 .removexattr = generic_removexattr,
1030#endif
1012}; 1031};
1013#endif 1032#endif
1014 1033
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 7bcbc6e9c40d..342597a5897f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -65,13 +65,14 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
65 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; 65 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
66 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 66 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
67 } else if (type == EXTENT_CACHE) { 67 } else if (type == EXTENT_CACHE) {
68 mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + 68 mem_size = (atomic_read(&sbi->total_ext_tree) *
69 sizeof(struct extent_tree) +
69 atomic_read(&sbi->total_ext_node) * 70 atomic_read(&sbi->total_ext_node) *
70 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; 71 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
71 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 72 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
72 } else { 73 } else {
73 if (sbi->sb->s_bdi->wb.dirty_exceeded) 74 if (!sbi->sb->s_bdi->wb.dirty_exceeded)
74 return false; 75 return true;
75 } 76 }
76 return res; 77 return res;
77} 78}
@@ -261,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
261{ 262{
262 struct nat_entry *e; 263 struct nat_entry *e;
263 264
264 down_write(&nm_i->nat_tree_lock);
265 e = __lookup_nat_cache(nm_i, nid); 265 e = __lookup_nat_cache(nm_i, nid);
266 if (!e) { 266 if (!e) {
267 e = grab_nat_entry(nm_i, nid); 267 e = grab_nat_entry(nm_i, nid);
268 node_info_from_raw_nat(&e->ni, ne); 268 node_info_from_raw_nat(&e->ni, ne);
269 } 269 }
270 up_write(&nm_i->nat_tree_lock);
271} 270}
272 271
273static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 272static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -379,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
379 378
380 memset(&ne, 0, sizeof(struct f2fs_nat_entry)); 379 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
381 380
381 down_write(&nm_i->nat_tree_lock);
382
382 /* Check current segment summary */ 383 /* Check current segment summary */
383 mutex_lock(&curseg->curseg_mutex); 384 mutex_lock(&curseg->curseg_mutex);
384 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); 385 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -399,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
399cache: 400cache:
400 /* cache nat entry */ 401 /* cache nat entry */
401 cache_nat_entry(NM_I(sbi), nid, &ne); 402 cache_nat_entry(NM_I(sbi), nid, &ne);
403 up_write(&nm_i->nat_tree_lock);
402} 404}
403 405
404/* 406/*
@@ -676,7 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
676 ret = truncate_dnode(&rdn); 678 ret = truncate_dnode(&rdn);
677 if (ret < 0) 679 if (ret < 0)
678 goto out_err; 680 goto out_err;
679 set_nid(page, i, 0, false); 681 if (set_nid(page, i, 0, false))
682 dn->node_changed = true;
680 } 683 }
681 } else { 684 } else {
682 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1; 685 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
@@ -689,7 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
689 rdn.nid = child_nid; 692 rdn.nid = child_nid;
690 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); 693 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
691 if (ret == (NIDS_PER_BLOCK + 1)) { 694 if (ret == (NIDS_PER_BLOCK + 1)) {
692 set_nid(page, i, 0, false); 695 if (set_nid(page, i, 0, false))
696 dn->node_changed = true;
693 child_nofs += ret; 697 child_nofs += ret;
694 } else if (ret < 0 && ret != -ENOENT) { 698 } else if (ret < 0 && ret != -ENOENT) {
695 goto out_err; 699 goto out_err;
@@ -750,7 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
750 err = truncate_dnode(dn); 754 err = truncate_dnode(dn);
751 if (err < 0) 755 if (err < 0)
752 goto fail; 756 goto fail;
753 set_nid(pages[idx], i, 0, false); 757 if (set_nid(pages[idx], i, 0, false))
758 dn->node_changed = true;
754 } 759 }
755 760
756 if (offset[idx + 1] == 0) { 761 if (offset[idx + 1] == 0) {
@@ -975,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
975 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 980 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
976 set_cold_node(dn->inode, page); 981 set_cold_node(dn->inode, page);
977 SetPageUptodate(page); 982 SetPageUptodate(page);
978 set_page_dirty(page); 983 if (set_page_dirty(page))
984 dn->node_changed = true;
979 985
980 if (f2fs_has_xattr_block(ofs)) 986 if (f2fs_has_xattr_block(ofs))
981 F2FS_I(dn->inode)->i_xattr_nid = dn->nid; 987 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
@@ -1035,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1035 struct page *apage; 1041 struct page *apage;
1036 int err; 1042 int err;
1037 1043
1044 if (!nid)
1045 return;
1046 f2fs_bug_on(sbi, check_nid_range(sbi, nid));
1047
1038 apage = find_get_page(NODE_MAPPING(sbi), nid); 1048 apage = find_get_page(NODE_MAPPING(sbi), nid);
1039 if (apage && PageUptodate(apage)) { 1049 if (apage && PageUptodate(apage)) {
1040 f2fs_put_page(apage, 0); 1050 f2fs_put_page(apage, 0);
@@ -1050,51 +1060,38 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1050 f2fs_put_page(apage, err ? 1 : 0); 1060 f2fs_put_page(apage, err ? 1 : 0);
1051} 1061}
1052 1062
1053struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 1063/*
1064 * readahead MAX_RA_NODE number of node pages.
1065 */
1066void ra_node_pages(struct page *parent, int start)
1054{ 1067{
1055 struct page *page; 1068 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1056 int err; 1069 struct blk_plug plug;
1057repeat: 1070 int i, end;
1058 page = grab_cache_page(NODE_MAPPING(sbi), nid); 1071 nid_t nid;
1059 if (!page)
1060 return ERR_PTR(-ENOMEM);
1061 1072
1062 err = read_node_page(page, READ_SYNC); 1073 blk_start_plug(&plug);
1063 if (err < 0) {
1064 f2fs_put_page(page, 1);
1065 return ERR_PTR(err);
1066 } else if (err != LOCKED_PAGE) {
1067 lock_page(page);
1068 }
1069 1074
1070 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { 1075 /* Then, try readahead for siblings of the desired node */
1071 ClearPageUptodate(page); 1076 end = start + MAX_RA_NODE;
1072 f2fs_put_page(page, 1); 1077 end = min(end, NIDS_PER_BLOCK);
1073 return ERR_PTR(-EIO); 1078 for (i = start; i < end; i++) {
1074 } 1079 nid = get_nid(parent, i, false);
1075 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1080 ra_node_page(sbi, nid);
1076 f2fs_put_page(page, 1);
1077 goto repeat;
1078 } 1081 }
1079 return page; 1082
1083 blk_finish_plug(&plug);
1080} 1084}
1081 1085
1082/* 1086struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
1083 * Return a locked page for the desired node page. 1087 struct page *parent, int start)
1084 * And, readahead MAX_RA_NODE number of node pages.
1085 */
1086struct page *get_node_page_ra(struct page *parent, int start)
1087{ 1088{
1088 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1089 struct blk_plug plug;
1090 struct page *page; 1089 struct page *page;
1091 int err, i, end; 1090 int err;
1092 nid_t nid;
1093 1091
1094 /* First, try getting the desired direct node. */
1095 nid = get_nid(parent, start, false);
1096 if (!nid) 1092 if (!nid)
1097 return ERR_PTR(-ENOENT); 1093 return ERR_PTR(-ENOENT);
1094 f2fs_bug_on(sbi, check_nid_range(sbi, nid));
1098repeat: 1095repeat:
1099 page = grab_cache_page(NODE_MAPPING(sbi), nid); 1096 page = grab_cache_page(NODE_MAPPING(sbi), nid);
1100 if (!page) 1097 if (!page)
@@ -1108,46 +1105,53 @@ repeat:
1108 goto page_hit; 1105 goto page_hit;
1109 } 1106 }
1110 1107
1111 blk_start_plug(&plug); 1108 if (parent)
1112 1109 ra_node_pages(parent, start + 1);
1113 /* Then, try readahead for siblings of the desired node */
1114 end = start + MAX_RA_NODE;
1115 end = min(end, NIDS_PER_BLOCK);
1116 for (i = start + 1; i < end; i++) {
1117 nid = get_nid(parent, i, false);
1118 if (!nid)
1119 continue;
1120 ra_node_page(sbi, nid);
1121 }
1122
1123 blk_finish_plug(&plug);
1124 1110
1125 lock_page(page); 1111 lock_page(page);
1112
1113 if (unlikely(!PageUptodate(page))) {
1114 f2fs_put_page(page, 1);
1115 return ERR_PTR(-EIO);
1116 }
1126 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1117 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1127 f2fs_put_page(page, 1); 1118 f2fs_put_page(page, 1);
1128 goto repeat; 1119 goto repeat;
1129 } 1120 }
1130page_hit: 1121page_hit:
1131 if (unlikely(!PageUptodate(page))) { 1122 f2fs_bug_on(sbi, nid != nid_of_node(page));
1132 f2fs_put_page(page, 1);
1133 return ERR_PTR(-EIO);
1134 }
1135 return page; 1123 return page;
1136} 1124}
1137 1125
1126struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
1127{
1128 return __get_node_page(sbi, nid, NULL, 0);
1129}
1130
1131struct page *get_node_page_ra(struct page *parent, int start)
1132{
1133 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1134 nid_t nid = get_nid(parent, start, false);
1135
1136 return __get_node_page(sbi, nid, parent, start);
1137}
1138
1138void sync_inode_page(struct dnode_of_data *dn) 1139void sync_inode_page(struct dnode_of_data *dn)
1139{ 1140{
1141 int ret = 0;
1142
1140 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { 1143 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
1141 update_inode(dn->inode, dn->node_page); 1144 ret = update_inode(dn->inode, dn->node_page);
1142 } else if (dn->inode_page) { 1145 } else if (dn->inode_page) {
1143 if (!dn->inode_page_locked) 1146 if (!dn->inode_page_locked)
1144 lock_page(dn->inode_page); 1147 lock_page(dn->inode_page);
1145 update_inode(dn->inode, dn->inode_page); 1148 ret = update_inode(dn->inode, dn->inode_page);
1146 if (!dn->inode_page_locked) 1149 if (!dn->inode_page_locked)
1147 unlock_page(dn->inode_page); 1150 unlock_page(dn->inode_page);
1148 } else { 1151 } else {
1149 update_inode_page(dn->inode); 1152 ret = update_inode_page(dn->inode);
1150 } 1153 }
1154 dn->node_changed = ret ? true: false;
1151} 1155}
1152 1156
1153int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1157int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
@@ -1175,6 +1179,11 @@ next_step:
1175 for (i = 0; i < nr_pages; i++) { 1179 for (i = 0; i < nr_pages; i++) {
1176 struct page *page = pvec.pages[i]; 1180 struct page *page = pvec.pages[i];
1177 1181
1182 if (unlikely(f2fs_cp_error(sbi))) {
1183 pagevec_release(&pvec);
1184 return -EIO;
1185 }
1186
1178 /* 1187 /*
1179 * flushing sequence with step: 1188 * flushing sequence with step:
1180 * 0. indirect nodes 1189 * 0. indirect nodes
@@ -1349,7 +1358,7 @@ static int f2fs_write_node_page(struct page *page,
1349 up_read(&sbi->node_write); 1358 up_read(&sbi->node_write);
1350 unlock_page(page); 1359 unlock_page(page);
1351 1360
1352 if (wbc->for_reclaim) 1361 if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
1353 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1362 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1354 1363
1355 return 0; 1364 return 0;
@@ -1440,13 +1449,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1440 1449
1441 if (build) { 1450 if (build) {
1442 /* do not add allocated nids */ 1451 /* do not add allocated nids */
1443 down_read(&nm_i->nat_tree_lock);
1444 ne = __lookup_nat_cache(nm_i, nid); 1452 ne = __lookup_nat_cache(nm_i, nid);
1445 if (ne && 1453 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1446 (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1447 nat_get_blkaddr(ne) != NULL_ADDR)) 1454 nat_get_blkaddr(ne) != NULL_ADDR))
1448 allocated = true; 1455 allocated = true;
1449 up_read(&nm_i->nat_tree_lock);
1450 if (allocated) 1456 if (allocated)
1451 return 0; 1457 return 0;
1452 } 1458 }
@@ -1532,6 +1538,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1532 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, 1538 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
1533 META_NAT, true); 1539 META_NAT, true);
1534 1540
1541 down_read(&nm_i->nat_tree_lock);
1542
1535 while (1) { 1543 while (1) {
1536 struct page *page = get_current_nat_page(sbi, nid); 1544 struct page *page = get_current_nat_page(sbi, nid);
1537 1545
@@ -1560,6 +1568,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1560 remove_free_nid(nm_i, nid); 1568 remove_free_nid(nm_i, nid);
1561 } 1569 }
1562 mutex_unlock(&curseg->curseg_mutex); 1570 mutex_unlock(&curseg->curseg_mutex);
1571 up_read(&nm_i->nat_tree_lock);
1563 1572
1564 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), 1573 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
1565 nm_i->ra_nid_pages, META_NAT, false); 1574 nm_i->ra_nid_pages, META_NAT, false);
@@ -1582,8 +1591,6 @@ retry:
1582 1591
1583 /* We should not use stale free nids created by build_free_nids */ 1592 /* We should not use stale free nids created by build_free_nids */
1584 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1593 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1585 struct node_info ni;
1586
1587 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1594 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1588 list_for_each_entry(i, &nm_i->free_nid_list, list) 1595 list_for_each_entry(i, &nm_i->free_nid_list, list)
1589 if (i->state == NID_NEW) 1596 if (i->state == NID_NEW)
@@ -1594,13 +1601,6 @@ retry:
1594 i->state = NID_ALLOC; 1601 i->state = NID_ALLOC;
1595 nm_i->fcnt--; 1602 nm_i->fcnt--;
1596 spin_unlock(&nm_i->free_nid_list_lock); 1603 spin_unlock(&nm_i->free_nid_list_lock);
1597
1598 /* check nid is allocated already */
1599 get_node_info(sbi, *nid, &ni);
1600 if (ni.blk_addr != NULL_ADDR) {
1601 alloc_nid_done(sbi, *nid);
1602 goto retry;
1603 }
1604 return true; 1604 return true;
1605 } 1605 }
1606 spin_unlock(&nm_i->free_nid_list_lock); 1606 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1842,14 +1842,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1842 1842
1843 raw_ne = nat_in_journal(sum, i); 1843 raw_ne = nat_in_journal(sum, i);
1844 1844
1845 down_write(&nm_i->nat_tree_lock);
1846 ne = __lookup_nat_cache(nm_i, nid); 1845 ne = __lookup_nat_cache(nm_i, nid);
1847 if (!ne) { 1846 if (!ne) {
1848 ne = grab_nat_entry(nm_i, nid); 1847 ne = grab_nat_entry(nm_i, nid);
1849 node_info_from_raw_nat(&ne->ni, &raw_ne); 1848 node_info_from_raw_nat(&ne->ni, &raw_ne);
1850 } 1849 }
1851 __set_nat_cache_dirty(nm_i, ne); 1850 __set_nat_cache_dirty(nm_i, ne);
1852 up_write(&nm_i->nat_tree_lock);
1853 } 1851 }
1854 update_nats_in_cursum(sum, -i); 1852 update_nats_in_cursum(sum, -i);
1855 mutex_unlock(&curseg->curseg_mutex); 1853 mutex_unlock(&curseg->curseg_mutex);
@@ -1883,7 +1881,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1883 struct f2fs_nat_block *nat_blk; 1881 struct f2fs_nat_block *nat_blk;
1884 struct nat_entry *ne, *cur; 1882 struct nat_entry *ne, *cur;
1885 struct page *page = NULL; 1883 struct page *page = NULL;
1886 struct f2fs_nm_info *nm_i = NM_I(sbi);
1887 1884
1888 /* 1885 /*
1889 * there are two steps to flush nat entries: 1886 * there are two steps to flush nat entries:
@@ -1920,12 +1917,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1920 raw_ne = &nat_blk->entries[nid - start_nid]; 1917 raw_ne = &nat_blk->entries[nid - start_nid];
1921 } 1918 }
1922 raw_nat_from_node_info(raw_ne, &ne->ni); 1919 raw_nat_from_node_info(raw_ne, &ne->ni);
1923
1924 down_write(&NM_I(sbi)->nat_tree_lock);
1925 nat_reset_flag(ne); 1920 nat_reset_flag(ne);
1926 __clear_nat_cache_dirty(NM_I(sbi), ne); 1921 __clear_nat_cache_dirty(NM_I(sbi), ne);
1927 up_write(&NM_I(sbi)->nat_tree_lock);
1928
1929 if (nat_get_blkaddr(ne) == NULL_ADDR) 1922 if (nat_get_blkaddr(ne) == NULL_ADDR)
1930 add_free_nid(sbi, nid, false); 1923 add_free_nid(sbi, nid, false);
1931 } 1924 }
@@ -1937,9 +1930,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1937 1930
1938 f2fs_bug_on(sbi, set->entry_cnt); 1931 f2fs_bug_on(sbi, set->entry_cnt);
1939 1932
1940 down_write(&nm_i->nat_tree_lock);
1941 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 1933 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1942 up_write(&nm_i->nat_tree_lock);
1943 kmem_cache_free(nat_entry_set_slab, set); 1934 kmem_cache_free(nat_entry_set_slab, set);
1944} 1935}
1945 1936
@@ -1959,6 +1950,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1959 1950
1960 if (!nm_i->dirty_nat_cnt) 1951 if (!nm_i->dirty_nat_cnt)
1961 return; 1952 return;
1953
1954 down_write(&nm_i->nat_tree_lock);
1955
1962 /* 1956 /*
1963 * if there are no enough space in journal to store dirty nat 1957 * if there are no enough space in journal to store dirty nat
1964 * entries, remove all entries from journal and merge them 1958 * entries, remove all entries from journal and merge them
@@ -1967,7 +1961,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1967 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 1961 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1968 remove_nats_in_journal(sbi); 1962 remove_nats_in_journal(sbi);
1969 1963
1970 down_write(&nm_i->nat_tree_lock);
1971 while ((found = __gang_lookup_nat_set(nm_i, 1964 while ((found = __gang_lookup_nat_set(nm_i,
1972 set_idx, SETVEC_SIZE, setvec))) { 1965 set_idx, SETVEC_SIZE, setvec))) {
1973 unsigned idx; 1966 unsigned idx;
@@ -1976,12 +1969,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1976 __adjust_nat_entry_set(setvec[idx], &sets, 1969 __adjust_nat_entry_set(setvec[idx], &sets,
1977 MAX_NAT_JENTRIES(sum)); 1970 MAX_NAT_JENTRIES(sum));
1978 } 1971 }
1979 up_write(&nm_i->nat_tree_lock);
1980 1972
1981 /* flush dirty nats in nat entry set */ 1973 /* flush dirty nats in nat entry set */
1982 list_for_each_entry_safe(set, tmp, &sets, set_list) 1974 list_for_each_entry_safe(set, tmp, &sets, set_list)
1983 __flush_nat_entry_set(sbi, set); 1975 __flush_nat_entry_set(sbi, set);
1984 1976
1977 up_write(&nm_i->nat_tree_lock);
1978
1985 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 1979 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
1986} 1980}
1987 1981
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index e4fffd2d98c4..d4d1f636fe1c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
183 183
184 block_addr = (pgoff_t)(nm_i->nat_blkaddr + 184 block_addr = (pgoff_t)(nm_i->nat_blkaddr +
185 (seg_off << sbi->log_blocks_per_seg << 1) + 185 (seg_off << sbi->log_blocks_per_seg << 1) +
186 (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); 186 (block_off & (sbi->blocks_per_seg - 1)));
187 187
188 if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) 188 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
189 block_addr += sbi->blocks_per_seg; 189 block_addr += sbi->blocks_per_seg;
@@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page)
317 return true; 317 return true;
318} 318}
319 319
320static inline void set_nid(struct page *p, int off, nid_t nid, bool i) 320static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
321{ 321{
322 struct f2fs_node *rn = F2FS_NODE(p); 322 struct f2fs_node *rn = F2FS_NODE(p);
323 323
@@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
327 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); 327 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
328 else 328 else
329 rn->in.nid[off] = cpu_to_le32(nid); 329 rn->in.nid[off] = cpu_to_le32(nid);
330 set_page_dirty(p); 330 return set_page_dirty(p);
331} 331}
332 332
333static inline nid_t get_nid(struct page *p, int off, bool i) 333static inline nid_t get_nid(struct page *p, int off, bool i)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index cbf74f47cce8..589b20b8677b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page)
168 ino_of_node(page), name); 168 ino_of_node(page), name);
169} 169}
170 170
171static bool is_same_inode(struct inode *inode, struct page *ipage)
172{
173 struct f2fs_inode *ri = F2FS_INODE(ipage);
174 struct timespec disk;
175
176 if (!IS_INODE(ipage))
177 return true;
178
179 disk.tv_sec = le64_to_cpu(ri->i_ctime);
180 disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
181 if (timespec_compare(&inode->i_ctime, &disk) > 0)
182 return false;
183
184 disk.tv_sec = le64_to_cpu(ri->i_atime);
185 disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
186 if (timespec_compare(&inode->i_atime, &disk) > 0)
187 return false;
188
189 disk.tv_sec = le64_to_cpu(ri->i_mtime);
190 disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
191 if (timespec_compare(&inode->i_mtime, &disk) > 0)
192 return false;
193
194 return true;
195}
196
171static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) 197static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
172{ 198{
173 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 199 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
@@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
197 goto next; 223 goto next;
198 224
199 entry = get_fsync_inode(head, ino_of_node(page)); 225 entry = get_fsync_inode(head, ino_of_node(page));
200 if (!entry) { 226 if (entry) {
227 if (!is_same_inode(entry->inode, page))
228 goto next;
229 } else {
201 if (IS_INODE(page) && is_dent_dnode(page)) { 230 if (IS_INODE(page) && is_dent_dnode(page)) {
202 err = recover_inode_page(sbi, page); 231 err = recover_inode_page(sbi, page);
203 if (err) 232 if (err)
@@ -459,8 +488,7 @@ out:
459 return err; 488 return err;
460} 489}
461 490
462static int recover_data(struct f2fs_sb_info *sbi, 491static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
463 struct list_head *head, int type)
464{ 492{
465 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 493 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
466 struct curseg_info *curseg; 494 struct curseg_info *curseg;
@@ -469,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
469 block_t blkaddr; 497 block_t blkaddr;
470 498
471 /* get node pages in the current segment */ 499 /* get node pages in the current segment */
472 curseg = CURSEG_I(sbi, type); 500 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
473 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 501 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
474 502
475 while (1) { 503 while (1) {
@@ -556,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
556 need_writecp = true; 584 need_writecp = true;
557 585
558 /* step #2: recover data */ 586 /* step #2: recover data */
559 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 587 err = recover_data(sbi, &inode_list);
560 if (!err) 588 if (!err)
561 f2fs_bug_on(sbi, !list_empty(&inode_list)); 589 f2fs_bug_on(sbi, !list_empty(&inode_list));
562out: 590out:
@@ -595,7 +623,7 @@ out:
595 .reason = CP_RECOVERY, 623 .reason = CP_RECOVERY,
596 }; 624 };
597 mutex_unlock(&sbi->cp_mutex); 625 mutex_unlock(&sbi->cp_mutex);
598 write_checkpoint(sbi, &cpc); 626 err = write_checkpoint(sbi, &cpc);
599 } else { 627 } else {
600 mutex_unlock(&sbi->cp_mutex); 628 mutex_unlock(&sbi->cp_mutex);
601 } 629 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f77b3258454a..5904a411c86f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
86/* 86/*
87 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because 87 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
88 * f2fs_set_bit makes MSB and LSB reversed in a byte. 88 * f2fs_set_bit makes MSB and LSB reversed in a byte.
89 * @size must be integral times of unsigned long.
89 * Example: 90 * Example:
90 * MSB <--> LSB 91 * MSB <--> LSB
91 * f2fs_set_bit(0, bitmap) => 1000 0000 92 * f2fs_set_bit(0, bitmap) => 1000 0000
@@ -95,94 +96,73 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
95 unsigned long size, unsigned long offset) 96 unsigned long size, unsigned long offset)
96{ 97{
97 const unsigned long *p = addr + BIT_WORD(offset); 98 const unsigned long *p = addr + BIT_WORD(offset);
98 unsigned long result = offset & ~(BITS_PER_LONG - 1); 99 unsigned long result = size;
99 unsigned long tmp; 100 unsigned long tmp;
100 101
101 if (offset >= size) 102 if (offset >= size)
102 return size; 103 return size;
103 104
104 size -= result; 105 size -= (offset & ~(BITS_PER_LONG - 1));
105 offset %= BITS_PER_LONG; 106 offset %= BITS_PER_LONG;
106 if (!offset) 107
107 goto aligned; 108 while (1) {
108 109 if (*p == 0)
109 tmp = __reverse_ulong((unsigned char *)p); 110 goto pass;
110 tmp &= ~0UL >> offset; 111
111
112 if (size < BITS_PER_LONG)
113 goto found_first;
114 if (tmp)
115 goto found_middle;
116
117 size -= BITS_PER_LONG;
118 result += BITS_PER_LONG;
119 p++;
120aligned:
121 while (size & ~(BITS_PER_LONG-1)) {
122 tmp = __reverse_ulong((unsigned char *)p); 112 tmp = __reverse_ulong((unsigned char *)p);
113
114 tmp &= ~0UL >> offset;
115 if (size < BITS_PER_LONG)
116 tmp &= (~0UL << (BITS_PER_LONG - size));
123 if (tmp) 117 if (tmp)
124 goto found_middle; 118 goto found;
125 result += BITS_PER_LONG; 119pass:
120 if (size <= BITS_PER_LONG)
121 break;
126 size -= BITS_PER_LONG; 122 size -= BITS_PER_LONG;
123 offset = 0;
127 p++; 124 p++;
128 } 125 }
129 if (!size) 126 return result;
130 return result; 127found:
131 128 return result - size + __reverse_ffs(tmp);
132 tmp = __reverse_ulong((unsigned char *)p);
133found_first:
134 tmp &= (~0UL << (BITS_PER_LONG - size));
135 if (!tmp) /* Are any bits set? */
136 return result + size; /* Nope. */
137found_middle:
138 return result + __reverse_ffs(tmp);
139} 129}
140 130
141static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, 131static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
142 unsigned long size, unsigned long offset) 132 unsigned long size, unsigned long offset)
143{ 133{
144 const unsigned long *p = addr + BIT_WORD(offset); 134 const unsigned long *p = addr + BIT_WORD(offset);
145 unsigned long result = offset & ~(BITS_PER_LONG - 1); 135 unsigned long result = size;
146 unsigned long tmp; 136 unsigned long tmp;
147 137
148 if (offset >= size) 138 if (offset >= size)
149 return size; 139 return size;
150 140
151 size -= result; 141 size -= (offset & ~(BITS_PER_LONG - 1));
152 offset %= BITS_PER_LONG; 142 offset %= BITS_PER_LONG;
153 if (!offset) 143
154 goto aligned; 144 while (1) {
155 145 if (*p == ~0UL)
156 tmp = __reverse_ulong((unsigned char *)p); 146 goto pass;
157 tmp |= ~((~0UL << offset) >> offset); 147
158
159 if (size < BITS_PER_LONG)
160 goto found_first;
161 if (tmp != ~0UL)
162 goto found_middle;
163
164 size -= BITS_PER_LONG;
165 result += BITS_PER_LONG;
166 p++;
167aligned:
168 while (size & ~(BITS_PER_LONG - 1)) {
169 tmp = __reverse_ulong((unsigned char *)p); 148 tmp = __reverse_ulong((unsigned char *)p);
149
150 if (offset)
151 tmp |= ~0UL << (BITS_PER_LONG - offset);
152 if (size < BITS_PER_LONG)
153 tmp |= ~0UL >> size;
170 if (tmp != ~0UL) 154 if (tmp != ~0UL)
171 goto found_middle; 155 goto found;
172 result += BITS_PER_LONG; 156pass:
157 if (size <= BITS_PER_LONG)
158 break;
173 size -= BITS_PER_LONG; 159 size -= BITS_PER_LONG;
160 offset = 0;
174 p++; 161 p++;
175 } 162 }
176 if (!size) 163 return result;
177 return result; 164found:
178 165 return result - size + __reverse_ffz(tmp);
179 tmp = __reverse_ulong((unsigned char *)p);
180found_first:
181 tmp |= ~(~0UL << (BITS_PER_LONG - size));
182 if (tmp == ~0UL) /* Are any bits zero? */
183 return result + size; /* Nope. */
184found_middle:
185 return result + __reverse_ffz(tmp);
186} 166}
187 167
188void register_inmem_page(struct inode *inode, struct page *page) 168void register_inmem_page(struct inode *inode, struct page *page)
@@ -233,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
233 * inode becomes free by iget_locked in f2fs_iget. 213 * inode becomes free by iget_locked in f2fs_iget.
234 */ 214 */
235 if (!abort) { 215 if (!abort) {
236 f2fs_balance_fs(sbi); 216 f2fs_balance_fs(sbi, true);
237 f2fs_lock_op(sbi); 217 f2fs_lock_op(sbi);
238 } 218 }
239 219
@@ -257,6 +237,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
257 submit_bio = true; 237 submit_bio = true;
258 } 238 }
259 } else { 239 } else {
240 ClearPageUptodate(cur->page);
260 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); 241 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
261 } 242 }
262 set_page_private(cur->page, 0); 243 set_page_private(cur->page, 0);
@@ -281,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort)
281 * This function balances dirty node and dentry pages. 262 * This function balances dirty node and dentry pages.
282 * In addition, it controls garbage collection. 263 * In addition, it controls garbage collection.
283 */ 264 */
284void f2fs_balance_fs(struct f2fs_sb_info *sbi) 265void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
285{ 266{
267 if (!need)
268 return;
286 /* 269 /*
287 * We should do GC or end up with checkpoint, if there are so many dirty 270 * We should do GC or end up with checkpoint, if there are so many dirty
288 * dir/node pages without enough free segments. 271 * dir/node pages without enough free segments.
@@ -310,8 +293,12 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
310 if (!available_free_memory(sbi, NAT_ENTRIES) || 293 if (!available_free_memory(sbi, NAT_ENTRIES) ||
311 excess_prefree_segs(sbi) || 294 excess_prefree_segs(sbi) ||
312 !available_free_memory(sbi, INO_ENTRIES) || 295 !available_free_memory(sbi, INO_ENTRIES) ||
313 jiffies > sbi->cp_expires) 296 (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
297 if (test_opt(sbi, DATA_FLUSH))
298 sync_dirty_inodes(sbi, FILE_INODE);
314 f2fs_sync_fs(sbi->sb, true); 299 f2fs_sync_fs(sbi->sb, true);
300 stat_inc_bg_cp_count(sbi->stat_info);
301 }
315} 302}
316 303
317static int issue_flush_thread(void *data) 304static int issue_flush_thread(void *data)
@@ -1134,6 +1121,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1134 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; 1121 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
1135 unsigned int start_segno, end_segno; 1122 unsigned int start_segno, end_segno;
1136 struct cp_control cpc; 1123 struct cp_control cpc;
1124 int err = 0;
1137 1125
1138 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) 1126 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
1139 return -EINVAL; 1127 return -EINVAL;
@@ -1164,12 +1152,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1164 sbi->segs_per_sec) - 1, end_segno); 1152 sbi->segs_per_sec) - 1, end_segno);
1165 1153
1166 mutex_lock(&sbi->gc_mutex); 1154 mutex_lock(&sbi->gc_mutex);
1167 write_checkpoint(sbi, &cpc); 1155 err = write_checkpoint(sbi, &cpc);
1168 mutex_unlock(&sbi->gc_mutex); 1156 mutex_unlock(&sbi->gc_mutex);
1169 } 1157 }
1170out: 1158out:
1171 range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); 1159 range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
1172 return 0; 1160 return err;
1173} 1161}
1174 1162
1175static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 1163static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
@@ -1749,13 +1737,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1749 if (le32_to_cpu(nid_in_journal(sum, i)) == val) 1737 if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1750 return i; 1738 return i;
1751 } 1739 }
1752 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) 1740 if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
1753 return update_nats_in_cursum(sum, 1); 1741 return update_nats_in_cursum(sum, 1);
1754 } else if (type == SIT_JOURNAL) { 1742 } else if (type == SIT_JOURNAL) {
1755 for (i = 0; i < sits_in_cursum(sum); i++) 1743 for (i = 0; i < sits_in_cursum(sum); i++)
1756 if (le32_to_cpu(segno_in_journal(sum, i)) == val) 1744 if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1757 return i; 1745 return i;
1758 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES) 1746 if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
1759 return update_sits_in_cursum(sum, 1); 1747 return update_sits_in_cursum(sum, 1);
1760 } 1748 }
1761 return -1; 1749 return -1;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index da0d8e0b55a5..93606f281bf9 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
32 32
33static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) 33static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
34{ 34{
35 return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node); 35 return atomic_read(&sbi->total_zombie_tree) +
36 atomic_read(&sbi->total_ext_node);
36} 37}
37 38
38unsigned long f2fs_shrink_count(struct shrinker *shrink, 39unsigned long f2fs_shrink_count(struct shrinker *shrink,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3a65e0132352..3bf990b80026 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -67,6 +67,7 @@ enum {
67 Opt_extent_cache, 67 Opt_extent_cache,
68 Opt_noextent_cache, 68 Opt_noextent_cache,
69 Opt_noinline_data, 69 Opt_noinline_data,
70 Opt_data_flush,
70 Opt_err, 71 Opt_err,
71}; 72};
72 73
@@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = {
91 {Opt_extent_cache, "extent_cache"}, 92 {Opt_extent_cache, "extent_cache"},
92 {Opt_noextent_cache, "noextent_cache"}, 93 {Opt_noextent_cache, "noextent_cache"},
93 {Opt_noinline_data, "noinline_data"}, 94 {Opt_noinline_data, "noinline_data"},
95 {Opt_data_flush, "data_flush"},
94 {Opt_err, NULL}, 96 {Opt_err, NULL},
95}; 97};
96 98
@@ -216,7 +218,8 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
216F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); 218F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
217F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 219F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
218F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 220F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
219F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval); 221F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
222F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
220 223
221#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 224#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
222static struct attribute *f2fs_attrs[] = { 225static struct attribute *f2fs_attrs[] = {
@@ -235,6 +238,7 @@ static struct attribute *f2fs_attrs[] = {
235 ATTR_LIST(ram_thresh), 238 ATTR_LIST(ram_thresh),
236 ATTR_LIST(ra_nid_pages), 239 ATTR_LIST(ra_nid_pages),
237 ATTR_LIST(cp_interval), 240 ATTR_LIST(cp_interval),
241 ATTR_LIST(idle_interval),
238 NULL, 242 NULL,
239}; 243};
240 244
@@ -406,6 +410,9 @@ static int parse_options(struct super_block *sb, char *options)
406 case Opt_noinline_data: 410 case Opt_noinline_data:
407 clear_opt(sbi, INLINE_DATA); 411 clear_opt(sbi, INLINE_DATA);
408 break; 412 break;
413 case Opt_data_flush:
414 set_opt(sbi, DATA_FLUSH);
415 break;
409 default: 416 default:
410 f2fs_msg(sb, KERN_ERR, 417 f2fs_msg(sb, KERN_ERR,
411 "Unrecognized mount option \"%s\" or missing value", 418 "Unrecognized mount option \"%s\" or missing value",
@@ -432,6 +439,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
432 fi->i_current_depth = 1; 439 fi->i_current_depth = 1;
433 fi->i_advise = 0; 440 fi->i_advise = 0;
434 init_rwsem(&fi->i_sem); 441 init_rwsem(&fi->i_sem);
442 INIT_LIST_HEAD(&fi->dirty_list);
435 INIT_LIST_HEAD(&fi->inmem_pages); 443 INIT_LIST_HEAD(&fi->inmem_pages);
436 mutex_init(&fi->inmem_lock); 444 mutex_init(&fi->inmem_lock);
437 445
@@ -548,7 +556,7 @@ static void f2fs_put_super(struct super_block *sb)
548 * normally superblock is clean, so we need to release this. 556 * normally superblock is clean, so we need to release this.
549 * In addition, EIO will skip do checkpoint, we need this as well. 557 * In addition, EIO will skip do checkpoint, we need this as well.
550 */ 558 */
551 release_dirty_inode(sbi); 559 release_ino_entry(sbi);
552 release_discard_addrs(sbi); 560 release_discard_addrs(sbi);
553 561
554 f2fs_leave_shrinker(sbi); 562 f2fs_leave_shrinker(sbi);
@@ -566,13 +574,14 @@ static void f2fs_put_super(struct super_block *sb)
566 wait_for_completion(&sbi->s_kobj_unregister); 574 wait_for_completion(&sbi->s_kobj_unregister);
567 575
568 sb->s_fs_info = NULL; 576 sb->s_fs_info = NULL;
569 brelse(sbi->raw_super_buf); 577 kfree(sbi->raw_super);
570 kfree(sbi); 578 kfree(sbi);
571} 579}
572 580
573int f2fs_sync_fs(struct super_block *sb, int sync) 581int f2fs_sync_fs(struct super_block *sb, int sync)
574{ 582{
575 struct f2fs_sb_info *sbi = F2FS_SB(sb); 583 struct f2fs_sb_info *sbi = F2FS_SB(sb);
584 int err = 0;
576 585
577 trace_f2fs_sync_fs(sb, sync); 586 trace_f2fs_sync_fs(sb, sync);
578 587
@@ -582,14 +591,12 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
582 cpc.reason = __get_cp_reason(sbi); 591 cpc.reason = __get_cp_reason(sbi);
583 592
584 mutex_lock(&sbi->gc_mutex); 593 mutex_lock(&sbi->gc_mutex);
585 write_checkpoint(sbi, &cpc); 594 err = write_checkpoint(sbi, &cpc);
586 mutex_unlock(&sbi->gc_mutex); 595 mutex_unlock(&sbi->gc_mutex);
587 } else {
588 f2fs_balance_fs(sbi);
589 } 596 }
590 f2fs_trace_ios(NULL, 1); 597 f2fs_trace_ios(NULL, 1);
591 598
592 return 0; 599 return err;
593} 600}
594 601
595static int f2fs_freeze(struct super_block *sb) 602static int f2fs_freeze(struct super_block *sb)
@@ -686,6 +693,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
686 seq_puts(seq, ",extent_cache"); 693 seq_puts(seq, ",extent_cache");
687 else 694 else
688 seq_puts(seq, ",noextent_cache"); 695 seq_puts(seq, ",noextent_cache");
696 if (test_opt(sbi, DATA_FLUSH))
697 seq_puts(seq, ",data_flush");
689 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 698 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
690 699
691 return 0; 700 return 0;
@@ -898,7 +907,7 @@ static const struct export_operations f2fs_export_ops = {
898 .get_parent = f2fs_get_parent, 907 .get_parent = f2fs_get_parent,
899}; 908};
900 909
901static loff_t max_file_size(unsigned bits) 910static loff_t max_file_blocks(void)
902{ 911{
903 loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); 912 loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
904 loff_t leaf_count = ADDRS_PER_BLOCK; 913 loff_t leaf_count = ADDRS_PER_BLOCK;
@@ -914,10 +923,82 @@ static loff_t max_file_size(unsigned bits)
914 leaf_count *= NIDS_PER_BLOCK; 923 leaf_count *= NIDS_PER_BLOCK;
915 result += leaf_count; 924 result += leaf_count;
916 925
917 result <<= bits;
918 return result; 926 return result;
919} 927}
920 928
929static inline bool sanity_check_area_boundary(struct super_block *sb,
930 struct f2fs_super_block *raw_super)
931{
932 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
933 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
934 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
935 u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
936 u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
937 u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
938 u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
939 u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
940 u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
941 u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
942 u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
943 u32 segment_count = le32_to_cpu(raw_super->segment_count);
944 u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
945
946 if (segment0_blkaddr != cp_blkaddr) {
947 f2fs_msg(sb, KERN_INFO,
948 "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
949 segment0_blkaddr, cp_blkaddr);
950 return true;
951 }
952
953 if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
954 sit_blkaddr) {
955 f2fs_msg(sb, KERN_INFO,
956 "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
957 cp_blkaddr, sit_blkaddr,
958 segment_count_ckpt << log_blocks_per_seg);
959 return true;
960 }
961
962 if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
963 nat_blkaddr) {
964 f2fs_msg(sb, KERN_INFO,
965 "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
966 sit_blkaddr, nat_blkaddr,
967 segment_count_sit << log_blocks_per_seg);
968 return true;
969 }
970
971 if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
972 ssa_blkaddr) {
973 f2fs_msg(sb, KERN_INFO,
974 "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
975 nat_blkaddr, ssa_blkaddr,
976 segment_count_nat << log_blocks_per_seg);
977 return true;
978 }
979
980 if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
981 main_blkaddr) {
982 f2fs_msg(sb, KERN_INFO,
983 "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
984 ssa_blkaddr, main_blkaddr,
985 segment_count_ssa << log_blocks_per_seg);
986 return true;
987 }
988
989 if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
990 segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
991 f2fs_msg(sb, KERN_INFO,
992 "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
993 main_blkaddr,
994 segment0_blkaddr + (segment_count << log_blocks_per_seg),
995 segment_count_main << log_blocks_per_seg);
996 return true;
997 }
998
999 return false;
1000}
1001
921static int sanity_check_raw_super(struct super_block *sb, 1002static int sanity_check_raw_super(struct super_block *sb,
922 struct f2fs_super_block *raw_super) 1003 struct f2fs_super_block *raw_super)
923{ 1004{
@@ -947,6 +1028,14 @@ static int sanity_check_raw_super(struct super_block *sb,
947 return 1; 1028 return 1;
948 } 1029 }
949 1030
1031 /* check log blocks per segment */
1032 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
1033 f2fs_msg(sb, KERN_INFO,
1034 "Invalid log blocks per segment (%u)\n",
1035 le32_to_cpu(raw_super->log_blocks_per_seg));
1036 return 1;
1037 }
1038
950 /* Currently, support 512/1024/2048/4096 bytes sector size */ 1039 /* Currently, support 512/1024/2048/4096 bytes sector size */
951 if (le32_to_cpu(raw_super->log_sectorsize) > 1040 if (le32_to_cpu(raw_super->log_sectorsize) >
952 F2FS_MAX_LOG_SECTOR_SIZE || 1041 F2FS_MAX_LOG_SECTOR_SIZE ||
@@ -965,6 +1054,23 @@ static int sanity_check_raw_super(struct super_block *sb,
965 le32_to_cpu(raw_super->log_sectorsize)); 1054 le32_to_cpu(raw_super->log_sectorsize));
966 return 1; 1055 return 1;
967 } 1056 }
1057
1058 /* check reserved ino info */
1059 if (le32_to_cpu(raw_super->node_ino) != 1 ||
1060 le32_to_cpu(raw_super->meta_ino) != 2 ||
1061 le32_to_cpu(raw_super->root_ino) != 3) {
1062 f2fs_msg(sb, KERN_INFO,
1063 "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1064 le32_to_cpu(raw_super->node_ino),
1065 le32_to_cpu(raw_super->meta_ino),
1066 le32_to_cpu(raw_super->root_ino));
1067 return 1;
1068 }
1069
1070 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1071 if (sanity_check_area_boundary(sb, raw_super))
1072 return 1;
1073
968 return 0; 1074 return 0;
969} 1075}
970 1076
@@ -1018,7 +1124,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1018 atomic_set(&sbi->nr_pages[i], 0); 1124 atomic_set(&sbi->nr_pages[i], 0);
1019 1125
1020 sbi->dir_level = DEF_DIR_LEVEL; 1126 sbi->dir_level = DEF_DIR_LEVEL;
1021 sbi->cp_interval = DEF_CP_INTERVAL; 1127 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
1128 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
1022 clear_sbi_flag(sbi, SBI_NEED_FSCK); 1129 clear_sbi_flag(sbi, SBI_NEED_FSCK);
1023 1130
1024 INIT_LIST_HEAD(&sbi->s_list); 1131 INIT_LIST_HEAD(&sbi->s_list);
@@ -1032,111 +1139,114 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1032 */ 1139 */
1033static int read_raw_super_block(struct super_block *sb, 1140static int read_raw_super_block(struct super_block *sb,
1034 struct f2fs_super_block **raw_super, 1141 struct f2fs_super_block **raw_super,
1035 struct buffer_head **raw_super_buf, 1142 int *valid_super_block, int *recovery)
1036 int *recovery)
1037{ 1143{
1038 int block = 0; 1144 int block = 0;
1039 struct buffer_head *buffer; 1145 struct buffer_head *bh;
1040 struct f2fs_super_block *super; 1146 struct f2fs_super_block *super, *buf;
1041 int err = 0; 1147 int err = 0;
1042 1148
1149 super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
1150 if (!super)
1151 return -ENOMEM;
1043retry: 1152retry:
1044 buffer = sb_bread(sb, block); 1153 bh = sb_bread(sb, block);
1045 if (!buffer) { 1154 if (!bh) {
1046 *recovery = 1; 1155 *recovery = 1;
1047 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", 1156 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
1048 block + 1); 1157 block + 1);
1049 if (block == 0) { 1158 err = -EIO;
1050 block++; 1159 goto next;
1051 goto retry;
1052 } else {
1053 err = -EIO;
1054 goto out;
1055 }
1056 } 1160 }
1057 1161
1058 super = (struct f2fs_super_block *) 1162 buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET);
1059 ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
1060 1163
1061 /* sanity checking of raw super */ 1164 /* sanity checking of raw super */
1062 if (sanity_check_raw_super(sb, super)) { 1165 if (sanity_check_raw_super(sb, buf)) {
1063 brelse(buffer); 1166 brelse(bh);
1064 *recovery = 1; 1167 *recovery = 1;
1065 f2fs_msg(sb, KERN_ERR, 1168 f2fs_msg(sb, KERN_ERR,
1066 "Can't find valid F2FS filesystem in %dth superblock", 1169 "Can't find valid F2FS filesystem in %dth superblock",
1067 block + 1); 1170 block + 1);
1068 if (block == 0) { 1171 err = -EINVAL;
1069 block++; 1172 goto next;
1070 goto retry;
1071 } else {
1072 err = -EINVAL;
1073 goto out;
1074 }
1075 } 1173 }
1076 1174
1077 if (!*raw_super) { 1175 if (!*raw_super) {
1078 *raw_super_buf = buffer; 1176 memcpy(super, buf, sizeof(*super));
1177 *valid_super_block = block;
1079 *raw_super = super; 1178 *raw_super = super;
1080 } else {
1081 /* already have a valid superblock */
1082 brelse(buffer);
1083 } 1179 }
1180 brelse(bh);
1084 1181
1182next:
1085 /* check the validity of the second superblock */ 1183 /* check the validity of the second superblock */
1086 if (block == 0) { 1184 if (block == 0) {
1087 block++; 1185 block++;
1088 goto retry; 1186 goto retry;
1089 } 1187 }
1090 1188
1091out:
1092 /* No valid superblock */ 1189 /* No valid superblock */
1093 if (!*raw_super) 1190 if (!*raw_super) {
1191 kfree(super);
1094 return err; 1192 return err;
1193 }
1095 1194
1096 return 0; 1195 return 0;
1097} 1196}
1098 1197
1198static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
1199{
1200 struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
1201 struct buffer_head *bh;
1202 int err;
1203
1204 bh = sb_getblk(sbi->sb, block);
1205 if (!bh)
1206 return -EIO;
1207
1208 lock_buffer(bh);
1209 memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
1210 set_buffer_uptodate(bh);
1211 set_buffer_dirty(bh);
1212 unlock_buffer(bh);
1213
1214 /* it's rare case, we can do fua all the time */
1215 err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
1216 brelse(bh);
1217
1218 return err;
1219}
1220
1099int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) 1221int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1100{ 1222{
1101 struct buffer_head *sbh = sbi->raw_super_buf;
1102 sector_t block = sbh->b_blocknr;
1103 int err; 1223 int err;
1104 1224
1105 /* write back-up superblock first */ 1225 /* write back-up superblock first */
1106 sbh->b_blocknr = block ? 0 : 1; 1226 err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);
1107 mark_buffer_dirty(sbh);
1108 err = sync_dirty_buffer(sbh);
1109
1110 sbh->b_blocknr = block;
1111 1227
1112 /* if we are in recovery path, skip writing valid superblock */ 1228 /* if we are in recovery path, skip writing valid superblock */
1113 if (recover || err) 1229 if (recover || err)
1114 goto out; 1230 return err;
1115 1231
1116 /* write current valid superblock */ 1232 /* write current valid superblock */
1117 mark_buffer_dirty(sbh); 1233 return __f2fs_commit_super(sbi, sbi->valid_super_block);
1118 err = sync_dirty_buffer(sbh);
1119out:
1120 clear_buffer_write_io_error(sbh);
1121 set_buffer_uptodate(sbh);
1122 return err;
1123} 1234}
1124 1235
1125static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 1236static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1126{ 1237{
1127 struct f2fs_sb_info *sbi; 1238 struct f2fs_sb_info *sbi;
1128 struct f2fs_super_block *raw_super; 1239 struct f2fs_super_block *raw_super;
1129 struct buffer_head *raw_super_buf;
1130 struct inode *root; 1240 struct inode *root;
1131 long err; 1241 long err;
1132 bool retry = true, need_fsck = false; 1242 bool retry = true, need_fsck = false;
1133 char *options = NULL; 1243 char *options = NULL;
1134 int recovery, i; 1244 int recovery, i, valid_super_block;
1135 1245
1136try_onemore: 1246try_onemore:
1137 err = -EINVAL; 1247 err = -EINVAL;
1138 raw_super = NULL; 1248 raw_super = NULL;
1139 raw_super_buf = NULL; 1249 valid_super_block = -1;
1140 recovery = 0; 1250 recovery = 0;
1141 1251
1142 /* allocate memory for f2fs-specific super block info */ 1252 /* allocate memory for f2fs-specific super block info */
@@ -1150,7 +1260,8 @@ try_onemore:
1150 goto free_sbi; 1260 goto free_sbi;
1151 } 1261 }
1152 1262
1153 err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery); 1263 err = read_raw_super_block(sb, &raw_super, &valid_super_block,
1264 &recovery);
1154 if (err) 1265 if (err)
1155 goto free_sbi; 1266 goto free_sbi;
1156 1267
@@ -1167,7 +1278,9 @@ try_onemore:
1167 if (err) 1278 if (err)
1168 goto free_options; 1279 goto free_options;
1169 1280
1170 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 1281 sbi->max_file_blocks = max_file_blocks();
1282 sb->s_maxbytes = sbi->max_file_blocks <<
1283 le32_to_cpu(raw_super->log_blocksize);
1171 sb->s_max_links = F2FS_LINK_MAX; 1284 sb->s_max_links = F2FS_LINK_MAX;
1172 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1285 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1173 1286
@@ -1183,7 +1296,7 @@ try_onemore:
1183 /* init f2fs-specific super block info */ 1296 /* init f2fs-specific super block info */
1184 sbi->sb = sb; 1297 sbi->sb = sb;
1185 sbi->raw_super = raw_super; 1298 sbi->raw_super = raw_super;
1186 sbi->raw_super_buf = raw_super_buf; 1299 sbi->valid_super_block = valid_super_block;
1187 mutex_init(&sbi->gc_mutex); 1300 mutex_init(&sbi->gc_mutex);
1188 mutex_init(&sbi->writepages); 1301 mutex_init(&sbi->writepages);
1189 mutex_init(&sbi->cp_mutex); 1302 mutex_init(&sbi->cp_mutex);
@@ -1236,8 +1349,10 @@ try_onemore:
1236 le64_to_cpu(sbi->ckpt->valid_block_count); 1349 le64_to_cpu(sbi->ckpt->valid_block_count);
1237 sbi->last_valid_block_count = sbi->total_valid_block_count; 1350 sbi->last_valid_block_count = sbi->total_valid_block_count;
1238 sbi->alloc_valid_block_count = 0; 1351 sbi->alloc_valid_block_count = 0;
1239 INIT_LIST_HEAD(&sbi->dir_inode_list); 1352 for (i = 0; i < NR_INODE_TYPE; i++) {
1240 spin_lock_init(&sbi->dir_inode_lock); 1353 INIT_LIST_HEAD(&sbi->inode_list[i]);
1354 spin_lock_init(&sbi->inode_lock[i]);
1355 }
1241 1356
1242 init_extent_cache_info(sbi); 1357 init_extent_cache_info(sbi);
1243 1358
@@ -1355,12 +1470,14 @@ try_onemore:
1355 f2fs_commit_super(sbi, true); 1470 f2fs_commit_super(sbi, true);
1356 } 1471 }
1357 1472
1358 sbi->cp_expires = round_jiffies_up(jiffies); 1473 f2fs_update_time(sbi, CP_TIME);
1359 1474 f2fs_update_time(sbi, REQ_TIME);
1360 return 0; 1475 return 0;
1361 1476
1362free_kobj: 1477free_kobj:
1363 kobject_del(&sbi->s_kobj); 1478 kobject_del(&sbi->s_kobj);
1479 kobject_put(&sbi->s_kobj);
1480 wait_for_completion(&sbi->s_kobj_unregister);
1364free_proc: 1481free_proc:
1365 if (sbi->s_proc) { 1482 if (sbi->s_proc) {
1366 remove_proc_entry("segment_info", sbi->s_proc); 1483 remove_proc_entry("segment_info", sbi->s_proc);
@@ -1387,7 +1504,7 @@ free_meta_inode:
1387free_options: 1504free_options:
1388 kfree(options); 1505 kfree(options);
1389free_sb_buf: 1506free_sb_buf:
1390 brelse(raw_super_buf); 1507 kfree(raw_super);
1391free_sbi: 1508free_sbi:
1392 kfree(sbi); 1509 kfree(sbi);
1393 1510
@@ -1478,10 +1595,14 @@ static int __init init_f2fs_fs(void)
1478 err = register_filesystem(&f2fs_fs_type); 1595 err = register_filesystem(&f2fs_fs_type);
1479 if (err) 1596 if (err)
1480 goto free_shrinker; 1597 goto free_shrinker;
1481 f2fs_create_root_stats(); 1598 err = f2fs_create_root_stats();
1599 if (err)
1600 goto free_filesystem;
1482 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); 1601 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
1483 return 0; 1602 return 0;
1484 1603
1604free_filesystem:
1605 unregister_filesystem(&f2fs_fs_type);
1485free_shrinker: 1606free_shrinker:
1486 unregister_shrinker(&f2fs_shrinker_info); 1607 unregister_shrinker(&f2fs_shrinker_info);
1487free_crypto: 1608free_crypto:
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 036952a945fa..10f1e784fa23 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -571,7 +571,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
571 if (ipage) 571 if (ipage)
572 return __f2fs_setxattr(inode, index, name, value, 572 return __f2fs_setxattr(inode, index, name, value,
573 size, ipage, flags); 573 size, ipage, flags);
574 f2fs_balance_fs(sbi); 574 f2fs_balance_fs(sbi, true);
575 575
576 f2fs_lock_op(sbi); 576 f2fs_lock_op(sbi);
577 /* protect xattr_ver */ 577 /* protect xattr_ver */
@@ -580,5 +580,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
580 up_write(&F2FS_I(inode)->i_sem); 580 up_write(&F2FS_I(inode)->i_sem);
581 f2fs_unlock_op(sbi); 581 f2fs_unlock_op(sbi);
582 582
583 f2fs_update_time(sbi, REQ_TIME);
583 return err; 584 return err;
584} 585}
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 25c6324a0dd0..e59c3be92106 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -51,6 +51,7 @@
51#define MAX_ACTIVE_DATA_LOGS 8 51#define MAX_ACTIVE_DATA_LOGS 8
52 52
53#define VERSION_LEN 256 53#define VERSION_LEN 256
54#define MAX_VOLUME_NAME 512
54 55
55/* 56/*
56 * For superblock 57 * For superblock
@@ -84,7 +85,7 @@ struct f2fs_super_block {
84 __le32 node_ino; /* node inode number */ 85 __le32 node_ino; /* node inode number */
85 __le32 meta_ino; /* meta inode number */ 86 __le32 meta_ino; /* meta inode number */
86 __u8 uuid[16]; /* 128-bit uuid for volume */ 87 __u8 uuid[16]; /* 128-bit uuid for volume */
87 __le16 volume_name[512]; /* volume name */ 88 __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */
88 __le32 extension_count; /* # of extensions below */ 89 __le32 extension_count; /* # of extensions below */
89 __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ 90 __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
90 __le32 cp_payload; 91 __le32 cp_payload;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 00b4a6308249..a1b488809f06 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1265,6 +1265,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
1265 __entry->node_cnt) 1265 __entry->node_cnt)
1266); 1266);
1267 1267
1268DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
1269
1270 TP_PROTO(struct super_block *sb, int type, int count),
1271
1272 TP_ARGS(sb, type, count),
1273
1274 TP_STRUCT__entry(
1275 __field(dev_t, dev)
1276 __field(int, type)
1277 __field(int, count)
1278 ),
1279
1280 TP_fast_assign(
1281 __entry->dev = sb->s_dev;
1282 __entry->type = type;
1283 __entry->count = count;
1284 ),
1285
1286 TP_printk("dev = (%d,%d), %s, dirty count = %d",
1287 show_dev(__entry),
1288 show_file_type(__entry->type),
1289 __entry->count)
1290);
1291
1292DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter,
1293
1294 TP_PROTO(struct super_block *sb, int type, int count),
1295
1296 TP_ARGS(sb, type, count)
1297);
1298
1299DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
1300
1301 TP_PROTO(struct super_block *sb, int type, int count),
1302
1303 TP_ARGS(sb, type, count)
1304);
1305
1268#endif /* _TRACE_F2FS_H */ 1306#endif /* _TRACE_F2FS_H */
1269 1307
1270 /* This part must be outside protection */ 1308 /* This part must be outside protection */