author	Zheng Liu <wenqing.lz@taobao.com>	2014-11-25 11:45:37 -0500
committer	Theodore Ts'o <tytso@mit.edu>	2014-11-25 11:45:37 -0500
commit	edaa53cac8fd4b96ed4b8f96c4933158ff2dd337 (patch)
tree	f414417ca79fe9678743ea5af75bd5afb70ad8cd /fs
parent	2f8e0a7c6c89f850ebd5d6c0b9a08317030d1b89 (diff)
ext4: change LRU to round-robin in extent status tree shrinker
In this commit we discard the lru algorithm for inodes with extent
status tree because it takes significant effort to maintain a lru list
in extent status tree shrinker and the shrinker can take a long time to
scan this lru list in order to reclaim some objects.

We replace the lru ordering with a simple round-robin.  After that we
never need to keep a lru list.  That means that the list needn't be
sorted if the shrinker can not reclaim any objects in the first round.

Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
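To see the round-robin policy in isolation, here is a minimal userspace sketch (an illustrative toy, not the kernel code: struct rr_inode, its nr_reclaimable field and rr_shrink() are invented stand-ins for ext4_inode_info, i_es_shk_nr and __es_shrink()). The shrinker always takes the inode at the head of a single list, rotates it to the tail, and then tries to reclaim from it, so successive calls cycle through all inodes without tracking access times or ever sorting the list.

#include <stdio.h>

/* Invented stand-ins: rr_inode plays the role of ext4_inode_info,
 * nr_reclaimable the role of i_es_shk_nr. */
struct rr_inode {
	struct rr_inode *prev, *next;	/* doubly linked; the list head is a sentinel */
	int id;
	int nr_reclaimable;
};

static void rr_list_init(struct rr_inode *head)
{
	head->prev = head->next = head;
}

static void rr_list_add_tail(struct rr_inode *n, struct rr_inode *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void rr_list_move_tail(struct rr_inode *n, struct rr_inode *head)
{
	n->prev->next = n->next;	/* unlink */
	n->next->prev = n->prev;
	rr_list_add_tail(n, head);	/* re-insert at the tail */
}

/* Round-robin shrink: always take the head entry, rotate it to the tail,
 * then try to reclaim from it.  nr_inodes bounds the walk so one call
 * never loops over the list more than once. */
static int rr_shrink(struct rr_inode *head, int nr_to_scan, int nr_inodes)
{
	int nr_shrunk = 0;

	while (nr_inodes-- > 0 && nr_to_scan > 0 && head->next != head) {
		struct rr_inode *ei = head->next;

		rr_list_move_tail(ei, head);
		if (ei->nr_reclaimable > 0) {
			ei->nr_reclaimable--;	/* pretend one object was freed */
			nr_shrunk++;
			nr_to_scan--;
		}
	}
	return nr_shrunk;
}

int main(void)
{
	struct rr_inode head, inodes[3];
	int i;

	rr_list_init(&head);
	for (i = 0; i < 3; i++) {
		inodes[i].id = i;
		inodes[i].nr_reclaimable = 2;
		rr_list_add_tail(&inodes[i], &head);
	}

	/* Each pass resumes after the inodes the previous pass visited,
	 * because visited inodes were rotated to the tail. */
	printf("pass 1 reclaimed %d\n", rr_shrink(&head, 2, 3));
	printf("pass 2 reclaimed %d\n", rr_shrink(&head, 2, 3));
	return 0;
}

In the patch below the rotation is the list_move(&ei->i_es_list, sbi->s_es_list.prev) call in __es_shrink(), and s_es_nr_inode provides the same walk bound that nr_inodes does in this sketch.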
Diffstat (limited to 'fs')
-rw-r--r--  fs/ext4/ext4.h             10
-rw-r--r--  fs/ext4/extents.c           4
-rw-r--r--  fs/ext4/extents_status.c  224
-rw-r--r--  fs/ext4/extents_status.h    7
-rw-r--r--  fs/ext4/inode.c             4
-rw-r--r--  fs/ext4/ioctl.c             4
-rw-r--r--  fs/ext4/super.c             7
7 files changed, 114 insertions, 146 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 98da4cda9d18..ab6caf55f5bf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -878,10 +878,9 @@ struct ext4_inode_info {
 	/* extents status tree */
 	struct ext4_es_tree i_es_tree;
 	rwlock_t i_es_lock;
-	struct list_head i_es_lru;
+	struct list_head i_es_list;
 	unsigned int i_es_all_nr;	/* protected by i_es_lock */
-	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
-	unsigned long i_touch_when;	/* jiffies of last accessing */
+	unsigned int i_es_shk_nr;	/* protected by i_es_lock */
 
 	/* ialloc */
 	ext4_group_t i_last_alloc_group;
@@ -1322,10 +1321,11 @@ struct ext4_sb_info {
 
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
-	struct list_head s_es_lru;
+	struct list_head s_es_list;
+	long s_es_nr_inode;
 	struct ext4_es_stats s_es_stats;
 	struct mb_cache *s_mb_cache;
-	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
+	spinlock_t s_es_lock ____cacheline_aligned_in_smp;
 
 	/* Ratelimit ext4 messages. */
 	struct ratelimit_state s_err_ratelimit_state;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1ee24d74270f..e406f66a903f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4632,7 +4632,7 @@ out2:
 
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
 				       err ? err : allocated);
-	ext4_es_lru_add(inode);
+	ext4_es_list_add(inode);
 	return err ? err : allocated;
 }
 
@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		error = ext4_fill_fiemap_extents(inode, start_blk,
 						 len_blks, fieinfo);
 	}
-	ext4_es_lru_add(inode);
+	ext4_es_list_add(inode);
 	return error;
 }
 
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 94e7855ae71b..0193ca107396 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -149,8 +149,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_lblk_t end);
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 				       int nr_to_scan);
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei);
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei);
 
 int __init ext4_init_es(void)
 {
@@ -298,6 +298,36 @@ out:
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
+void ext4_es_list_add(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!list_empty(&ei->i_es_list))
+		return;
+
+	spin_lock(&sbi->s_es_lock);
+	if (list_empty(&ei->i_es_list)) {
+		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
+		sbi->s_es_nr_inode++;
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
+void ext4_es_list_del(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	spin_lock(&sbi->s_es_lock);
+	if (!list_empty(&ei->i_es_list)) {
+		list_del_init(&ei->i_es_list);
+		sbi->s_es_nr_inode--;
+		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
 static struct extent_status *
 ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 		     ext4_fsblk_t pblk)
@@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_lru_nr++;
+		EXT4_I(inode)->i_es_shk_nr++;
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	EXT4_I(inode)->i_es_all_nr++;
@@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	EXT4_I(inode)->i_es_all_nr--;
 	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
 
-	/* Decrease the lru counter when this es is not delayed */
+	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
-		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
-		EXT4_I(inode)->i_es_lru_nr--;
+		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
+		EXT4_I(inode)->i_es_shk_nr--;
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -683,8 +713,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 		goto error;
 retry:
 	err = __es_insert_extent(inode, &newes);
-	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					       EXT4_I(inode)))
+	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
+					  1, EXT4_I(inode)))
 		goto retry;
 	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
 		err = 0;
@@ -841,8 +871,8 @@ retry:
 			es->es_lblk = orig_es.es_lblk;
 			es->es_len = orig_es.es_len;
 			if ((err == -ENOMEM) &&
-			    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					     EXT4_I(inode)))
+			    __es_shrink(EXT4_SB(inode->i_sb),
+					1, EXT4_I(inode)))
 				goto retry;
 			goto out;
 		}
@@ -914,6 +944,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	end = lblk + len - 1;
 	BUG_ON(end < lblk);
 
+	/*
+	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
+	 * so that we are sure __es_shrink() is done with the inode before it
+	 * is reclaimed.
+	 */
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -921,114 +956,80 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	return err;
 }
 
-static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
-				     struct list_head *b)
-{
-	struct ext4_inode_info *eia, *eib;
-	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
-	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
-
-	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return 1;
-	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return -1;
-	if (eia->i_touch_when == eib->i_touch_when)
-		return 0;
-	if (time_after(eia->i_touch_when, eib->i_touch_when))
-		return 1;
-	else
-		return -1;
-}
-
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei)
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
 	struct ext4_es_stats *es_stats;
-	struct list_head *cur, *tmp;
-	LIST_HEAD(skipped);
 	ktime_t start_time;
 	u64 scan_time;
+	int nr_to_walk;
 	int nr_shrunk = 0;
-	int retried = 0, skip_precached = 1, nr_skipped = 0;
+	int retried = 0, nr_skipped = 0;
 
 	es_stats = &sbi->s_es_stats;
 	start_time = ktime_get();
-	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
-	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+	spin_lock(&sbi->s_es_lock);
+	nr_to_walk = sbi->s_es_nr_inode;
+	while (nr_to_walk-- > 0) {
 		int shrunk;
 
-		/*
-		 * If we have already reclaimed all extents from extent
-		 * status tree, just stop the loop immediately.
-		 */
-		if (percpu_counter_read_positive(
-				&es_stats->es_stats_lru_cnt) == 0)
-			break;
-
-		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+		if (list_empty(&sbi->s_es_list)) {
+			spin_unlock(&sbi->s_es_lock);
+			goto out;
+		}
+		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
+				      i_es_list);
+		/* Move the inode to the tail */
+		list_move(&ei->i_es_list, sbi->s_es_list.prev);
 
 		/*
-		 * Skip the inode that is newer than the last_sorted
-		 * time. Normally we try hard to avoid shrinking
-		 * precached inodes, but we will as a last resort.
+		 * Normally we try hard to avoid shrinking precached inodes,
+		 * but we will as a last resort.
 		 */
-		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
-		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
-						EXT4_STATE_EXT_PRECACHED))) {
+		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
+						EXT4_STATE_EXT_PRECACHED)) {
 			nr_skipped++;
-			list_move_tail(cur, &skipped);
 			continue;
 		}
 
-		if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
-		    !write_trylock(&ei->i_es_lock))
+		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
+			nr_skipped++;
 			continue;
+		}
+		/*
+		 * Now we hold i_es_lock which protects us from inode reclaim
+		 * freeing inode under us
+		 */
+		spin_unlock(&sbi->s_es_lock);
 
 		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
-		if (ei->i_es_lru_nr == 0)
-			list_del_init(&ei->i_es_lru);
 		write_unlock(&ei->i_es_lock);
 
 		nr_shrunk += shrunk;
 		nr_to_scan -= shrunk;
+
 		if (nr_to_scan == 0)
-			break;
+			goto out;
+		spin_lock(&sbi->s_es_lock);
 	}
-
-	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skipped, &sbi->s_es_lru);
-	INIT_LIST_HEAD(&skipped);
+	spin_unlock(&sbi->s_es_lock);
 
 	/*
 	 * If we skipped any inodes, and we weren't able to make any
-	 * forward progress, sort the list and try again.
+	 * forward progress, try again to scan precached inodes.
 	 */
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		es_stats->es_stats_last_sorted = jiffies;
-		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
-				      i_es_lru);
-		/*
-		 * If there are no non-precached inodes left on the
-		 * list, start releasing precached extents.
-		 */
-		if (ext4_test_inode_state(&ei->vfs_inode,
-					  EXT4_STATE_EXT_PRECACHED))
-			skip_precached = 0;
 		goto retry;
 	}
 
-	spin_unlock(&sbi->s_es_lru_lock);
-
 	if (locked_ei && nr_shrunk == 0)
 		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
+out:
 	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
 	if (likely(es_stats->es_stats_scan_time))
 		es_stats->es_stats_scan_time = (scan_time +
@@ -1043,7 +1044,7 @@ retry:
 	else
 		es_stats->es_stats_shrunk = nr_shrunk;
 
-	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
 			     nr_skipped, retried);
 	return nr_shrunk;
 }
@@ -1055,7 +1056,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
@@ -1068,13 +1069,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
-	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
@@ -1102,28 +1103,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 		return 0;
 
 	/* here we just find an inode that has the max nr. of objects */
-	spin_lock(&sbi->s_es_lru_lock);
-	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+	spin_lock(&sbi->s_es_lock);
+	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
 		inode_cnt++;
 		if (max && max->i_es_all_nr < ei->i_es_all_nr)
 			max = ei;
 		else if (!max)
 			max = ei;
 	}
-	spin_unlock(&sbi->s_es_lru_lock);
+	spin_unlock(&sbi->s_es_lock);
 
 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
-		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
 	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
 		   es_stats->es_stats_cache_hits,
 		   es_stats->es_stats_cache_misses);
-	if (es_stats->es_stats_last_sorted != 0)
-		seq_printf(seq, "  %u ms last sorted interval\n",
-			   jiffies_to_msecs(jiffies -
-					    es_stats->es_stats_last_sorted));
 	if (inode_cnt)
-		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
 	seq_printf(seq, "average:\n  %llu us scan time\n",
 		   div_u64(es_stats->es_stats_scan_time, 1000));
@@ -1132,7 +1129,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
 		   "  %llu us max scan time\n",
-		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
 		   div_u64(es_stats->es_stats_max_scan_time, 1000));
 
 	return 0;
@@ -1181,9 +1178,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 {
 	int err;
 
-	INIT_LIST_HEAD(&sbi->s_es_lru);
-	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_stats.es_stats_last_sorted = 0;
+	INIT_LIST_HEAD(&sbi->s_es_list);
+	sbi->s_es_nr_inode = 0;
+	spin_lock_init(&sbi->s_es_lock);
 	sbi->s_es_stats.es_stats_shrunk = 0;
 	sbi->s_es_stats.es_stats_cache_hits = 0;
 	sbi->s_es_stats.es_stats_cache_misses = 0;
@@ -1192,7 +1189,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
 	if (err)
 		return err;
-	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
 	if (err)
 		goto err1;
 
@@ -1210,7 +1207,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	return 0;
 
 err2:
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	return err;
@@ -1221,37 +1218,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 	if (sbi->s_proc)
 		remove_proc_entry("es_shrinker_info", sbi->s_proc);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
-void ext4_es_lru_add(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	ei->i_touch_when = jiffies;
-
-	if (!list_empty(&ei->i_es_lru))
-		return;
-
-	spin_lock(&sbi->s_es_lru_lock);
-	if (list_empty(&ei->i_es_lru))
-		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
-
-void ext4_es_lru_del(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	spin_lock(&sbi->s_es_lru_lock);
-	if (!list_empty(&ei->i_es_lru))
-		list_del_init(&ei->i_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
-
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 				       int nr_to_scan)
 {
@@ -1263,7 +1233,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if (ei->i_es_lru_nr == 0)
+	if (ei->i_es_shk_nr == 0)
 		return 0;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index efd5f970b501..0e6a33e81e5f 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -65,14 +65,13 @@ struct ext4_es_tree {
 };
 
 struct ext4_es_stats {
-	unsigned long es_stats_last_sorted;
 	unsigned long es_stats_shrunk;
 	unsigned long es_stats_cache_hits;
 	unsigned long es_stats_cache_misses;
 	u64 es_stats_scan_time;
 	u64 es_stats_max_scan_time;
 	struct percpu_counter es_stats_all_cnt;
-	struct percpu_counter es_stats_lru_cnt;
+	struct percpu_counter es_stats_shk_cnt;
 };
 
 extern int __init ext4_init_es(void);
@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
 
 extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
-extern void ext4_es_lru_add(struct inode *inode);
-extern void ext4_es_lru_del(struct inode *inode);
+extern void ext4_es_list_add(struct inode *inode);
+extern void ext4_es_list_del(struct inode *inode);
 
 #endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d5a46a8df70b..540b0b0481a5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
-		ext4_es_lru_add(inode);
+		ext4_es_list_add(inode);
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, iblock, &es)) {
-		ext4_es_lru_add(inode);
+		ext4_es_list_add(inode);
 		if (ext4_es_is_hole(&es)) {
 			retval = 0;
 			down_read(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bfda18a15592..7b377c41dd81 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
 	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
 	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
 	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
-	ext4_es_lru_del(inode1);
-	ext4_es_lru_del(inode2);
+	ext4_es_list_del(inode1);
+	ext4_es_list_del(inode2);
 
 	isize = i_size_read(inode1);
 	i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4b79f39ebf66..32df08e99ca9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ei->i_prealloc_lock);
 	ext4_es_init_tree(&ei->i_es_tree);
 	rwlock_init(&ei->i_es_lock);
-	INIT_LIST_HEAD(&ei->i_es_lru);
+	INIT_LIST_HEAD(&ei->i_es_list);
 	ei->i_es_all_nr = 0;
-	ei->i_es_lru_nr = 0;
-	ei->i_touch_when = 0;
+	ei->i_es_shk_nr = 0;
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode)
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
-	ext4_es_lru_del(inode);
+	ext4_es_list_del(inode);
 	if (EXT4_I(inode)->jinode) {
 		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 					       EXT4_I(inode)->jinode);