author | Zheng Liu <wenqing.lz@taobao.com> | 2014-11-25 11:45:37 -0500
committer | Theodore Ts'o <tytso@mit.edu> | 2014-11-25 11:45:37 -0500
commit | edaa53cac8fd4b96ed4b8f96c4933158ff2dd337 (patch)
tree | f414417ca79fe9678743ea5af75bd5afb70ad8cd /fs
parent | 2f8e0a7c6c89f850ebd5d6c0b9a08317030d1b89 (diff)
ext4: change LRU to round-robin in extent status tree shrinker
In this commit we discard the LRU algorithm for inodes with an extent
status tree, because it takes significant effort to maintain an LRU list
in the extent status tree shrinker, and the shrinker can spend a long
time scanning this LRU list in order to reclaim some objects.
We replace the LRU ordering with a simple round-robin walk. With that
change we no longer need to keep an LRU list at all, which also means
the list no longer has to be sorted when the shrinker fails to reclaim
any objects in its first pass.
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
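For readers who want to see the idea in isolation, here is a minimal, self-contained user-space sketch of the round-robin scan described above. The structure and function names (es_inode, shrink_round_robin) are illustrative assumptions, not the kernel code in the diff below, which walks sbi->s_es_list under s_es_lock, moves each visited inode to the tail of the list, and bounds the walk to one full pass with nr_to_walk.

```c
#include <stdio.h>

struct es_inode {
    int reclaimable;            /* extents that could be freed */
    struct es_inode *next;
};

/*
 * Pop the head inode, reclaim what the scan budget allows, re-queue it
 * at the tail, and repeat.  No ordering by access time is kept, so no
 * sorting pass is ever required; the walk is bounded to one full pass,
 * mirroring nr_to_walk in the patch.
 */
static int shrink_round_robin(struct es_inode **head, struct es_inode **tail,
                              int nr_inodes, int nr_to_scan)
{
    int nr_shrunk = 0;
    int nr_to_walk = nr_inodes;

    while (nr_to_scan > 0 && nr_to_walk-- > 0 && *head) {
        struct es_inode *ei = *head;
        int shrunk;

        /* detach the first inode on the list */
        *head = ei->next;
        ei->next = NULL;
        if (!*head)
            *tail = NULL;

        /* reclaim as many of its objects as the budget allows */
        shrunk = ei->reclaimable < nr_to_scan ? ei->reclaimable : nr_to_scan;
        ei->reclaimable -= shrunk;
        nr_shrunk += shrunk;
        nr_to_scan -= shrunk;

        /* re-queue at the tail: the next scan starts with another inode */
        if (*tail)
            (*tail)->next = ei;
        else
            *head = ei;
        *tail = ei;
    }
    return nr_shrunk;
}

int main(void)
{
    struct es_inode a = { 5, NULL }, b = { 3, NULL }, c = { 8, NULL };
    struct es_inode *head = &a, *tail = &c;

    a.next = &b;
    b.next = &c;

    printf("reclaimed %d objects\n", shrink_round_robin(&head, &tail, 3, 10));
    printf("head of list now has %d reclaimable objects\n",
           head->reclaimable);
    return 0;
}
```

The point of the sketch is simply that picking the next inode to scan needs no access-time ordering and no list sorting: every inode eventually reaches the head again on its own.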
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 10
-rw-r--r-- | fs/ext4/extents.c | 4
-rw-r--r-- | fs/ext4/extents_status.c | 224
-rw-r--r-- | fs/ext4/extents_status.h | 7
-rw-r--r-- | fs/ext4/inode.c | 4
-rw-r--r-- | fs/ext4/ioctl.c | 4
-rw-r--r-- | fs/ext4/super.c | 7
7 files changed, 114 insertions, 146 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 98da4cda9d18..ab6caf55f5bf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -878,10 +878,9 @@ struct ext4_inode_info { | |||
878 | /* extents status tree */ | 878 | /* extents status tree */ |
879 | struct ext4_es_tree i_es_tree; | 879 | struct ext4_es_tree i_es_tree; |
880 | rwlock_t i_es_lock; | 880 | rwlock_t i_es_lock; |
881 | struct list_head i_es_lru; | 881 | struct list_head i_es_list; |
882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | 882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ |
883 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 883 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ |
884 | unsigned long i_touch_when; /* jiffies of last accessing */ | ||
885 | 884 | ||
886 | /* ialloc */ | 885 | /* ialloc */ |
887 | ext4_group_t i_last_alloc_group; | 886 | ext4_group_t i_last_alloc_group; |
@@ -1322,10 +1321,11 @@ struct ext4_sb_info { | |||
1322 | 1321 | ||
1323 | /* Reclaim extents from extent status tree */ | 1322 | /* Reclaim extents from extent status tree */ |
1324 | struct shrinker s_es_shrinker; | 1323 | struct shrinker s_es_shrinker; |
1325 | struct list_head s_es_lru; | 1324 | struct list_head s_es_list; |
1325 | long s_es_nr_inode; | ||
1326 | struct ext4_es_stats s_es_stats; | 1326 | struct ext4_es_stats s_es_stats; |
1327 | struct mb_cache *s_mb_cache; | 1327 | struct mb_cache *s_mb_cache; |
1328 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1328 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; |
1329 | 1329 | ||
1330 | /* Ratelimit ext4 messages. */ | 1330 | /* Ratelimit ext4 messages. */ |
1331 | struct ratelimit_state s_err_ratelimit_state; | 1331 | struct ratelimit_state s_err_ratelimit_state; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1ee24d74270f..e406f66a903f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4632,7 +4632,7 @@ out2: | |||
4632 | 4632 | ||
4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4634 | err ? err : allocated); | 4634 | err ? err : allocated); |
4635 | ext4_es_lru_add(inode); | 4635 | ext4_es_list_add(inode); |
4636 | return err ? err : allocated; | 4636 | return err ? err : allocated; |
4637 | } | 4637 | } |
4638 | 4638 | ||
@@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
5191 | error = ext4_fill_fiemap_extents(inode, start_blk, | 5191 | error = ext4_fill_fiemap_extents(inode, start_blk, |
5192 | len_blks, fieinfo); | 5192 | len_blks, fieinfo); |
5193 | } | 5193 | } |
5194 | ext4_es_lru_add(inode); | 5194 | ext4_es_list_add(inode); |
5195 | return error; | 5195 | return error; |
5196 | } | 5196 | } |
5197 | 5197 | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 94e7855ae71b..0193ca107396 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -149,8 +149,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
149 | ext4_lblk_t end); | 149 | ext4_lblk_t end); |
150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
151 | int nr_to_scan); | 151 | int nr_to_scan); |
152 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 152 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
153 | struct ext4_inode_info *locked_ei); | 153 | struct ext4_inode_info *locked_ei); |
154 | 154 | ||
155 | int __init ext4_init_es(void) | 155 | int __init ext4_init_es(void) |
156 | { | 156 | { |
@@ -298,6 +298,36 @@ out: | |||
298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
299 | } | 299 | } |
300 | 300 | ||
301 | void ext4_es_list_add(struct inode *inode) | ||
302 | { | ||
303 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
304 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
305 | |||
306 | if (!list_empty(&ei->i_es_list)) | ||
307 | return; | ||
308 | |||
309 | spin_lock(&sbi->s_es_lock); | ||
310 | if (list_empty(&ei->i_es_list)) { | ||
311 | list_add_tail(&ei->i_es_list, &sbi->s_es_list); | ||
312 | sbi->s_es_nr_inode++; | ||
313 | } | ||
314 | spin_unlock(&sbi->s_es_lock); | ||
315 | } | ||
316 | |||
317 | void ext4_es_list_del(struct inode *inode) | ||
318 | { | ||
319 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
320 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
321 | |||
322 | spin_lock(&sbi->s_es_lock); | ||
323 | if (!list_empty(&ei->i_es_list)) { | ||
324 | list_del_init(&ei->i_es_list); | ||
325 | sbi->s_es_nr_inode--; | ||
326 | WARN_ON_ONCE(sbi->s_es_nr_inode < 0); | ||
327 | } | ||
328 | spin_unlock(&sbi->s_es_lock); | ||
329 | } | ||
330 | |||
301 | static struct extent_status * | 331 | static struct extent_status * |
302 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | 332 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, |
303 | ext4_fsblk_t pblk) | 333 | ext4_fsblk_t pblk) |
@@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
314 | * We don't count delayed extent because we never try to reclaim them | 344 | * We don't count delayed extent because we never try to reclaim them |
315 | */ | 345 | */ |
316 | if (!ext4_es_is_delayed(es)) { | 346 | if (!ext4_es_is_delayed(es)) { |
317 | EXT4_I(inode)->i_es_lru_nr++; | 347 | EXT4_I(inode)->i_es_shk_nr++; |
318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> | 348 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
319 | s_es_stats.es_stats_lru_cnt); | 349 | s_es_stats.es_stats_shk_cnt); |
320 | } | 350 | } |
321 | 351 | ||
322 | EXT4_I(inode)->i_es_all_nr++; | 352 | EXT4_I(inode)->i_es_all_nr++; |
@@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
330 | EXT4_I(inode)->i_es_all_nr--; | 360 | EXT4_I(inode)->i_es_all_nr--; |
331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | 361 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); |
332 | 362 | ||
333 | /* Decrease the lru counter when this es is not delayed */ | 363 | /* Decrease the shrink counter when this es is not delayed */ |
334 | if (!ext4_es_is_delayed(es)) { | 364 | if (!ext4_es_is_delayed(es)) { |
335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 365 | BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); |
336 | EXT4_I(inode)->i_es_lru_nr--; | 366 | EXT4_I(inode)->i_es_shk_nr--; |
337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> | 367 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
338 | s_es_stats.es_stats_lru_cnt); | 368 | s_es_stats.es_stats_shk_cnt); |
339 | } | 369 | } |
340 | 370 | ||
341 | kmem_cache_free(ext4_es_cachep, es); | 371 | kmem_cache_free(ext4_es_cachep, es); |
@@ -683,8 +713,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
683 | goto error; | 713 | goto error; |
684 | retry: | 714 | retry: |
685 | err = __es_insert_extent(inode, &newes); | 715 | err = __es_insert_extent(inode, &newes); |
686 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 716 | if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), |
687 | EXT4_I(inode))) | 717 | 1, EXT4_I(inode))) |
688 | goto retry; | 718 | goto retry; |
689 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | 719 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) |
690 | err = 0; | 720 | err = 0; |
@@ -841,8 +871,8 @@ retry: | |||
841 | es->es_lblk = orig_es.es_lblk; | 871 | es->es_lblk = orig_es.es_lblk; |
842 | es->es_len = orig_es.es_len; | 872 | es->es_len = orig_es.es_len; |
843 | if ((err == -ENOMEM) && | 873 | if ((err == -ENOMEM) && |
844 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 874 | __es_shrink(EXT4_SB(inode->i_sb), |
845 | EXT4_I(inode))) | 875 | 1, EXT4_I(inode))) |
846 | goto retry; | 876 | goto retry; |
847 | goto out; | 877 | goto out; |
848 | } | 878 | } |
@@ -914,6 +944,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
914 | end = lblk + len - 1; | 944 | end = lblk + len - 1; |
915 | BUG_ON(end < lblk); | 945 | BUG_ON(end < lblk); |
916 | 946 | ||
947 | /* | ||
948 | * ext4_clear_inode() depends on us taking i_es_lock unconditionally | ||
949 | * so that we are sure __es_shrink() is done with the inode before it | ||
950 | * is reclaimed. | ||
951 | */ | ||
917 | write_lock(&EXT4_I(inode)->i_es_lock); | 952 | write_lock(&EXT4_I(inode)->i_es_lock); |
918 | err = __es_remove_extent(inode, lblk, end); | 953 | err = __es_remove_extent(inode, lblk, end); |
919 | write_unlock(&EXT4_I(inode)->i_es_lock); | 954 | write_unlock(&EXT4_I(inode)->i_es_lock); |
@@ -921,114 +956,80 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
921 | return err; | 956 | return err; |
922 | } | 957 | } |
923 | 958 | ||
924 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 959 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
925 | struct list_head *b) | 960 | struct ext4_inode_info *locked_ei) |
926 | { | ||
927 | struct ext4_inode_info *eia, *eib; | ||
928 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
929 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
930 | |||
931 | if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
932 | !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
933 | return 1; | ||
934 | if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
935 | ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
936 | return -1; | ||
937 | if (eia->i_touch_when == eib->i_touch_when) | ||
938 | return 0; | ||
939 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
940 | return 1; | ||
941 | else | ||
942 | return -1; | ||
943 | } | ||
944 | |||
945 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
946 | struct ext4_inode_info *locked_ei) | ||
947 | { | 961 | { |
948 | struct ext4_inode_info *ei; | 962 | struct ext4_inode_info *ei; |
949 | struct ext4_es_stats *es_stats; | 963 | struct ext4_es_stats *es_stats; |
950 | struct list_head *cur, *tmp; | ||
951 | LIST_HEAD(skipped); | ||
952 | ktime_t start_time; | 964 | ktime_t start_time; |
953 | u64 scan_time; | 965 | u64 scan_time; |
966 | int nr_to_walk; | ||
954 | int nr_shrunk = 0; | 967 | int nr_shrunk = 0; |
955 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 968 | int retried = 0, nr_skipped = 0; |
956 | 969 | ||
957 | es_stats = &sbi->s_es_stats; | 970 | es_stats = &sbi->s_es_stats; |
958 | start_time = ktime_get(); | 971 | start_time = ktime_get(); |
959 | spin_lock(&sbi->s_es_lru_lock); | ||
960 | 972 | ||
961 | retry: | 973 | retry: |
962 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 974 | spin_lock(&sbi->s_es_lock); |
975 | nr_to_walk = sbi->s_es_nr_inode; | ||
976 | while (nr_to_walk-- > 0) { | ||
963 | int shrunk; | 977 | int shrunk; |
964 | 978 | ||
965 | /* | 979 | if (list_empty(&sbi->s_es_list)) { |
966 | * If we have already reclaimed all extents from extent | 980 | spin_unlock(&sbi->s_es_lock); |
967 | * status tree, just stop the loop immediately. | 981 | goto out; |
968 | */ | 982 | } |
969 | if (percpu_counter_read_positive( | 983 | ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, |
970 | &es_stats->es_stats_lru_cnt) == 0) | 984 | i_es_list); |
971 | break; | 985 | /* Move the inode to the tail */ |
972 | 986 | list_move(&ei->i_es_list, sbi->s_es_list.prev); | |
973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
974 | 987 | ||
975 | /* | 988 | /* |
976 | * Skip the inode that is newer than the last_sorted | 989 | * Normally we try hard to avoid shrinking precached inodes, |
977 | * time. Normally we try hard to avoid shrinking | 990 | * but we will as a last resort. |
978 | * precached inodes, but we will as a last resort. | ||
979 | */ | 991 | */ |
980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || | 992 | if (!retried && ext4_test_inode_state(&ei->vfs_inode, |
981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 993 | EXT4_STATE_EXT_PRECACHED)) { |
982 | EXT4_STATE_EXT_PRECACHED))) { | ||
983 | nr_skipped++; | 994 | nr_skipped++; |
984 | list_move_tail(cur, &skipped); | ||
985 | continue; | 995 | continue; |
986 | } | 996 | } |
987 | 997 | ||
988 | if (ei->i_es_lru_nr == 0 || ei == locked_ei || | 998 | if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { |
989 | !write_trylock(&ei->i_es_lock)) | 999 | nr_skipped++; |
990 | continue; | 1000 | continue; |
1001 | } | ||
1002 | /* | ||
1003 | * Now we hold i_es_lock which protects us from inode reclaim | ||
1004 | * freeing inode under us | ||
1005 | */ | ||
1006 | spin_unlock(&sbi->s_es_lock); | ||
991 | 1007 | ||
992 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 1008 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); |
993 | if (ei->i_es_lru_nr == 0) | ||
994 | list_del_init(&ei->i_es_lru); | ||
995 | write_unlock(&ei->i_es_lock); | 1009 | write_unlock(&ei->i_es_lock); |
996 | 1010 | ||
997 | nr_shrunk += shrunk; | 1011 | nr_shrunk += shrunk; |
998 | nr_to_scan -= shrunk; | 1012 | nr_to_scan -= shrunk; |
1013 | |||
999 | if (nr_to_scan == 0) | 1014 | if (nr_to_scan == 0) |
1000 | break; | 1015 | goto out; |
1016 | spin_lock(&sbi->s_es_lock); | ||
1001 | } | 1017 | } |
1002 | 1018 | spin_unlock(&sbi->s_es_lock); | |
1003 | /* Move the newer inodes into the tail of the LRU list. */ | ||
1004 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
1005 | INIT_LIST_HEAD(&skipped); | ||
1006 | 1019 | ||
1007 | /* | 1020 | /* |
1008 | * If we skipped any inodes, and we weren't able to make any | 1021 | * If we skipped any inodes, and we weren't able to make any |
1009 | * forward progress, sort the list and try again. | 1022 | * forward progress, try again to scan precached inodes. |
1010 | */ | 1023 | */ |
1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1024 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
1012 | retried++; | 1025 | retried++; |
1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
1014 | es_stats->es_stats_last_sorted = jiffies; | ||
1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | ||
1016 | i_es_lru); | ||
1017 | /* | ||
1018 | * If there are no non-precached inodes left on the | ||
1019 | * list, start releasing precached extents. | ||
1020 | */ | ||
1021 | if (ext4_test_inode_state(&ei->vfs_inode, | ||
1022 | EXT4_STATE_EXT_PRECACHED)) | ||
1023 | skip_precached = 0; | ||
1024 | goto retry; | 1026 | goto retry; |
1025 | } | 1027 | } |
1026 | 1028 | ||
1027 | spin_unlock(&sbi->s_es_lru_lock); | ||
1028 | |||
1029 | if (locked_ei && nr_shrunk == 0) | 1029 | if (locked_ei && nr_shrunk == 0) |
1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); |
1031 | 1031 | ||
1032 | out: | ||
1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | 1033 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
1033 | if (likely(es_stats->es_stats_scan_time)) | 1034 | if (likely(es_stats->es_stats_scan_time)) |
1034 | es_stats->es_stats_scan_time = (scan_time + | 1035 | es_stats->es_stats_scan_time = (scan_time + |
@@ -1043,7 +1044,7 @@ retry: | |||
1043 | else | 1044 | else |
1044 | es_stats->es_stats_shrunk = nr_shrunk; | 1045 | es_stats->es_stats_shrunk = nr_shrunk; |
1045 | 1046 | ||
1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | 1047 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, |
1047 | nr_skipped, retried); | 1048 | nr_skipped, retried); |
1048 | return nr_shrunk; | 1049 | return nr_shrunk; |
1049 | } | 1050 | } |
@@ -1055,7 +1056,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
1055 | struct ext4_sb_info *sbi; | 1056 | struct ext4_sb_info *sbi; |
1056 | 1057 | ||
1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1058 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1059 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); | 1060 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
1060 | return nr; | 1061 | return nr; |
1061 | } | 1062 | } |
@@ -1068,13 +1069,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
1068 | int nr_to_scan = sc->nr_to_scan; | 1069 | int nr_to_scan = sc->nr_to_scan; |
1069 | int ret, nr_shrunk; | 1070 | int ret, nr_shrunk; |
1070 | 1071 | ||
1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1072 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); | 1073 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
1073 | 1074 | ||
1074 | if (!nr_to_scan) | 1075 | if (!nr_to_scan) |
1075 | return ret; | 1076 | return ret; |
1076 | 1077 | ||
1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1078 | nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); |
1078 | 1079 | ||
1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); | 1080 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
1080 | return nr_shrunk; | 1081 | return nr_shrunk; |
@@ -1102,28 +1103,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1102 | return 0; | 1103 | return 0; |
1103 | 1104 | ||
1104 | /* here we just find an inode that has the max nr. of objects */ | 1105 | /* here we just find an inode that has the max nr. of objects */ |
1105 | spin_lock(&sbi->s_es_lru_lock); | 1106 | spin_lock(&sbi->s_es_lock); |
1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | 1107 | list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { |
1107 | inode_cnt++; | 1108 | inode_cnt++; |
1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | 1109 | if (max && max->i_es_all_nr < ei->i_es_all_nr) |
1109 | max = ei; | 1110 | max = ei; |
1110 | else if (!max) | 1111 | else if (!max) |
1111 | max = ei; | 1112 | max = ei; |
1112 | } | 1113 | } |
1113 | spin_unlock(&sbi->s_es_lru_lock); | 1114 | spin_unlock(&sbi->s_es_lock); |
1114 | 1115 | ||
1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | 1116 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", |
1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | 1117 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), |
1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | 1118 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); |
1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | 1119 | seq_printf(seq, " %lu/%lu cache hits/misses\n", |
1119 | es_stats->es_stats_cache_hits, | 1120 | es_stats->es_stats_cache_hits, |
1120 | es_stats->es_stats_cache_misses); | 1121 | es_stats->es_stats_cache_misses); |
1121 | if (es_stats->es_stats_last_sorted != 0) | ||
1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
1123 | jiffies_to_msecs(jiffies - | ||
1124 | es_stats->es_stats_last_sorted)); | ||
1125 | if (inode_cnt) | 1122 | if (inode_cnt) |
1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | 1123 | seq_printf(seq, " %d inodes on list\n", inode_cnt); |
1127 | 1124 | ||
1128 | seq_printf(seq, "average:\n %llu us scan time\n", | 1125 | seq_printf(seq, "average:\n %llu us scan time\n", |
1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | 1126 | div_u64(es_stats->es_stats_scan_time, 1000)); |
@@ -1132,7 +1129,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
1132 | seq_printf(seq, | 1129 | seq_printf(seq, |
1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | 1130 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" |
1134 | " %llu us max scan time\n", | 1131 | " %llu us max scan time\n", |
1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | 1132 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, |
1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | 1133 | div_u64(es_stats->es_stats_max_scan_time, 1000)); |
1137 | 1134 | ||
1138 | return 0; | 1135 | return 0; |
@@ -1181,9 +1178,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1181 | { | 1178 | { |
1182 | int err; | 1179 | int err; |
1183 | 1180 | ||
1184 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1181 | INIT_LIST_HEAD(&sbi->s_es_list); |
1185 | spin_lock_init(&sbi->s_es_lru_lock); | 1182 | sbi->s_es_nr_inode = 0; |
1186 | sbi->s_es_stats.es_stats_last_sorted = 0; | 1183 | spin_lock_init(&sbi->s_es_lock); |
1187 | sbi->s_es_stats.es_stats_shrunk = 0; | 1184 | sbi->s_es_stats.es_stats_shrunk = 0; |
1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | 1185 | sbi->s_es_stats.es_stats_cache_hits = 0; |
1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | 1186 | sbi->s_es_stats.es_stats_cache_misses = 0; |
@@ -1192,7 +1189,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | 1189 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); |
1193 | if (err) | 1190 | if (err) |
1194 | return err; | 1191 | return err; |
1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | 1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); |
1196 | if (err) | 1193 | if (err) |
1197 | goto err1; | 1194 | goto err1; |
1198 | 1195 | ||
@@ -1210,7 +1207,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
1210 | return 0; | 1207 | return 0; |
1211 | 1208 | ||
1212 | err2: | 1209 | err2: |
1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1210 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1214 | err1: | 1211 | err1: |
1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1212 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1216 | return err; | 1213 | return err; |
@@ -1221,37 +1218,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | |||
1221 | if (sbi->s_proc) | 1218 | if (sbi->s_proc) |
1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | 1219 | remove_proc_entry("es_shrinker_info", sbi->s_proc); |
1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1220 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1221 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
1225 | unregister_shrinker(&sbi->s_es_shrinker); | 1222 | unregister_shrinker(&sbi->s_es_shrinker); |
1226 | } | 1223 | } |
1227 | 1224 | ||
1228 | void ext4_es_lru_add(struct inode *inode) | ||
1229 | { | ||
1230 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1231 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1232 | |||
1233 | ei->i_touch_when = jiffies; | ||
1234 | |||
1235 | if (!list_empty(&ei->i_es_lru)) | ||
1236 | return; | ||
1237 | |||
1238 | spin_lock(&sbi->s_es_lru_lock); | ||
1239 | if (list_empty(&ei->i_es_lru)) | ||
1240 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
1241 | spin_unlock(&sbi->s_es_lru_lock); | ||
1242 | } | ||
1243 | |||
1244 | void ext4_es_lru_del(struct inode *inode) | ||
1245 | { | ||
1246 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
1247 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1248 | |||
1249 | spin_lock(&sbi->s_es_lru_lock); | ||
1250 | if (!list_empty(&ei->i_es_lru)) | ||
1251 | list_del_init(&ei->i_es_lru); | ||
1252 | spin_unlock(&sbi->s_es_lru_lock); | ||
1253 | } | ||
1254 | |||
1255 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 1225 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
1256 | int nr_to_scan) | 1226 | int nr_to_scan) |
1257 | { | 1227 | { |
@@ -1263,7 +1233,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | |||
1263 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1233 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
1264 | DEFAULT_RATELIMIT_BURST); | 1234 | DEFAULT_RATELIMIT_BURST); |
1265 | 1235 | ||
1266 | if (ei->i_es_lru_nr == 0) | 1236 | if (ei->i_es_shk_nr == 0) |
1267 | return 0; | 1237 | return 0; |
1268 | 1238 | ||
1269 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | 1239 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index efd5f970b501..0e6a33e81e5f 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -65,14 +65,13 @@ struct ext4_es_tree { | |||
65 | }; | 65 | }; |
66 | 66 | ||
67 | struct ext4_es_stats { | 67 | struct ext4_es_stats { |
68 | unsigned long es_stats_last_sorted; | ||
69 | unsigned long es_stats_shrunk; | 68 | unsigned long es_stats_shrunk; |
70 | unsigned long es_stats_cache_hits; | 69 | unsigned long es_stats_cache_hits; |
71 | unsigned long es_stats_cache_misses; | 70 | unsigned long es_stats_cache_misses; |
72 | u64 es_stats_scan_time; | 71 | u64 es_stats_scan_time; |
73 | u64 es_stats_max_scan_time; | 72 | u64 es_stats_max_scan_time; |
74 | struct percpu_counter es_stats_all_cnt; | 73 | struct percpu_counter es_stats_all_cnt; |
75 | struct percpu_counter es_stats_lru_cnt; | 74 | struct percpu_counter es_stats_shk_cnt; |
76 | }; | 75 | }; |
77 | 76 | ||
78 | extern int __init ext4_init_es(void); | 77 | extern int __init ext4_init_es(void); |
@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, | |||
151 | 150 | ||
152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 151 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 152 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
154 | extern void ext4_es_lru_add(struct inode *inode); | 153 | extern void ext4_es_list_add(struct inode *inode); |
155 | extern void ext4_es_lru_del(struct inode *inode); | 154 | extern void ext4_es_list_del(struct inode *inode); |
156 | 155 | ||
157 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 156 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d5a46a8df70b..540b0b0481a5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
486 | 486 | ||
487 | /* Lookup extent status tree firstly */ | 487 | /* Lookup extent status tree firstly */ |
488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
489 | ext4_es_lru_add(inode); | 489 | ext4_es_list_add(inode); |
490 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 490 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
491 | map->m_pblk = ext4_es_pblock(&es) + | 491 | map->m_pblk = ext4_es_pblock(&es) + |
492 | map->m_lblk - es.es_lblk; | 492 | map->m_lblk - es.es_lblk; |
@@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1388 | 1388 | ||
1389 | /* Lookup extent status tree firstly */ | 1389 | /* Lookup extent status tree firstly */ |
1390 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1390 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1391 | ext4_es_lru_add(inode); | 1391 | ext4_es_list_add(inode); |
1392 | if (ext4_es_is_hole(&es)) { | 1392 | if (ext4_es_is_hole(&es)) { |
1393 | retval = 0; | 1393 | retval = 0; |
1394 | down_read(&EXT4_I(inode)->i_data_sem); | 1394 | down_read(&EXT4_I(inode)->i_data_sem); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bfda18a15592..7b377c41dd81 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); | 79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); | 80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
81 | ext4_es_lru_del(inode1); | 81 | ext4_es_list_del(inode1); |
82 | ext4_es_lru_del(inode2); | 82 | ext4_es_list_del(inode2); |
83 | 83 | ||
84 | isize = i_size_read(inode1); | 84 | isize = i_size_read(inode1); |
85 | i_size_write(inode1, i_size_read(inode2)); | 85 | i_size_write(inode1, i_size_read(inode2)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4b79f39ebf66..32df08e99ca9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
871 | spin_lock_init(&ei->i_prealloc_lock); | 871 | spin_lock_init(&ei->i_prealloc_lock); |
872 | ext4_es_init_tree(&ei->i_es_tree); | 872 | ext4_es_init_tree(&ei->i_es_tree); |
873 | rwlock_init(&ei->i_es_lock); | 873 | rwlock_init(&ei->i_es_lock); |
874 | INIT_LIST_HEAD(&ei->i_es_lru); | 874 | INIT_LIST_HEAD(&ei->i_es_list); |
875 | ei->i_es_all_nr = 0; | 875 | ei->i_es_all_nr = 0; |
876 | ei->i_es_lru_nr = 0; | 876 | ei->i_es_shk_nr = 0; |
877 | ei->i_touch_when = 0; | ||
878 | ei->i_reserved_data_blocks = 0; | 877 | ei->i_reserved_data_blocks = 0; |
879 | ei->i_reserved_meta_blocks = 0; | 878 | ei->i_reserved_meta_blocks = 0; |
880 | ei->i_allocated_meta_blocks = 0; | 879 | ei->i_allocated_meta_blocks = 0; |
@@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode) | |||
963 | dquot_drop(inode); | 962 | dquot_drop(inode); |
964 | ext4_discard_preallocations(inode); | 963 | ext4_discard_preallocations(inode); |
965 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 964 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
966 | ext4_es_lru_del(inode); | 965 | ext4_es_list_del(inode); |
967 | if (EXT4_I(inode)->jinode) { | 966 | if (EXT4_I(inode)->jinode) { |
968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 967 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
969 | EXT4_I(inode)->jinode); | 968 | EXT4_I(inode)->jinode); |