diff options
| -rw-r--r-- | fs/ext4/ext4.h | 10 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 4 | ||||
| -rw-r--r-- | fs/ext4/extents_status.c | 224 | ||||
| -rw-r--r-- | fs/ext4/extents_status.h | 7 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 4 | ||||
| -rw-r--r-- | fs/ext4/ioctl.c | 4 | ||||
| -rw-r--r-- | fs/ext4/super.c | 7 | ||||
| -rw-r--r-- | include/trace/events/ext4.h | 11 |
8 files changed, 118 insertions, 153 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 98da4cda9d18..ab6caf55f5bf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -878,10 +878,9 @@ struct ext4_inode_info { | |||
| 878 | /* extents status tree */ | 878 | /* extents status tree */ |
| 879 | struct ext4_es_tree i_es_tree; | 879 | struct ext4_es_tree i_es_tree; |
| 880 | rwlock_t i_es_lock; | 880 | rwlock_t i_es_lock; |
| 881 | struct list_head i_es_lru; | 881 | struct list_head i_es_list; |
| 882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | 882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ |
| 883 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 883 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ |
| 884 | unsigned long i_touch_when; /* jiffies of last accessing */ | ||
| 885 | 884 | ||
| 886 | /* ialloc */ | 885 | /* ialloc */ |
| 887 | ext4_group_t i_last_alloc_group; | 886 | ext4_group_t i_last_alloc_group; |
| @@ -1322,10 +1321,11 @@ struct ext4_sb_info { | |||
| 1322 | 1321 | ||
| 1323 | /* Reclaim extents from extent status tree */ | 1322 | /* Reclaim extents from extent status tree */ |
| 1324 | struct shrinker s_es_shrinker; | 1323 | struct shrinker s_es_shrinker; |
| 1325 | struct list_head s_es_lru; | 1324 | struct list_head s_es_list; |
| 1325 | long s_es_nr_inode; | ||
| 1326 | struct ext4_es_stats s_es_stats; | 1326 | struct ext4_es_stats s_es_stats; |
| 1327 | struct mb_cache *s_mb_cache; | 1327 | struct mb_cache *s_mb_cache; |
| 1328 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1328 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; |
| 1329 | 1329 | ||
| 1330 | /* Ratelimit ext4 messages. */ | 1330 | /* Ratelimit ext4 messages. */ |
| 1331 | struct ratelimit_state s_err_ratelimit_state; | 1331 | struct ratelimit_state s_err_ratelimit_state; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1ee24d74270f..e406f66a903f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -4632,7 +4632,7 @@ out2: | |||
| 4632 | 4632 | ||
| 4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4633 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
| 4634 | err ? err : allocated); | 4634 | err ? err : allocated); |
| 4635 | ext4_es_lru_add(inode); | 4635 | ext4_es_list_add(inode); |
| 4636 | return err ? err : allocated; | 4636 | return err ? err : allocated; |
| 4637 | } | 4637 | } |
| 4638 | 4638 | ||
| @@ -5191,7 +5191,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
| 5191 | error = ext4_fill_fiemap_extents(inode, start_blk, | 5191 | error = ext4_fill_fiemap_extents(inode, start_blk, |
| 5192 | len_blks, fieinfo); | 5192 | len_blks, fieinfo); |
| 5193 | } | 5193 | } |
| 5194 | ext4_es_lru_add(inode); | 5194 | ext4_es_list_add(inode); |
| 5195 | return error; | 5195 | return error; |
| 5196 | } | 5196 | } |
| 5197 | 5197 | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 94e7855ae71b..0193ca107396 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -149,8 +149,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 149 | ext4_lblk_t end); | 149 | ext4_lblk_t end); |
| 150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 150 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
| 151 | int nr_to_scan); | 151 | int nr_to_scan); |
| 152 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | 152 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
| 153 | struct ext4_inode_info *locked_ei); | 153 | struct ext4_inode_info *locked_ei); |
| 154 | 154 | ||
| 155 | int __init ext4_init_es(void) | 155 | int __init ext4_init_es(void) |
| 156 | { | 156 | { |
| @@ -298,6 +298,36 @@ out: | |||
| 298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 298 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
| 299 | } | 299 | } |
| 300 | 300 | ||
| 301 | void ext4_es_list_add(struct inode *inode) | ||
| 302 | { | ||
| 303 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 304 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 305 | |||
| 306 | if (!list_empty(&ei->i_es_list)) | ||
| 307 | return; | ||
| 308 | |||
| 309 | spin_lock(&sbi->s_es_lock); | ||
| 310 | if (list_empty(&ei->i_es_list)) { | ||
| 311 | list_add_tail(&ei->i_es_list, &sbi->s_es_list); | ||
| 312 | sbi->s_es_nr_inode++; | ||
| 313 | } | ||
| 314 | spin_unlock(&sbi->s_es_lock); | ||
| 315 | } | ||
| 316 | |||
| 317 | void ext4_es_list_del(struct inode *inode) | ||
| 318 | { | ||
| 319 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 320 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 321 | |||
| 322 | spin_lock(&sbi->s_es_lock); | ||
| 323 | if (!list_empty(&ei->i_es_list)) { | ||
| 324 | list_del_init(&ei->i_es_list); | ||
| 325 | sbi->s_es_nr_inode--; | ||
| 326 | WARN_ON_ONCE(sbi->s_es_nr_inode < 0); | ||
| 327 | } | ||
| 328 | spin_unlock(&sbi->s_es_lock); | ||
| 329 | } | ||
| 330 | |||
| 301 | static struct extent_status * | 331 | static struct extent_status * |
| 302 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | 332 | ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, |
| 303 | ext4_fsblk_t pblk) | 333 | ext4_fsblk_t pblk) |
| @@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, | |||
| 314 | * We don't count delayed extent because we never try to reclaim them | 344 | * We don't count delayed extent because we never try to reclaim them |
| 315 | */ | 345 | */ |
| 316 | if (!ext4_es_is_delayed(es)) { | 346 | if (!ext4_es_is_delayed(es)) { |
| 317 | EXT4_I(inode)->i_es_lru_nr++; | 347 | EXT4_I(inode)->i_es_shk_nr++; |
| 318 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> | 348 | percpu_counter_inc(&EXT4_SB(inode->i_sb)-> |
| 319 | s_es_stats.es_stats_lru_cnt); | 349 | s_es_stats.es_stats_shk_cnt); |
| 320 | } | 350 | } |
| 321 | 351 | ||
| 322 | EXT4_I(inode)->i_es_all_nr++; | 352 | EXT4_I(inode)->i_es_all_nr++; |
| @@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) | |||
| 330 | EXT4_I(inode)->i_es_all_nr--; | 360 | EXT4_I(inode)->i_es_all_nr--; |
| 331 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); | 361 | percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); |
| 332 | 362 | ||
| 333 | /* Decrease the lru counter when this es is not delayed */ | 363 | /* Decrease the shrink counter when this es is not delayed */ |
| 334 | if (!ext4_es_is_delayed(es)) { | 364 | if (!ext4_es_is_delayed(es)) { |
| 335 | BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); | 365 | BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); |
| 336 | EXT4_I(inode)->i_es_lru_nr--; | 366 | EXT4_I(inode)->i_es_shk_nr--; |
| 337 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> | 367 | percpu_counter_dec(&EXT4_SB(inode->i_sb)-> |
| 338 | s_es_stats.es_stats_lru_cnt); | 368 | s_es_stats.es_stats_shk_cnt); |
| 339 | } | 369 | } |
| 340 | 370 | ||
| 341 | kmem_cache_free(ext4_es_cachep, es); | 371 | kmem_cache_free(ext4_es_cachep, es); |
| @@ -683,8 +713,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 683 | goto error; | 713 | goto error; |
| 684 | retry: | 714 | retry: |
| 685 | err = __es_insert_extent(inode, &newes); | 715 | err = __es_insert_extent(inode, &newes); |
| 686 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 716 | if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), |
| 687 | EXT4_I(inode))) | 717 | 1, EXT4_I(inode))) |
| 688 | goto retry; | 718 | goto retry; |
| 689 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | 719 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) |
| 690 | err = 0; | 720 | err = 0; |
| @@ -841,8 +871,8 @@ retry: | |||
| 841 | es->es_lblk = orig_es.es_lblk; | 871 | es->es_lblk = orig_es.es_lblk; |
| 842 | es->es_len = orig_es.es_len; | 872 | es->es_len = orig_es.es_len; |
| 843 | if ((err == -ENOMEM) && | 873 | if ((err == -ENOMEM) && |
| 844 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | 874 | __es_shrink(EXT4_SB(inode->i_sb), |
| 845 | EXT4_I(inode))) | 875 | 1, EXT4_I(inode))) |
| 846 | goto retry; | 876 | goto retry; |
| 847 | goto out; | 877 | goto out; |
| 848 | } | 878 | } |
| @@ -914,6 +944,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 914 | end = lblk + len - 1; | 944 | end = lblk + len - 1; |
| 915 | BUG_ON(end < lblk); | 945 | BUG_ON(end < lblk); |
| 916 | 946 | ||
| 947 | /* | ||
| 948 | * ext4_clear_inode() depends on us taking i_es_lock unconditionally | ||
| 949 | * so that we are sure __es_shrink() is done with the inode before it | ||
| 950 | * is reclaimed. | ||
| 951 | */ | ||
| 917 | write_lock(&EXT4_I(inode)->i_es_lock); | 952 | write_lock(&EXT4_I(inode)->i_es_lock); |
| 918 | err = __es_remove_extent(inode, lblk, end); | 953 | err = __es_remove_extent(inode, lblk, end); |
| 919 | write_unlock(&EXT4_I(inode)->i_es_lock); | 954 | write_unlock(&EXT4_I(inode)->i_es_lock); |
| @@ -921,114 +956,80 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 921 | return err; | 956 | return err; |
| 922 | } | 957 | } |
| 923 | 958 | ||
| 924 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | 959 | static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, |
| 925 | struct list_head *b) | 960 | struct ext4_inode_info *locked_ei) |
| 926 | { | ||
| 927 | struct ext4_inode_info *eia, *eib; | ||
| 928 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
| 929 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
| 930 | |||
| 931 | if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
| 932 | !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
| 933 | return 1; | ||
| 934 | if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && | ||
| 935 | ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) | ||
| 936 | return -1; | ||
| 937 | if (eia->i_touch_when == eib->i_touch_when) | ||
| 938 | return 0; | ||
| 939 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
| 940 | return 1; | ||
| 941 | else | ||
| 942 | return -1; | ||
| 943 | } | ||
| 944 | |||
| 945 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
| 946 | struct ext4_inode_info *locked_ei) | ||
| 947 | { | 961 | { |
| 948 | struct ext4_inode_info *ei; | 962 | struct ext4_inode_info *ei; |
| 949 | struct ext4_es_stats *es_stats; | 963 | struct ext4_es_stats *es_stats; |
| 950 | struct list_head *cur, *tmp; | ||
| 951 | LIST_HEAD(skipped); | ||
| 952 | ktime_t start_time; | 964 | ktime_t start_time; |
| 953 | u64 scan_time; | 965 | u64 scan_time; |
| 966 | int nr_to_walk; | ||
| 954 | int nr_shrunk = 0; | 967 | int nr_shrunk = 0; |
| 955 | int retried = 0, skip_precached = 1, nr_skipped = 0; | 968 | int retried = 0, nr_skipped = 0; |
| 956 | 969 | ||
| 957 | es_stats = &sbi->s_es_stats; | 970 | es_stats = &sbi->s_es_stats; |
| 958 | start_time = ktime_get(); | 971 | start_time = ktime_get(); |
| 959 | spin_lock(&sbi->s_es_lru_lock); | ||
| 960 | 972 | ||
| 961 | retry: | 973 | retry: |
| 962 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 974 | spin_lock(&sbi->s_es_lock); |
| 975 | nr_to_walk = sbi->s_es_nr_inode; | ||
| 976 | while (nr_to_walk-- > 0) { | ||
| 963 | int shrunk; | 977 | int shrunk; |
| 964 | 978 | ||
| 965 | /* | 979 | if (list_empty(&sbi->s_es_list)) { |
| 966 | * If we have already reclaimed all extents from extent | 980 | spin_unlock(&sbi->s_es_lock); |
| 967 | * status tree, just stop the loop immediately. | 981 | goto out; |
| 968 | */ | 982 | } |
| 969 | if (percpu_counter_read_positive( | 983 | ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, |
| 970 | &es_stats->es_stats_lru_cnt) == 0) | 984 | i_es_list); |
| 971 | break; | 985 | /* Move the inode to the tail */ |
| 972 | 986 | list_move(&ei->i_es_list, sbi->s_es_list.prev); | |
| 973 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | ||
| 974 | 987 | ||
| 975 | /* | 988 | /* |
| 976 | * Skip the inode that is newer than the last_sorted | 989 | * Normally we try hard to avoid shrinking precached inodes, |
| 977 | * time. Normally we try hard to avoid shrinking | 990 | * but we will as a last resort. |
| 978 | * precached inodes, but we will as a last resort. | ||
| 979 | */ | 991 | */ |
| 980 | if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || | 992 | if (!retried && ext4_test_inode_state(&ei->vfs_inode, |
| 981 | (skip_precached && ext4_test_inode_state(&ei->vfs_inode, | 993 | EXT4_STATE_EXT_PRECACHED)) { |
| 982 | EXT4_STATE_EXT_PRECACHED))) { | ||
| 983 | nr_skipped++; | 994 | nr_skipped++; |
| 984 | list_move_tail(cur, &skipped); | ||
| 985 | continue; | 995 | continue; |
| 986 | } | 996 | } |
| 987 | 997 | ||
| 988 | if (ei->i_es_lru_nr == 0 || ei == locked_ei || | 998 | if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { |
| 989 | !write_trylock(&ei->i_es_lock)) | 999 | nr_skipped++; |
| 990 | continue; | 1000 | continue; |
| 1001 | } | ||
| 1002 | /* | ||
| 1003 | * Now we hold i_es_lock which protects us from inode reclaim | ||
| 1004 | * freeing inode under us | ||
| 1005 | */ | ||
| 1006 | spin_unlock(&sbi->s_es_lock); | ||
| 991 | 1007 | ||
| 992 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | 1008 | shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); |
| 993 | if (ei->i_es_lru_nr == 0) | ||
| 994 | list_del_init(&ei->i_es_lru); | ||
| 995 | write_unlock(&ei->i_es_lock); | 1009 | write_unlock(&ei->i_es_lock); |
| 996 | 1010 | ||
| 997 | nr_shrunk += shrunk; | 1011 | nr_shrunk += shrunk; |
| 998 | nr_to_scan -= shrunk; | 1012 | nr_to_scan -= shrunk; |
| 1013 | |||
| 999 | if (nr_to_scan == 0) | 1014 | if (nr_to_scan == 0) |
| 1000 | break; | 1015 | goto out; |
| 1016 | spin_lock(&sbi->s_es_lock); | ||
| 1001 | } | 1017 | } |
| 1002 | 1018 | spin_unlock(&sbi->s_es_lock); | |
| 1003 | /* Move the newer inodes into the tail of the LRU list. */ | ||
| 1004 | list_splice_tail(&skipped, &sbi->s_es_lru); | ||
| 1005 | INIT_LIST_HEAD(&skipped); | ||
| 1006 | 1019 | ||
| 1007 | /* | 1020 | /* |
| 1008 | * If we skipped any inodes, and we weren't able to make any | 1021 | * If we skipped any inodes, and we weren't able to make any |
| 1009 | * forward progress, sort the list and try again. | 1022 | * forward progress, try again to scan precached inodes. |
| 1010 | */ | 1023 | */ |
| 1011 | if ((nr_shrunk == 0) && nr_skipped && !retried) { | 1024 | if ((nr_shrunk == 0) && nr_skipped && !retried) { |
| 1012 | retried++; | 1025 | retried++; |
| 1013 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
| 1014 | es_stats->es_stats_last_sorted = jiffies; | ||
| 1015 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, | ||
| 1016 | i_es_lru); | ||
| 1017 | /* | ||
| 1018 | * If there are no non-precached inodes left on the | ||
| 1019 | * list, start releasing precached extents. | ||
| 1020 | */ | ||
| 1021 | if (ext4_test_inode_state(&ei->vfs_inode, | ||
| 1022 | EXT4_STATE_EXT_PRECACHED)) | ||
| 1023 | skip_precached = 0; | ||
| 1024 | goto retry; | 1026 | goto retry; |
| 1025 | } | 1027 | } |
| 1026 | 1028 | ||
| 1027 | spin_unlock(&sbi->s_es_lru_lock); | ||
| 1028 | |||
| 1029 | if (locked_ei && nr_shrunk == 0) | 1029 | if (locked_ei && nr_shrunk == 0) |
| 1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); | 1030 | nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); |
| 1031 | 1031 | ||
| 1032 | out: | ||
| 1032 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | 1033 | scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); |
| 1033 | if (likely(es_stats->es_stats_scan_time)) | 1034 | if (likely(es_stats->es_stats_scan_time)) |
| 1034 | es_stats->es_stats_scan_time = (scan_time + | 1035 | es_stats->es_stats_scan_time = (scan_time + |
| @@ -1043,7 +1044,7 @@ retry: | |||
| 1043 | else | 1044 | else |
| 1044 | es_stats->es_stats_shrunk = nr_shrunk; | 1045 | es_stats->es_stats_shrunk = nr_shrunk; |
| 1045 | 1046 | ||
| 1046 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, | 1047 | trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, |
| 1047 | nr_skipped, retried); | 1048 | nr_skipped, retried); |
| 1048 | return nr_shrunk; | 1049 | return nr_shrunk; |
| 1049 | } | 1050 | } |
| @@ -1055,7 +1056,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, | |||
| 1055 | struct ext4_sb_info *sbi; | 1056 | struct ext4_sb_info *sbi; |
| 1056 | 1057 | ||
| 1057 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); | 1058 | sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); |
| 1058 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1059 | nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
| 1059 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); | 1060 | trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); |
| 1060 | return nr; | 1061 | return nr; |
| 1061 | } | 1062 | } |
| @@ -1068,13 +1069,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, | |||
| 1068 | int nr_to_scan = sc->nr_to_scan; | 1069 | int nr_to_scan = sc->nr_to_scan; |
| 1069 | int ret, nr_shrunk; | 1070 | int ret, nr_shrunk; |
| 1070 | 1071 | ||
| 1071 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); | 1072 | ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); |
| 1072 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); | 1073 | trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); |
| 1073 | 1074 | ||
| 1074 | if (!nr_to_scan) | 1075 | if (!nr_to_scan) |
| 1075 | return ret; | 1076 | return ret; |
| 1076 | 1077 | ||
| 1077 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | 1078 | nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); |
| 1078 | 1079 | ||
| 1079 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); | 1080 | trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); |
| 1080 | return nr_shrunk; | 1081 | return nr_shrunk; |
| @@ -1102,28 +1103,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
| 1102 | return 0; | 1103 | return 0; |
| 1103 | 1104 | ||
| 1104 | /* here we just find an inode that has the max nr. of objects */ | 1105 | /* here we just find an inode that has the max nr. of objects */ |
| 1105 | spin_lock(&sbi->s_es_lru_lock); | 1106 | spin_lock(&sbi->s_es_lock); |
| 1106 | list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { | 1107 | list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { |
| 1107 | inode_cnt++; | 1108 | inode_cnt++; |
| 1108 | if (max && max->i_es_all_nr < ei->i_es_all_nr) | 1109 | if (max && max->i_es_all_nr < ei->i_es_all_nr) |
| 1109 | max = ei; | 1110 | max = ei; |
| 1110 | else if (!max) | 1111 | else if (!max) |
| 1111 | max = ei; | 1112 | max = ei; |
| 1112 | } | 1113 | } |
| 1113 | spin_unlock(&sbi->s_es_lru_lock); | 1114 | spin_unlock(&sbi->s_es_lock); |
| 1114 | 1115 | ||
| 1115 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", | 1116 | seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", |
| 1116 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), | 1117 | percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), |
| 1117 | percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); | 1118 | percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); |
| 1118 | seq_printf(seq, " %lu/%lu cache hits/misses\n", | 1119 | seq_printf(seq, " %lu/%lu cache hits/misses\n", |
| 1119 | es_stats->es_stats_cache_hits, | 1120 | es_stats->es_stats_cache_hits, |
| 1120 | es_stats->es_stats_cache_misses); | 1121 | es_stats->es_stats_cache_misses); |
| 1121 | if (es_stats->es_stats_last_sorted != 0) | ||
| 1122 | seq_printf(seq, " %u ms last sorted interval\n", | ||
| 1123 | jiffies_to_msecs(jiffies - | ||
| 1124 | es_stats->es_stats_last_sorted)); | ||
| 1125 | if (inode_cnt) | 1122 | if (inode_cnt) |
| 1126 | seq_printf(seq, " %d inodes on lru list\n", inode_cnt); | 1123 | seq_printf(seq, " %d inodes on list\n", inode_cnt); |
| 1127 | 1124 | ||
| 1128 | seq_printf(seq, "average:\n %llu us scan time\n", | 1125 | seq_printf(seq, "average:\n %llu us scan time\n", |
| 1129 | div_u64(es_stats->es_stats_scan_time, 1000)); | 1126 | div_u64(es_stats->es_stats_scan_time, 1000)); |
| @@ -1132,7 +1129,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) | |||
| 1132 | seq_printf(seq, | 1129 | seq_printf(seq, |
| 1133 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" | 1130 | "maximum:\n %lu inode (%u objects, %u reclaimable)\n" |
| 1134 | " %llu us max scan time\n", | 1131 | " %llu us max scan time\n", |
| 1135 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, | 1132 | max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, |
| 1136 | div_u64(es_stats->es_stats_max_scan_time, 1000)); | 1133 | div_u64(es_stats->es_stats_max_scan_time, 1000)); |
| 1137 | 1134 | ||
| 1138 | return 0; | 1135 | return 0; |
| @@ -1181,9 +1178,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
| 1181 | { | 1178 | { |
| 1182 | int err; | 1179 | int err; |
| 1183 | 1180 | ||
| 1184 | INIT_LIST_HEAD(&sbi->s_es_lru); | 1181 | INIT_LIST_HEAD(&sbi->s_es_list); |
| 1185 | spin_lock_init(&sbi->s_es_lru_lock); | 1182 | sbi->s_es_nr_inode = 0; |
| 1186 | sbi->s_es_stats.es_stats_last_sorted = 0; | 1183 | spin_lock_init(&sbi->s_es_lock); |
| 1187 | sbi->s_es_stats.es_stats_shrunk = 0; | 1184 | sbi->s_es_stats.es_stats_shrunk = 0; |
| 1188 | sbi->s_es_stats.es_stats_cache_hits = 0; | 1185 | sbi->s_es_stats.es_stats_cache_hits = 0; |
| 1189 | sbi->s_es_stats.es_stats_cache_misses = 0; | 1186 | sbi->s_es_stats.es_stats_cache_misses = 0; |
| @@ -1192,7 +1189,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
| 1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); | 1189 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); |
| 1193 | if (err) | 1190 | if (err) |
| 1194 | return err; | 1191 | return err; |
| 1195 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); | 1192 | err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL); |
| 1196 | if (err) | 1193 | if (err) |
| 1197 | goto err1; | 1194 | goto err1; |
| 1198 | 1195 | ||
| @@ -1210,7 +1207,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) | |||
| 1210 | return 0; | 1207 | return 0; |
| 1211 | 1208 | ||
| 1212 | err2: | 1209 | err2: |
| 1213 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1210 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
| 1214 | err1: | 1211 | err1: |
| 1215 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1212 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
| 1216 | return err; | 1213 | return err; |
| @@ -1221,37 +1218,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) | |||
| 1221 | if (sbi->s_proc) | 1218 | if (sbi->s_proc) |
| 1222 | remove_proc_entry("es_shrinker_info", sbi->s_proc); | 1219 | remove_proc_entry("es_shrinker_info", sbi->s_proc); |
| 1223 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); | 1220 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); |
| 1224 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); | 1221 | percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); |
| 1225 | unregister_shrinker(&sbi->s_es_shrinker); | 1222 | unregister_shrinker(&sbi->s_es_shrinker); |
| 1226 | } | 1223 | } |
| 1227 | 1224 | ||
| 1228 | void ext4_es_lru_add(struct inode *inode) | ||
| 1229 | { | ||
| 1230 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 1231 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1232 | |||
| 1233 | ei->i_touch_when = jiffies; | ||
| 1234 | |||
| 1235 | if (!list_empty(&ei->i_es_lru)) | ||
| 1236 | return; | ||
| 1237 | |||
| 1238 | spin_lock(&sbi->s_es_lru_lock); | ||
| 1239 | if (list_empty(&ei->i_es_lru)) | ||
| 1240 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
| 1241 | spin_unlock(&sbi->s_es_lru_lock); | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | void ext4_es_lru_del(struct inode *inode) | ||
| 1245 | { | ||
| 1246 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 1247 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1248 | |||
| 1249 | spin_lock(&sbi->s_es_lru_lock); | ||
| 1250 | if (!list_empty(&ei->i_es_lru)) | ||
| 1251 | list_del_init(&ei->i_es_lru); | ||
| 1252 | spin_unlock(&sbi->s_es_lru_lock); | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 1225 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
| 1256 | int nr_to_scan) | 1226 | int nr_to_scan) |
| 1257 | { | 1227 | { |
| @@ -1263,7 +1233,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | |||
| 1263 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | 1233 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, |
| 1264 | DEFAULT_RATELIMIT_BURST); | 1234 | DEFAULT_RATELIMIT_BURST); |
| 1265 | 1235 | ||
| 1266 | if (ei->i_es_lru_nr == 0) | 1236 | if (ei->i_es_shk_nr == 0) |
| 1267 | return 0; | 1237 | return 0; |
| 1268 | 1238 | ||
| 1269 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && | 1239 | if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index efd5f970b501..0e6a33e81e5f 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
| @@ -65,14 +65,13 @@ struct ext4_es_tree { | |||
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | struct ext4_es_stats { | 67 | struct ext4_es_stats { |
| 68 | unsigned long es_stats_last_sorted; | ||
| 69 | unsigned long es_stats_shrunk; | 68 | unsigned long es_stats_shrunk; |
| 70 | unsigned long es_stats_cache_hits; | 69 | unsigned long es_stats_cache_hits; |
| 71 | unsigned long es_stats_cache_misses; | 70 | unsigned long es_stats_cache_misses; |
| 72 | u64 es_stats_scan_time; | 71 | u64 es_stats_scan_time; |
| 73 | u64 es_stats_max_scan_time; | 72 | u64 es_stats_max_scan_time; |
| 74 | struct percpu_counter es_stats_all_cnt; | 73 | struct percpu_counter es_stats_all_cnt; |
| 75 | struct percpu_counter es_stats_lru_cnt; | 74 | struct percpu_counter es_stats_shk_cnt; |
| 76 | }; | 75 | }; |
| 77 | 76 | ||
| 78 | extern int __init ext4_init_es(void); | 77 | extern int __init ext4_init_es(void); |
| @@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, | |||
| 151 | 150 | ||
| 152 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 151 | extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
| 153 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 152 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
| 154 | extern void ext4_es_lru_add(struct inode *inode); | 153 | extern void ext4_es_list_add(struct inode *inode); |
| 155 | extern void ext4_es_lru_del(struct inode *inode); | 154 | extern void ext4_es_list_del(struct inode *inode); |
| 156 | 155 | ||
| 157 | #endif /* _EXT4_EXTENTS_STATUS_H */ | 156 | #endif /* _EXT4_EXTENTS_STATUS_H */ |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d5a46a8df70b..540b0b0481a5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -486,7 +486,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 486 | 486 | ||
| 487 | /* Lookup extent status tree firstly */ | 487 | /* Lookup extent status tree firstly */ |
| 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
| 489 | ext4_es_lru_add(inode); | 489 | ext4_es_list_add(inode); |
| 490 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 490 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
| 491 | map->m_pblk = ext4_es_pblock(&es) + | 491 | map->m_pblk = ext4_es_pblock(&es) + |
| 492 | map->m_lblk - es.es_lblk; | 492 | map->m_lblk - es.es_lblk; |
| @@ -1388,7 +1388,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
| 1388 | 1388 | ||
| 1389 | /* Lookup extent status tree firstly */ | 1389 | /* Lookup extent status tree firstly */ |
| 1390 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1390 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
| 1391 | ext4_es_lru_add(inode); | 1391 | ext4_es_list_add(inode); |
| 1392 | if (ext4_es_is_hole(&es)) { | 1392 | if (ext4_es_is_hole(&es)) { |
| 1393 | retval = 0; | 1393 | retval = 0; |
| 1394 | down_read(&EXT4_I(inode)->i_data_sem); | 1394 | down_read(&EXT4_I(inode)->i_data_sem); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bfda18a15592..7b377c41dd81 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -78,8 +78,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
| 78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 78 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
| 79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); | 79 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
| 80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); | 80 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
| 81 | ext4_es_lru_del(inode1); | 81 | ext4_es_list_del(inode1); |
| 82 | ext4_es_lru_del(inode2); | 82 | ext4_es_list_del(inode2); |
| 83 | 83 | ||
| 84 | isize = i_size_read(inode1); | 84 | isize = i_size_read(inode1); |
| 85 | i_size_write(inode1, i_size_read(inode2)); | 85 | i_size_write(inode1, i_size_read(inode2)); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4b79f39ebf66..32df08e99ca9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -871,10 +871,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 871 | spin_lock_init(&ei->i_prealloc_lock); | 871 | spin_lock_init(&ei->i_prealloc_lock); |
| 872 | ext4_es_init_tree(&ei->i_es_tree); | 872 | ext4_es_init_tree(&ei->i_es_tree); |
| 873 | rwlock_init(&ei->i_es_lock); | 873 | rwlock_init(&ei->i_es_lock); |
| 874 | INIT_LIST_HEAD(&ei->i_es_lru); | 874 | INIT_LIST_HEAD(&ei->i_es_list); |
| 875 | ei->i_es_all_nr = 0; | 875 | ei->i_es_all_nr = 0; |
| 876 | ei->i_es_lru_nr = 0; | 876 | ei->i_es_shk_nr = 0; |
| 877 | ei->i_touch_when = 0; | ||
| 878 | ei->i_reserved_data_blocks = 0; | 877 | ei->i_reserved_data_blocks = 0; |
| 879 | ei->i_reserved_meta_blocks = 0; | 878 | ei->i_reserved_meta_blocks = 0; |
| 880 | ei->i_allocated_meta_blocks = 0; | 879 | ei->i_allocated_meta_blocks = 0; |
| @@ -963,7 +962,7 @@ void ext4_clear_inode(struct inode *inode) | |||
| 963 | dquot_drop(inode); | 962 | dquot_drop(inode); |
| 964 | ext4_discard_preallocations(inode); | 963 | ext4_discard_preallocations(inode); |
| 965 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 964 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
| 966 | ext4_es_lru_del(inode); | 965 | ext4_es_list_del(inode); |
| 967 | if (EXT4_I(inode)->jinode) { | 966 | if (EXT4_I(inode)->jinode) { |
| 968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 967 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
| 969 | EXT4_I(inode)->jinode); | 968 | EXT4_I(inode)->jinode); |
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index cd37a584ee88..6cfb841fea7c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
| @@ -2450,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range, | |||
| 2450 | 2450 | ||
| 2451 | TRACE_EVENT(ext4_es_shrink, | 2451 | TRACE_EVENT(ext4_es_shrink, |
| 2452 | TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, | 2452 | TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, |
| 2453 | int skip_precached, int nr_skipped, int retried), | 2453 | int nr_skipped, int retried), |
| 2454 | 2454 | ||
| 2455 | TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), | 2455 | TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried), |
| 2456 | 2456 | ||
| 2457 | TP_STRUCT__entry( | 2457 | TP_STRUCT__entry( |
| 2458 | __field( dev_t, dev ) | 2458 | __field( dev_t, dev ) |
| 2459 | __field( int, nr_shrunk ) | 2459 | __field( int, nr_shrunk ) |
| 2460 | __field( unsigned long long, scan_time ) | 2460 | __field( unsigned long long, scan_time ) |
| 2461 | __field( int, skip_precached ) | ||
| 2462 | __field( int, nr_skipped ) | 2461 | __field( int, nr_skipped ) |
| 2463 | __field( int, retried ) | 2462 | __field( int, retried ) |
| 2464 | ), | 2463 | ), |
| @@ -2467,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink, | |||
| 2467 | __entry->dev = sb->s_dev; | 2466 | __entry->dev = sb->s_dev; |
| 2468 | __entry->nr_shrunk = nr_shrunk; | 2467 | __entry->nr_shrunk = nr_shrunk; |
| 2469 | __entry->scan_time = div_u64(scan_time, 1000); | 2468 | __entry->scan_time = div_u64(scan_time, 1000); |
| 2470 | __entry->skip_precached = skip_precached; | ||
| 2471 | __entry->nr_skipped = nr_skipped; | 2469 | __entry->nr_skipped = nr_skipped; |
| 2472 | __entry->retried = retried; | 2470 | __entry->retried = retried; |
| 2473 | ), | 2471 | ), |
| 2474 | 2472 | ||
| 2475 | TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " | 2473 | TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu " |
| 2476 | "nr_skipped %d retried %d", | 2474 | "nr_skipped %d retried %d", |
| 2477 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, | 2475 | MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, |
| 2478 | __entry->scan_time, __entry->skip_precached, | 2476 | __entry->scan_time, __entry->nr_skipped, __entry->retried) |
| 2479 | __entry->nr_skipped, __entry->retried) | ||
| 2480 | ); | 2477 | ); |
| 2481 | 2478 | ||
| 2482 | #endif /* _TRACE_EXT4_H */ | 2479 | #endif /* _TRACE_EXT4_H */ |
