Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r--	fs/ext4/extents_status.c	321
1 file changed, 167 insertions(+), 154 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 94e7855ae71b..e04d45733976 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_lblk_t end);
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan);
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei);
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei);
 
 int __init ext4_init_es(void)
 {
@@ -298,6 +297,36 @@ out:
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
+static void ext4_es_list_add(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!list_empty(&ei->i_es_list))
+		return;
+
+	spin_lock(&sbi->s_es_lock);
+	if (list_empty(&ei->i_es_list)) {
+		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
+		sbi->s_es_nr_inode++;
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
+static void ext4_es_list_del(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	spin_lock(&sbi->s_es_lock);
+	if (!list_empty(&ei->i_es_list)) {
+		list_del_init(&ei->i_es_list);
+		sbi->s_es_nr_inode--;
+		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
 static struct extent_status *
 ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 		     ext4_fsblk_t pblk)
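
The ext4_es_list_add()/ext4_es_list_del() helpers added above use a double-checked pattern: list_empty() is tested once without s_es_lock as a cheap fast path, then re-tested under the lock so that two racing callers cannot link the same inode twice (an unlinked list_head points to itself, which is what makes the check work even off-list). Below is a minimal userspace sketch of the same pattern, assuming a pthread mutex in place of the spinlock; every name in it is invented for illustration, none of it is kernel API.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Intrusive circular list node; an unlinked node points to itself,
     * mirroring the kernel's list_empty() check on an entry. */
    struct node { struct node *prev, *next; };

    static struct node es_list = { &es_list, &es_list };  /* empty head */
    static pthread_mutex_t es_lock = PTHREAD_MUTEX_INITIALIZER;
    static int es_nr_inode;

    static bool node_unlinked(const struct node *n) { return n->next == n; }

    static void add_tail(struct node *n, struct node *head)
    {
            n->prev = head->prev;
            n->next = head;
            head->prev->next = n;
            head->prev = n;
    }

    static void es_list_add(struct node *n)
    {
            if (!node_unlinked(n))          /* unlocked fast path */
                    return;
            pthread_mutex_lock(&es_lock);
            if (node_unlinked(n)) {         /* re-check under the lock */
                    add_tail(n, &es_list);
                    es_nr_inode++;
            }
            pthread_mutex_unlock(&es_lock);
    }

    int main(void)
    {
            struct node a = { &a, &a };     /* starts unlinked */
            es_list_add(&a);
            es_list_add(&a);                /* no-op: already linked */
            printf("%d\n", es_nr_inode);    /* prints 1 */
            return 0;
    }

The unlocked first read is only an optimization; correctness comes from the re-check under the lock. The kernel's memory model tolerates that racy read, while strictly portable C would want an atomic load there.
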
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_lru_nr++;
+		if (!EXT4_I(inode)->i_es_shk_nr++)
+			ext4_es_list_add(inode);
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	EXT4_I(inode)->i_es_all_nr++;
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	EXT4_I(inode)->i_es_all_nr--;
 	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
 
-	/* Decrease the lru counter when this es is not delayed */
+	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
-		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
-		EXT4_I(inode)->i_es_lru_nr--;
+		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
+		if (!--EXT4_I(inode)->i_es_shk_nr)
+			ext4_es_list_del(inode);
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 static int ext4_es_can_be_merged(struct extent_status *es1,
 				 struct extent_status *es2)
 {
-	if (ext4_es_status(es1) != ext4_es_status(es2))
+	if (ext4_es_type(es1) != ext4_es_type(es2))
 		return 0;
 
 	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
 		es1 = rb_entry(node, struct extent_status, rb_node);
 		if (ext4_es_can_be_merged(es1, es)) {
 			es1->es_len += es->es_len;
+			if (ext4_es_is_referenced(es))
+				ext4_es_set_referenced(es1);
 			rb_erase(&es->rb_node, &tree->root);
 			ext4_es_free_extent(inode, es);
 			es = es1;
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
 		es1 = rb_entry(node, struct extent_status, rb_node);
 		if (ext4_es_can_be_merged(es, es1)) {
 			es->es_len += es1->es_len;
+			if (ext4_es_is_referenced(es1))
+				ext4_es_set_referenced(es);
 			rb_erase(node, &tree->root);
 			ext4_es_free_extent(inode, es1);
 		}
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 		goto error;
 retry:
 	err = __es_insert_extent(inode, &newes);
-	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					       EXT4_I(inode)))
+	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
+					  128, EXT4_I(inode)))
 		goto retry;
 	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
 		err = 0;
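
Two things change in this call: the function name and the batch size. The old code asked the shrinker to reclaim a single extent per -ENOMEM; the new code asks __es_shrink() for 128, so one failed allocation buys a meaningful amount of reclaim. The retry loop stays bounded because __es_shrink() returns the number of extents it actually freed, and a return of 0 falls through instead of spinning against an exhausted cache. A compilable sketch of that shape, with try_insert() and shrink_cache() as invented stand-ins rather than ext4 functions:

    #include <errno.h>
    #include <stdio.h>

    static int attempts;

    /* Stand-in for __es_insert_extent(): fail twice, then succeed. */
    static int try_insert(void)
    {
            return attempts++ < 2 ? -ENOMEM : 0;
    }

    /* Stand-in for __es_shrink(): returns how many objects it freed. */
    static int shrink_cache(int nr_to_scan)
    {
            return nr_to_scan;      /* pretend the whole batch was freed */
    }

    static int insert_with_reclaim(void)
    {
            int err;
    retry:
            err = try_insert();
            /* Retry only while reclaim reports progress, so an empty
             * cache cannot turn this into an infinite loop. */
            if (err == -ENOMEM && shrink_cache(128))
                    goto retry;
            return err;
    }

    int main(void)
    {
            printf("err = %d\n", insert_with_reclaim());    /* err = 0 */
            return 0;
    }
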
@@ -782,6 +817,8 @@ out:
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
+		if (!ext4_es_is_referenced(es1))
+			ext4_es_set_referenced(es1);
 		stats->es_stats_cache_hits++;
 	} else {
 		stats->es_stats_cache_misses++;
@@ -841,8 +878,8 @@ retry:
 			es->es_lblk = orig_es.es_lblk;
 			es->es_len = orig_es.es_len;
 			if ((err == -ENOMEM) &&
-			    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					     EXT4_I(inode)))
+			    __es_shrink(EXT4_SB(inode->i_sb),
+					128, EXT4_I(inode)))
 				goto retry;
 			goto out;
 		}
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	end = lblk + len - 1;
 	BUG_ON(end < lblk);
 
+	/*
+	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
+	 * so that we are sure __es_shrink() is done with the inode before it
+	 * is reclaimed.
+	 */
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	return err;
 }
 
-static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
-				     struct list_head *b)
-{
-	struct ext4_inode_info *eia, *eib;
-	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
-	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
-
-	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return 1;
-	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return -1;
-	if (eia->i_touch_when == eib->i_touch_when)
-		return 0;
-	if (time_after(eia->i_touch_when, eib->i_touch_when))
-		return 1;
-	else
-		return -1;
-}
-
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei)
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
 	struct ext4_es_stats *es_stats;
-	struct list_head *cur, *tmp;
-	LIST_HEAD(skipped);
 	ktime_t start_time;
 	u64 scan_time;
+	int nr_to_walk;
 	int nr_shrunk = 0;
-	int retried = 0, skip_precached = 1, nr_skipped = 0;
+	int retried = 0, nr_skipped = 0;
 
 	es_stats = &sbi->s_es_stats;
 	start_time = ktime_get();
-	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
-	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
-		int shrunk;
-
-		/*
-		 * If we have already reclaimed all extents from extent
-		 * status tree, just stop the loop immediately.
-		 */
-		if (percpu_counter_read_positive(
-				&es_stats->es_stats_lru_cnt) == 0)
-			break;
-
-		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+	spin_lock(&sbi->s_es_lock);
+	nr_to_walk = sbi->s_es_nr_inode;
+	while (nr_to_walk-- > 0) {
+		if (list_empty(&sbi->s_es_list)) {
+			spin_unlock(&sbi->s_es_lock);
+			goto out;
+		}
+		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
+				      i_es_list);
+		/* Move the inode to the tail */
+		list_move_tail(&ei->i_es_list, &sbi->s_es_list);
 
 		/*
-		 * Skip the inode that is newer than the last_sorted
-		 * time.  Normally we try hard to avoid shrinking
-		 * precached inodes, but we will as a last resort.
+		 * Normally we try hard to avoid shrinking precached inodes,
+		 * but we will as a last resort.
 		 */
-		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
-		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
-						EXT4_STATE_EXT_PRECACHED))) {
+		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
+						      EXT4_STATE_EXT_PRECACHED)) {
 			nr_skipped++;
-			list_move_tail(cur, &skipped);
 			continue;
 		}
 
-		if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
-		    !write_trylock(&ei->i_es_lock))
+		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
+			nr_skipped++;
 			continue;
+		}
+		/*
+		 * Now we hold i_es_lock which protects us from inode reclaim
+		 * freeing inode under us
+		 */
+		spin_unlock(&sbi->s_es_lock);
 
-		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
-		if (ei->i_es_lru_nr == 0)
-			list_del_init(&ei->i_es_lru);
+		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
 		write_unlock(&ei->i_es_lock);
 
-		nr_shrunk += shrunk;
-		nr_to_scan -= shrunk;
-		if (nr_to_scan == 0)
-			break;
+		if (nr_to_scan <= 0)
+			goto out;
+		spin_lock(&sbi->s_es_lock);
 	}
-
-	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skipped, &sbi->s_es_lru);
-	INIT_LIST_HEAD(&skipped);
+	spin_unlock(&sbi->s_es_lock);
 
 	/*
 	 * If we skipped any inodes, and we weren't able to make any
-	 * forward progress, sort the list and try again.
+	 * forward progress, try again to scan precached inodes.
 	 */
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		es_stats->es_stats_last_sorted = jiffies;
-		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
-				      i_es_lru);
-		/*
-		 * If there are no non-precached inodes left on the
-		 * list, start releasing precached extents.
-		 */
-		if (ext4_test_inode_state(&ei->vfs_inode,
-					  EXT4_STATE_EXT_PRECACHED))
-			skip_precached = 0;
 		goto retry;
 	}
 
-	spin_unlock(&sbi->s_es_lru_lock);
-
 	if (locked_ei && nr_shrunk == 0)
-		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
 
+out:
 	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
 	if (likely(es_stats->es_stats_scan_time))
 		es_stats->es_stats_scan_time = (scan_time +
@@ -1043,7 +1046,7 @@ retry:
 	else
 		es_stats->es_stats_shrunk = nr_shrunk;
 
-	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
 			     nr_skipped, retried);
 	return nr_shrunk;
 }
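
Taken together, the rewritten __es_shrink() above walks inodes round-robin instead of by LRU age: it snapshots s_es_nr_inode to bound a single pass, rotates each inode to the tail of s_es_list before scanning it, and skips rather than sleeps on inodes whose i_es_lock is contended, counting them in nr_skipped. A self-contained toy model of the rotation follows; it uses an array where the kernel uses lists and rbtrees, none of the names are kernel API, and it omits the precached-inode handling and trylock that the real loop performs.

    #include <stdio.h>

    #define NR_INODES 4

    /* Reclaimable extents cached per inode (stand-in for the rbtrees). */
    static int cached[NR_INODES] = { 5, 3, 8, 2 };
    static int cursor;      /* round-robin position: the list head */

    static int shrink_scan(int nr_to_scan)
    {
            int nr_shrunk = 0;
            int nr_to_walk = NR_INODES;     /* snapshot bounds one pass */

            while (nr_to_walk-- > 0 && nr_to_scan > 0) {
                    int i = cursor;
                    cursor = (cursor + 1) % NR_INODES;  /* "move to tail" */
                    while (cached[i] > 0 && nr_to_scan > 0) {
                            cached[i]--;    /* reclaim one extent */
                            nr_shrunk++;
                            nr_to_scan--;
                    }
            }
            return nr_shrunk;
    }

    int main(void)
    {
            printf("%d\n", shrink_scan(6)); /* 6: drains inode 0, dents 1 */
            printf("%d\n", shrink_scan(6)); /* 6: resumes at inode 2 */
            return 0;
    }

The payoff is fairness: repeated memory pressure no longer hammers whichever inode happens to sit at the head, because every scanned inode is pushed to the back before the next call arrives.
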
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
-	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 		return 0;
 
 	/* here we just find an inode that has the max nr. of objects */
-	spin_lock(&sbi->s_es_lru_lock);
-	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+	spin_lock(&sbi->s_es_lock);
+	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
 		inode_cnt++;
 		if (max && max->i_es_all_nr < ei->i_es_all_nr)
 			max = ei;
 		else if (!max)
 			max = ei;
 	}
-	spin_unlock(&sbi->s_es_lru_lock);
+	spin_unlock(&sbi->s_es_lock);
 
 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
-		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
 	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
 		   es_stats->es_stats_cache_hits,
 		   es_stats->es_stats_cache_misses);
-	if (es_stats->es_stats_last_sorted != 0)
-		seq_printf(seq, "  %u ms last sorted interval\n",
-			   jiffies_to_msecs(jiffies -
-					    es_stats->es_stats_last_sorted));
 	if (inode_cnt)
-		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
 	seq_printf(seq, "average:\n  %llu us scan time\n",
 		   div_u64(es_stats->es_stats_scan_time, 1000));
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
 		   "  %llu us max scan time\n",
-		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+		   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
 		   div_u64(es_stats->es_stats_max_scan_time, 1000));
 
 	return 0;
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 {
 	int err;
 
-	INIT_LIST_HEAD(&sbi->s_es_lru);
-	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_stats.es_stats_last_sorted = 0;
+	/* Make sure we have enough bits for physical block number */
+	BUILD_BUG_ON(ES_SHIFT < 48);
+	INIT_LIST_HEAD(&sbi->s_es_list);
+	sbi->s_es_nr_inode = 0;
+	spin_lock_init(&sbi->s_es_lock);
 	sbi->s_es_stats.es_stats_shrunk = 0;
 	sbi->s_es_stats.es_stats_cache_hits = 0;
 	sbi->s_es_stats.es_stats_cache_misses = 0;
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
 	if (err)
 		return err;
-	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
 	if (err)
 		goto err1;
 
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	return 0;
 
 err2:
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	return err;
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 	if (sbi->s_proc)
 		remove_proc_entry("es_shrinker_info", sbi->s_proc);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
-void ext4_es_lru_add(struct inode *inode)
+/*
+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
+ * most *nr_to_scan extents, update *nr_to_scan accordingly.
+ *
+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
+ * Increment *nr_shrunk by the number of reclaimed extents. Also update
+ * ei->i_es_shrink_lblk to where we should continue scanning.
+ */
+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
+				 int *nr_to_scan, int *nr_shrunk)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	ei->i_touch_when = jiffies;
-
-	if (!list_empty(&ei->i_es_lru))
-		return;
+	struct inode *inode = &ei->vfs_inode;
+	struct ext4_es_tree *tree = &ei->i_es_tree;
+	struct extent_status *es;
+	struct rb_node *node;
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (list_empty(&ei->i_es_lru))
-		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
+	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
+	if (!es)
+		goto out_wrap;
+	node = &es->rb_node;
+	while (*nr_to_scan > 0) {
+		if (es->es_lblk > end) {
+			ei->i_es_shrink_lblk = end + 1;
+			return 0;
+		}
 
-void ext4_es_lru_del(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+		(*nr_to_scan)--;
+		node = rb_next(&es->rb_node);
+		/*
+		 * We can't reclaim delayed extent from status tree because
+		 * fiemap, bigalloc, and seek_data/hole need to use it.
+		 */
+		if (ext4_es_is_delayed(es))
+			goto next;
+		if (ext4_es_is_referenced(es)) {
+			ext4_es_clear_referenced(es);
+			goto next;
+		}
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (!list_empty(&ei->i_es_lru))
-		list_del_init(&ei->i_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(inode, es);
+		(*nr_shrunk)++;
+next:
+		if (!node)
+			goto out_wrap;
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+	ei->i_es_shrink_lblk = es->es_lblk;
+	return 1;
+out_wrap:
+	ei->i_es_shrink_lblk = 0;
+	return 0;
 }
 
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan)
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
 {
 	struct inode *inode = &ei->vfs_inode;
-	struct ext4_es_tree *tree = &ei->i_es_tree;
-	struct rb_node *node;
-	struct extent_status *es;
-	unsigned long nr_shrunk = 0;
+	int nr_shrunk = 0;
+	ext4_lblk_t start = ei->i_es_shrink_lblk;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if (ei->i_es_lru_nr == 0)
+	if (ei->i_es_shk_nr == 0)
 		return 0;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
 	    __ratelimit(&_rs))
 		ext4_warning(inode->i_sb, "forced shrink of precached extents");
 
-	node = rb_first(&tree->root);
-	while (node != NULL) {
-		es = rb_entry(node, struct extent_status, rb_node);
-		node = rb_next(&es->rb_node);
-		/*
-		 * We can't reclaim delayed extent from status tree because
-		 * fiemap, bigalloc, and seek_data/hole need to use it.
-		 */
-		if (!ext4_es_is_delayed(es)) {
-			rb_erase(&es->rb_node, &tree->root);
-			ext4_es_free_extent(inode, es);
-			nr_shrunk++;
-			if (--nr_to_scan == 0)
-				break;
-		}
-	}
-	tree->cache_es = NULL;
+	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
+	    start != 0)
+		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
+
+	ei->i_es_tree.cache_es = NULL;
	return nr_shrunk;
 }
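
The pair es_reclaim_extents()/es_do_reclaim_extents() gives each inode a wrap-around cursor: a scan resumes at i_es_shrink_lblk, runs toward EXT_MAX_BLOCKS, and if the budget survives to the end of the tree it wraps to cover [0, start - 1]. Extents carrying the referenced bit get a second chance: the bit is cleared and the extent is kept for one more pass, so only extents untouched since the previous scan are freed. A self-contained model of both behaviours, with a flat array standing in for the rbtree and all names hypothetical:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_BLOCKS 8

    static bool present[NR_BLOCKS]    = { 1, 1, 1, 1, 1, 1, 1, 1 };
    static bool referenced[NR_BLOCKS] = { 0, 1, 0, 0, 1, 0, 0, 0 };
    static int shrink_lblk;     /* resume cursor, like i_es_shrink_lblk */

    /* Returns 1 if the scan budget ran out, 0 if we reached 'end'. */
    static int do_reclaim(int start, int end, int *nr_to_scan, int *nr_shrunk)
    {
            for (int i = start; i <= end; i++) {
                    if (*nr_to_scan <= 0) {
                            shrink_lblk = i;        /* resume here next time */
                            return 1;
                    }
                    if (!present[i])
                            continue;
                    (*nr_to_scan)--;
                    if (referenced[i]) {            /* second chance */
                            referenced[i] = false;
                            continue;
                    }
                    present[i] = false;             /* reclaim the extent */
                    (*nr_shrunk)++;
            }
            shrink_lblk = 0;                        /* hit the end: wrap */
            return 0;
    }

    static int reclaim_extents(int *nr_to_scan)
    {
            int start = shrink_lblk, nr_shrunk = 0;

            /* First leg [start, end]; if it completes, wrap to [0, start-1]. */
            if (!do_reclaim(start, NR_BLOCKS - 1, nr_to_scan, &nr_shrunk) &&
                start != 0)
                    do_reclaim(0, start - 1, nr_to_scan, &nr_shrunk);
            return nr_shrunk;
    }

    int main(void)
    {
            int budget = 5;
            /* Reclaims 3 of the 5 scanned: two were referenced and survive. */
            printf("%d\n", reclaim_extents(&budget));
            return 0;
    }
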