Diffstat (limited to 'fs/ext4/extents_status.c')
 fs/ext4/extents_status.c | 321 ++++++++++++++++++++++-------------------
 1 file changed, 167 insertions(+), 154 deletions(-)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 94e7855ae71b..e04d45733976 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
 static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_lblk_t end);
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan);
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei);
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei);
 
 int __init ext4_init_es(void)
 {
@@ -298,6 +297,36 @@ out:
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
+static void ext4_es_list_add(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (!list_empty(&ei->i_es_list))
+		return;
+
+	spin_lock(&sbi->s_es_lock);
+	if (list_empty(&ei->i_es_list)) {
+		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
+		sbi->s_es_nr_inode++;
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
+static void ext4_es_list_del(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	spin_lock(&sbi->s_es_lock);
+	if (!list_empty(&ei->i_es_list)) {
+		list_del_init(&ei->i_es_list);
+		sbi->s_es_nr_inode--;
+		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
+	}
+	spin_unlock(&sbi->s_es_lock);
+}
+
 static struct extent_status *
 ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 		     ext4_fsblk_t pblk)
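
The new ext4_es_list_add() is a double-checked pattern: the unlocked list_empty() test skips s_es_lock in the common case where the inode is already queued, and the test is repeated under the lock because another CPU may have added the inode in the meantime. A stale unlocked read is harmless here; it costs at most one extra lock round-trip and can never cause a double insertion. A minimal userspace model of the idiom (pthread locking and all names below are illustrative, not from the patch):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool on_list;	/* stands in for !list_empty(&ei->i_es_list) */
static int nr_items;	/* stands in for sbi->s_es_nr_inode */

static void list_add_once(void)
{
	/* Cheap unlocked check: usually the item is already queued. */
	if (on_list)
		return;

	pthread_mutex_lock(&lock);
	/* Re-check under the lock; another thread may have won the race. */
	if (!on_list) {
		on_list = true;
		nr_items++;
	}
	pthread_mutex_unlock(&lock);
}
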
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_lru_nr++;
+		if (!EXT4_I(inode)->i_es_shk_nr++)
+			ext4_es_list_add(inode);
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	EXT4_I(inode)->i_es_all_nr++;
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	EXT4_I(inode)->i_es_all_nr--;
 	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
 
-	/* Decrease the lru counter when this es is not delayed */
+	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
-		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
-		EXT4_I(inode)->i_es_lru_nr--;
+		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
+		if (!--EXT4_I(inode)->i_es_shk_nr)
+			ext4_es_list_del(inode);
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
-					s_es_stats.es_stats_lru_cnt);
+					s_es_stats.es_stats_shk_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 static int ext4_es_can_be_merged(struct extent_status *es1,
 				 struct extent_status *es2)
 {
-	if (ext4_es_status(es1) != ext4_es_status(es2))
+	if (ext4_es_type(es1) != ext4_es_type(es2))
 		return 0;
 
 	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
 	es1 = rb_entry(node, struct extent_status, rb_node);
 	if (ext4_es_can_be_merged(es1, es)) {
 		es1->es_len += es->es_len;
+		if (ext4_es_is_referenced(es))
+			ext4_es_set_referenced(es1);
 		rb_erase(&es->rb_node, &tree->root);
 		ext4_es_free_extent(inode, es);
 		es = es1;
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
 		es1 = rb_entry(node, struct extent_status, rb_node);
 		if (ext4_es_can_be_merged(es, es1)) {
 			es->es_len += es1->es_len;
+			if (ext4_es_is_referenced(es1))
+				ext4_es_set_referenced(es);
 			rb_erase(node, &tree->root);
 			ext4_es_free_extent(inode, es1);
 		}
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 		goto error;
 retry:
 	err = __es_insert_extent(inode, &newes);
-	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-					       EXT4_I(inode)))
+	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
+					  128, EXT4_I(inode)))
 		goto retry;
 	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
 		err = 0;
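
ext4_es_insert_extent() now reacts to allocation failure by shrinking its own cache and retrying: on -ENOMEM it asks __es_shrink() to reclaim 128 objects (instead of the old single object, so one reclaim pass funds several future allocations) and loops only if reclaim made progress. The shape of that loop as a hedged sketch, with hypothetical try_insert()/reclaim_some() helpers standing in for __es_insert_extent()/__es_shrink():

#include <errno.h>

int try_insert(void *item);	/* hypothetical: returns 0 or -ENOMEM */
int reclaim_some(int nr);	/* hypothetical: returns objects freed */

static int insert_with_reclaim(void *item)
{
	int err;

retry:
	err = try_insert(item);
	/*
	 * Retry only when reclaim actually freed something; if nothing
	 * could be reclaimed, looping would just spin on -ENOMEM.
	 */
	if (err == -ENOMEM && reclaim_some(128) > 0)
		goto retry;
	return err;
}
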
@@ -782,6 +817,8 @@ out:
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
+		if (!ext4_es_is_referenced(es))
+			ext4_es_set_referenced(es);
 		stats->es_stats_cache_hits++;
 	} else {
 		stats->es_stats_cache_misses++;
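
Rather than rotating an inode to the front of an LRU on every cache hit, the lookup path now only sets a "referenced" bit on the extent it found; the shrinker treats that bit as a second-chance signal. A toy model of the policy follows (not the ext4 implementation: in ext4 the bit is one of the status bits packed into the upper part of es_pblk, while the sketch uses a plain bool):

#include <stdbool.h>

struct entry {
	bool referenced;	/* models the extent-status referenced bit */
	/* ... payload ... */
};

/* Cache hit: one flag write, no list moves, no global lock traffic. */
static void on_lookup_hit(struct entry *e)
{
	if (!e->referenced)
		e->referenced = true;
}

/* Reclaim pass: a referenced entry gets one reprieve, then goes. */
static bool should_reclaim(struct entry *e)
{
	if (e->referenced) {
		e->referenced = false;	/* second chance spent */
		return false;
	}
	return true;
}
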
@@ -841,8 +878,8 @@ retry:
 				es->es_lblk = orig_es.es_lblk;
 				es->es_len = orig_es.es_len;
 				if ((err == -ENOMEM) &&
-				    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
-						     EXT4_I(inode)))
+				    __es_shrink(EXT4_SB(inode->i_sb),
+						128, EXT4_I(inode)))
 					goto retry;
 				goto out;
 			}
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	end = lblk + len - 1;
 	BUG_ON(end < lblk);
 
+	/*
+	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
+	 * so that we are sure __es_shrink() is done with the inode before it
+	 * is reclaimed.
+	 */
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
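
The comment added above write_lock() documents a lifetime handshake rather than a data race: __es_shrink() only ever takes i_es_lock with write_trylock() and finishes all its work before releasing it, so an eviction path that takes the same lock unconditionally is guaranteed to block until the shrinker is done with the inode, after which the inode can be freed safely. A userspace model of the handshake (assumption: a pthread rwlock stands in for the kernel's i_es_lock):

#include <pthread.h>
#include <stdlib.h>

struct obj {
	pthread_rwlock_t lock;	/* models ei->i_es_lock */
	/* ... reclaimable cached state ... */
};

/* Shrinker side: never blocks, does all its work under the lock. */
static void shrinker_visit(struct obj *o)
{
	if (pthread_rwlock_trywrlock(&o->lock) != 0)
		return;		/* busy: skip this object */
	/* ... reclaim cached state ... */
	pthread_rwlock_unlock(&o->lock);
}

/* Eviction side: locks unconditionally, even with nothing to free. */
static void evict(struct obj *o)
{
	pthread_rwlock_wrlock(&o->lock);	/* waits out shrinker_visit() */
	/* ... drop remaining cached state ... */
	pthread_rwlock_unlock(&o->lock);
	pthread_rwlock_destroy(&o->lock);
	free(o);	/* safe: no shrinker still holds the lock */
}
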
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	return err;
 }
 
-static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
-				     struct list_head *b)
-{
-	struct ext4_inode_info *eia, *eib;
-	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
-	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
-
-	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return 1;
-	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
-	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
-		return -1;
-	if (eia->i_touch_when == eib->i_touch_when)
-		return 0;
-	if (time_after(eia->i_touch_when, eib->i_touch_when))
-		return 1;
-	else
-		return -1;
-}
-
-static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
-			    struct ext4_inode_info *locked_ei)
+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+		       struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
 	struct ext4_es_stats *es_stats;
-	struct list_head *cur, *tmp;
-	LIST_HEAD(skipped);
 	ktime_t start_time;
 	u64 scan_time;
+	int nr_to_walk;
 	int nr_shrunk = 0;
-	int retried = 0, skip_precached = 1, nr_skipped = 0;
+	int retried = 0, nr_skipped = 0;
 
 	es_stats = &sbi->s_es_stats;
 	start_time = ktime_get();
-	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
-	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
-		int shrunk;
-
-		/*
-		 * If we have already reclaimed all extents from extent
-		 * status tree, just stop the loop immediately.
-		 */
-		if (percpu_counter_read_positive(
-				&es_stats->es_stats_lru_cnt) == 0)
-			break;
-
-		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+	spin_lock(&sbi->s_es_lock);
+	nr_to_walk = sbi->s_es_nr_inode;
+	while (nr_to_walk-- > 0) {
+		if (list_empty(&sbi->s_es_list)) {
+			spin_unlock(&sbi->s_es_lock);
+			goto out;
+		}
+		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
+				      i_es_list);
+		/* Move the inode to the tail */
+		list_move_tail(&ei->i_es_list, &sbi->s_es_list);
 
 		/*
-		 * Skip the inode that is newer than the last_sorted
-		 * time.  Normally we try hard to avoid shrinking
-		 * precached inodes, but we will as a last resort.
+		 * Normally we try hard to avoid shrinking precached inodes,
+		 * but we will as a last resort.
 		 */
-		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
-		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
-						EXT4_STATE_EXT_PRECACHED))) {
+		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
+					EXT4_STATE_EXT_PRECACHED)) {
 			nr_skipped++;
-			list_move_tail(cur, &skipped);
 			continue;
 		}
 
-		if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
-		    !write_trylock(&ei->i_es_lock))
+		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
+			nr_skipped++;
 			continue;
+		}
+		/*
+		 * Now we hold i_es_lock which protects us from inode reclaim
+		 * freeing inode under us
+		 */
+		spin_unlock(&sbi->s_es_lock);
 
-		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
-		if (ei->i_es_lru_nr == 0)
-			list_del_init(&ei->i_es_lru);
+		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
 		write_unlock(&ei->i_es_lock);
 
-		nr_shrunk += shrunk;
-		nr_to_scan -= shrunk;
-		if (nr_to_scan == 0)
-			break;
+		if (nr_to_scan <= 0)
+			goto out;
+		spin_lock(&sbi->s_es_lock);
 	}
-
-	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skipped, &sbi->s_es_lru);
-	INIT_LIST_HEAD(&skipped);
+	spin_unlock(&sbi->s_es_lock);
 
 	/*
 	 * If we skipped any inodes, and we weren't able to make any
-	 * forward progress, sort the list and try again.
+	 * forward progress, try again to scan precached inodes.
 	 */
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		es_stats->es_stats_last_sorted = jiffies;
-		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
-				      i_es_lru);
-		/*
-		 * If there are no non-precached inodes left on the
-		 * list, start releasing precached extents.
-		 */
-		if (ext4_test_inode_state(&ei->vfs_inode,
-					  EXT4_STATE_EXT_PRECACHED))
-			skip_precached = 0;
 		goto retry;
 	}
 
-	spin_unlock(&sbi->s_es_lru_lock);
-
 	if (locked_ei && nr_shrunk == 0)
-		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
 
+out:
 	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
 	if (likely(es_stats->es_stats_scan_time))
 		es_stats->es_stats_scan_time = (scan_time +
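
The rewritten walk is a plain round-robin over a bounded snapshot: nr_to_walk caps the pass at the number of inodes present when the scan started, each visited inode is rotated to the tail so the next pass starts elsewhere, precached inodes are skipped on the first pass (retried == 0), and s_es_lock is dropped around the actual reclaim so other CPUs can keep adding and removing inodes. A condensed userspace model of that loop shape (TAILQ and all names are illustrative, standing in for the kernel list_head API):

#include <pthread.h>
#include <sys/queue.h>

struct node {
	TAILQ_ENTRY(node) link;
	int busy;	/* models a failed write_trylock(&ei->i_es_lock) */
};
TAILQ_HEAD(nodelist, node);

static struct nodelist list = TAILQ_HEAD_INITIALIZER(list);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; /* s_es_lock */
static int nr_nodes;

static int shrink_round_robin(int nr_to_scan)
{
	struct node *n;
	int nr_shrunk = 0, nr_to_walk;

	pthread_mutex_lock(&list_lock);
	nr_to_walk = nr_nodes;	/* bound the pass: each node seen once */
	while (nr_to_walk-- > 0 && !TAILQ_EMPTY(&list)) {
		n = TAILQ_FIRST(&list);
		/* Rotate to the tail so the next pass starts elsewhere. */
		TAILQ_REMOVE(&list, n, link);
		TAILQ_INSERT_TAIL(&list, n, link);
		if (n->busy)
			continue;	/* skip without blocking */
		pthread_mutex_unlock(&list_lock);
		/* ... reclaim from n without holding the list lock ... */
		nr_shrunk++;
		if (--nr_to_scan <= 0)
			return nr_shrunk;
		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
	return nr_shrunk;
}

Dropping the sort, the skipped list, and the i_touch_when timestamps trades LRU accuracy for O(1) bookkeeping per cache hit and a scan whose worst case no longer includes list_sort() over every cached inode.
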
@@ -1043,7 +1046,7 @@ retry:
 	else
 		es_stats->es_stats_shrunk = nr_shrunk;
 
-	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
 			     nr_skipped, retried);
 	return nr_shrunk;
 }
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
-	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
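
ext4_es_count() and ext4_es_scan() are the two halves of the kernel shrinker contract: count_objects returns a cheap estimate of reclaimable objects (here a per-cpu counter read, now of es_stats_shk_cnt) and scan_objects performs reclaim bounded by sc->nr_to_scan. For reference, a minimal shrinker of the same vintage is wired up like this (the demo_* helpers are hypothetical; the struct fields and register_shrinker() match the post-3.12 split count/scan API):

#include <linux/shrinker.h>

static unsigned long demo_count(struct shrinker *shrink,
				struct shrink_control *sc)
{
	/* Cheap, approximate: how many objects could we free? */
	return demo_nr_cached();		/* hypothetical helper */
}

static unsigned long demo_scan(struct shrinker *shrink,
			       struct shrink_control *sc)
{
	/* Free up to sc->nr_to_scan objects, report how many went. */
	return demo_reclaim(sc->nr_to_scan);	/* hypothetical helper */
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count,
	.scan_objects	= demo_scan,
	.seeks		= DEFAULT_SEEKS,
};

/* at init time: register_shrinker(&demo_shrinker); */
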
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 		return 0;
 
 	/* here we just find an inode that has the max nr. of objects */
-	spin_lock(&sbi->s_es_lru_lock);
-	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+	spin_lock(&sbi->s_es_lock);
+	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
 		inode_cnt++;
 		if (max && max->i_es_all_nr < ei->i_es_all_nr)
 			max = ei;
 		else if (!max)
 			max = ei;
 	}
-	spin_unlock(&sbi->s_es_lru_lock);
+	spin_unlock(&sbi->s_es_lock);
 
 	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
 		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
-		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
 	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
 		   es_stats->es_stats_cache_hits,
 		   es_stats->es_stats_cache_misses);
-	if (es_stats->es_stats_last_sorted != 0)
-		seq_printf(seq, "  %u ms last sorted interval\n",
-			   jiffies_to_msecs(jiffies -
-					    es_stats->es_stats_last_sorted));
 	if (inode_cnt)
-		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+		seq_printf(seq, "  %d inodes on list\n", inode_cnt);
 
 	seq_printf(seq, "average:\n  %llu us scan time\n",
 		   div_u64(es_stats->es_stats_scan_time, 1000));
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
 		seq_printf(seq,
 			   "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
 			   "  %llu us max scan time\n",
-			   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+			   max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
 			   div_u64(es_stats->es_stats_max_scan_time, 1000));
 
 	return 0;
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 {
 	int err;
 
-	INIT_LIST_HEAD(&sbi->s_es_lru);
-	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_stats.es_stats_last_sorted = 0;
+	/* Make sure we have enough bits for physical block number */
+	BUILD_BUG_ON(ES_SHIFT < 48);
+	INIT_LIST_HEAD(&sbi->s_es_list);
+	sbi->s_es_nr_inode = 0;
+	spin_lock_init(&sbi->s_es_lock);
 	sbi->s_es_stats.es_stats_shrunk = 0;
 	sbi->s_es_stats.es_stats_cache_hits = 0;
 	sbi->s_es_stats.es_stats_cache_misses = 0;
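
The BUILD_BUG_ON() added here turns a layout assumption into a compile-time check: the extent-status flag bits (including the new referenced bit) are packed into the top of the 64-bit es_pblk field, and ES_SHIFT < 48 would mean fewer than 48 bits remain for the physical block number. The same trick in isolation (DEMO_FLAG_BITS is a hypothetical stand-in):

#include <linux/bug.h>

#define DEMO_FLAG_BITS	16	/* hypothetical: bits stolen for flags */

static inline void demo_check_layout(void)
{
	/* Evaluated at compile time; a false condition breaks the build. */
	BUILD_BUG_ON(64 - DEMO_FLAG_BITS < 48);
}
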
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
 	if (err)
 		return err;
-	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
 	if (err)
 		goto err1;
 
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	return 0;
 
 err2:
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	return err;
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 	if (sbi->s_proc)
 		remove_proc_entry("es_shrinker_info", sbi->s_proc);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
-void ext4_es_lru_add(struct inode *inode)
+/*
+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
+ * most *nr_to_scan extents, update *nr_to_scan accordingly.
+ *
+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
+ * Increment *nr_shrunk by the number of reclaimed extents. Also update
+ * ei->i_es_shrink_lblk to where we should continue scanning.
+ */
+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
+				 int *nr_to_scan, int *nr_shrunk)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	ei->i_touch_when = jiffies;
-
-	if (!list_empty(&ei->i_es_lru))
-		return;
+	struct inode *inode = &ei->vfs_inode;
+	struct ext4_es_tree *tree = &ei->i_es_tree;
+	struct extent_status *es;
+	struct rb_node *node;
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (list_empty(&ei->i_es_lru))
-		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
+	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
+	if (!es)
+		goto out_wrap;
+	node = &es->rb_node;
+	while (*nr_to_scan > 0) {
+		if (es->es_lblk > end) {
+			ei->i_es_shrink_lblk = end + 1;
+			return 0;
+		}
 
-void ext4_es_lru_del(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+		(*nr_to_scan)--;
+		node = rb_next(&es->rb_node);
+		/*
+		 * We can't reclaim delayed extent from status tree because
+		 * fiemap, bigalloc, and seek_data/hole need to use it.
+		 */
+		if (ext4_es_is_delayed(es))
+			goto next;
+		if (ext4_es_is_referenced(es)) {
+			ext4_es_clear_referenced(es);
+			goto next;
+		}
 
-	spin_lock(&sbi->s_es_lru_lock);
-	if (!list_empty(&ei->i_es_lru))
-		list_del_init(&ei->i_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(inode, es);
+		(*nr_shrunk)++;
+next:
+		if (!node)
+			goto out_wrap;
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+	ei->i_es_shrink_lblk = es->es_lblk;
+	return 1;
+out_wrap:
+	ei->i_es_shrink_lblk = 0;
+	return 0;
 }
 
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan)
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
 {
 	struct inode *inode = &ei->vfs_inode;
-	struct ext4_es_tree *tree = &ei->i_es_tree;
-	struct rb_node *node;
-	struct extent_status *es;
-	unsigned long nr_shrunk = 0;
+	int nr_shrunk = 0;
+	ext4_lblk_t start = ei->i_es_shrink_lblk;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if (ei->i_es_lru_nr == 0)
+	if (ei->i_es_shk_nr == 0)
 		return 0;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
 	    __ratelimit(&_rs))
 		ext4_warning(inode->i_sb, "forced shrink of precached extents");
 
-	node = rb_first(&tree->root);
-	while (node != NULL) {
-		es = rb_entry(node, struct extent_status, rb_node);
-		node = rb_next(&es->rb_node);
-		/*
-		 * We can't reclaim delayed extent from status tree because
-		 * fiemap, bigalloc, and seek_data/hole need to use it.
-		 */
-		if (!ext4_es_is_delayed(es)) {
-			rb_erase(&es->rb_node, &tree->root);
-			ext4_es_free_extent(inode, es);
-			nr_shrunk++;
-			if (--nr_to_scan == 0)
-				break;
-		}
-	}
-	tree->cache_es = NULL;
+	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
+	    start != 0)
+		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
+
+	ei->i_es_tree.cache_es = NULL;
 	return nr_shrunk;
 }
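
es_reclaim_extents() makes per-inode reclaim resumable instead of always restarting at rb_first(): i_es_shrink_lblk remembers where the previous pass stopped, the scan first covers [i_es_shrink_lblk, EXT_MAX_BLOCKS] and, if that reaches the end of the tree, wraps around to [0, start - 1], so every extent is visited once before any extent is visited twice. The same cursor-and-wrap shape over a plain array (illustrative sketch, not the rb-tree walk):

#include <stddef.h>

static size_t cursor;	/* models ei->i_es_shrink_lblk */

/*
 * Visit items[cursor..n) and then wrap around to items[0..cursor),
 * touching each element at most once per call; the cursor records
 * where the next call should resume.
 */
static int scan_wrapped(int *items, size_t n, int *budget)
{
	size_t start = cursor, i;
	int reclaimed = 0;

	if (n == 0)
		return 0;
	for (i = 0; i < n && *budget > 0; i++) {
		size_t idx = (start + i) % n;
		(*budget)--;
		if (items[idx]) {	/* "reclaimable" entry */
			items[idx] = 0;
			reclaimed++;
		}
	}
	cursor = (start + i) % n;	/* resume point for the next call */
	return reclaimed;
}

Combined with the referenced bit, the per-inode scan behaves like a clock algorithm: the cursor sweeps the tree in rounds, and a non-delayed extent survives exactly one sweep after its last lookup.
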