aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2014-11-25 11:51:23 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2014-11-25 11:51:23 -0500
commit: dd4759255188771e60cf3455982959a1ba04f4eb (patch)
tree: 8338fbccf9ca744aa6da866a52de407d83818761 /fs/ext4
parent: b0dea4c1651f3cdb6d17604fa473e72cb74cdc6b (diff)
ext4: limit number of scanned extents in status tree shrinker
Currently we scan extent status trees of inodes until we reclaim nr_to_scan extents. This can, however, require a lot of scanning when there are lots of delayed extents (as those cannot be reclaimed). Change the shrinker to work as shrinkers are supposed to and *scan* only nr_to_scan extents regardless of how many extents we actually reclaimed. We need to be careful, however, to avoid scanning each status tree from the beginning - that could lead to a situation where we would not be able to reclaim anything at all when the first nr_to_scan extents in the tree are always unreclaimable. With each inode we remember the offset where we stopped scanning and continue from there when we next come across the inode. Note that we also need to update the places calling __es_shrink() manually to pass a reasonable nr_to_scan, so that they have a chance of reclaiming anything and do not scan just 1 extent. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/ext4.h 5
-rw-r--r-- fs/ext4/extents_status.c 91
-rw-r--r-- fs/ext4/super.c 1
3 files changed, 64 insertions, 33 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab6caf55f5bf..4186ec84f835 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -881,6 +881,9 @@ struct ext4_inode_info {
881 struct list_head i_es_list; 881 struct list_head i_es_list;
882 unsigned int i_es_all_nr; /* protected by i_es_lock */ 882 unsigned int i_es_all_nr; /* protected by i_es_lock */
883 unsigned int i_es_shk_nr; /* protected by i_es_lock */ 883 unsigned int i_es_shk_nr; /* protected by i_es_lock */
884 ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for
885 extents to shrink. Protected by
886 i_es_lock */
884 887
885 /* ialloc */ 888 /* ialloc */
886 ext4_group_t i_last_alloc_group; 889 ext4_group_t i_last_alloc_group;
@@ -1321,7 +1324,7 @@ struct ext4_sb_info {
1321 1324
1322 /* Reclaim extents from extent status tree */ 1325 /* Reclaim extents from extent status tree */
1323 struct shrinker s_es_shrinker; 1326 struct shrinker s_es_shrinker;
1324 struct list_head s_es_list; 1327 struct list_head s_es_list; /* List of inodes with reclaimable extents */
1325 long s_es_nr_inode; 1328 long s_es_nr_inode;
1326 struct ext4_es_stats s_es_stats; 1329 struct ext4_es_stats s_es_stats;
1327 struct mb_cache *s_mb_cache; 1330 struct mb_cache *s_mb_cache;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index de2d9d8bf22f..8f2aac4006d2 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -147,8 +147,7 @@ static struct kmem_cache *ext4_es_cachep;
147static int __es_insert_extent(struct inode *inode, struct extent_status *newes); 147static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
148static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 148static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
149 ext4_lblk_t end); 149 ext4_lblk_t end);
150static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, 150static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
151 int nr_to_scan);
152static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, 151static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
153 struct ext4_inode_info *locked_ei); 152 struct ext4_inode_info *locked_ei);
154 153
@@ -716,7 +715,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
716retry: 715retry:
717 err = __es_insert_extent(inode, &newes); 716 err = __es_insert_extent(inode, &newes);
718 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), 717 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
719 1, EXT4_I(inode))) 718 128, EXT4_I(inode)))
720 goto retry; 719 goto retry;
721 if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) 720 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
722 err = 0; 721 err = 0;
@@ -874,7 +873,7 @@ retry:
874 es->es_len = orig_es.es_len; 873 es->es_len = orig_es.es_len;
875 if ((err == -ENOMEM) && 874 if ((err == -ENOMEM) &&
876 __es_shrink(EXT4_SB(inode->i_sb), 875 __es_shrink(EXT4_SB(inode->i_sb),
877 1, EXT4_I(inode))) 876 128, EXT4_I(inode)))
878 goto retry; 877 goto retry;
879 goto out; 878 goto out;
880 } 879 }
@@ -976,8 +975,6 @@ retry:
976 spin_lock(&sbi->s_es_lock); 975 spin_lock(&sbi->s_es_lock);
977 nr_to_walk = sbi->s_es_nr_inode; 976 nr_to_walk = sbi->s_es_nr_inode;
978 while (nr_to_walk-- > 0) { 977 while (nr_to_walk-- > 0) {
979 int shrunk;
980
981 if (list_empty(&sbi->s_es_list)) { 978 if (list_empty(&sbi->s_es_list)) {
982 spin_unlock(&sbi->s_es_lock); 979 spin_unlock(&sbi->s_es_lock);
983 goto out; 980 goto out;
@@ -985,7 +982,7 @@ retry:
985 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, 982 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
986 i_es_list); 983 i_es_list);
987 /* Move the inode to the tail */ 984 /* Move the inode to the tail */
988 list_move(&ei->i_es_list, sbi->s_es_list.prev); 985 list_move_tail(&ei->i_es_list, &sbi->s_es_list);
989 986
990 /* 987 /*
991 * Normally we try hard to avoid shrinking precached inodes, 988 * Normally we try hard to avoid shrinking precached inodes,
@@ -1007,13 +1004,10 @@ retry:
1007 */ 1004 */
1008 spin_unlock(&sbi->s_es_lock); 1005 spin_unlock(&sbi->s_es_lock);
1009 1006
1010 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); 1007 nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
1011 write_unlock(&ei->i_es_lock); 1008 write_unlock(&ei->i_es_lock);
1012 1009
1013 nr_shrunk += shrunk; 1010 if (nr_to_scan <= 0)
1014 nr_to_scan -= shrunk;
1015
1016 if (nr_to_scan == 0)
1017 goto out; 1011 goto out;
1018 spin_lock(&sbi->s_es_lock); 1012 spin_lock(&sbi->s_es_lock);
1019 } 1013 }
@@ -1029,7 +1023,7 @@ retry:
1029 } 1023 }
1030 1024
1031 if (locked_ei && nr_shrunk == 0) 1025 if (locked_ei && nr_shrunk == 0)
1032 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); 1026 nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
1033 1027
1034out: 1028out:
1035 scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 1029 scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
@@ -1224,27 +1218,33 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
1224 unregister_shrinker(&sbi->s_es_shrinker); 1218 unregister_shrinker(&sbi->s_es_shrinker);
1225} 1219}
1226 1220
1227static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, 1221/*
1228 int nr_to_scan) 1222 * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
1223 * most *nr_to_scan extents, update *nr_to_scan accordingly.
1224 *
1225 * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
1226 * Increment *nr_shrunk by the number of reclaimed extents. Also update
1227 * ei->i_es_shrink_lblk to where we should continue scanning.
1228 */
1229static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
1230 int *nr_to_scan, int *nr_shrunk)
1229{ 1231{
1230 struct inode *inode = &ei->vfs_inode; 1232 struct inode *inode = &ei->vfs_inode;
1231 struct ext4_es_tree *tree = &ei->i_es_tree; 1233 struct ext4_es_tree *tree = &ei->i_es_tree;
1232 struct rb_node *node;
1233 struct extent_status *es; 1234 struct extent_status *es;
1234 unsigned long nr_shrunk = 0; 1235 struct rb_node *node;
1235 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1236 DEFAULT_RATELIMIT_BURST);
1237
1238 if (ei->i_es_shk_nr == 0)
1239 return 0;
1240 1236
1241 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && 1237 es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
1242 __ratelimit(&_rs)) 1238 if (!es)
1243 ext4_warning(inode->i_sb, "forced shrink of precached extents"); 1239 goto out_wrap;
1240 node = &es->rb_node;
1241 while (*nr_to_scan > 0) {
1242 if (es->es_lblk > end) {
1243 ei->i_es_shrink_lblk = end + 1;
1244 return 0;
1245 }
1244 1246
1245 node = rb_first(&tree->root); 1247 (*nr_to_scan)--;
1246 while (node != NULL) {
1247 es = rb_entry(node, struct extent_status, rb_node);
1248 node = rb_next(&es->rb_node); 1248 node = rb_next(&es->rb_node);
1249 /* 1249 /*
1250 * We can't reclaim delayed extent from status tree because 1250 * We can't reclaim delayed extent from status tree because
@@ -1253,11 +1253,38 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1253 if (!ext4_es_is_delayed(es)) { 1253 if (!ext4_es_is_delayed(es)) {
1254 rb_erase(&es->rb_node, &tree->root); 1254 rb_erase(&es->rb_node, &tree->root);
1255 ext4_es_free_extent(inode, es); 1255 ext4_es_free_extent(inode, es);
1256 nr_shrunk++; 1256 (*nr_shrunk)++;
1257 if (--nr_to_scan == 0)
1258 break;
1259 } 1257 }
1258 if (!node)
1259 goto out_wrap;
1260 es = rb_entry(node, struct extent_status, rb_node);
1260 } 1261 }
1261 tree->cache_es = NULL; 1262 ei->i_es_shrink_lblk = es->es_lblk;
1263 return 1;
1264out_wrap:
1265 ei->i_es_shrink_lblk = 0;
1266 return 0;
1267}
1268
1269static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
1270{
1271 struct inode *inode = &ei->vfs_inode;
1272 int nr_shrunk = 0;
1273 ext4_lblk_t start = ei->i_es_shrink_lblk;
1274 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1275 DEFAULT_RATELIMIT_BURST);
1276
1277 if (ei->i_es_shk_nr == 0)
1278 return 0;
1279
1280 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1281 __ratelimit(&_rs))
1282 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1283
1284 if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
1285 start != 0)
1286 es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
1287
1288 ei->i_es_tree.cache_es = NULL;
1262 return nr_shrunk; 1289 return nr_shrunk;
1263} 1290}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e2a17f8b7adc..48318497e8e9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -874,6 +874,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
874 INIT_LIST_HEAD(&ei->i_es_list); 874 INIT_LIST_HEAD(&ei->i_es_list);
875 ei->i_es_all_nr = 0; 875 ei->i_es_all_nr = 0;
876 ei->i_es_shk_nr = 0; 876 ei->i_es_shk_nr = 0;
877 ei->i_es_shrink_lblk = 0;
877 ei->i_reserved_data_blocks = 0; 878 ei->i_reserved_data_blocks = 0;
878 ei->i_reserved_meta_blocks = 0; 879 ei->i_reserved_meta_blocks = 0;
879 ei->i_allocated_meta_blocks = 0; 880 ei->i_allocated_meta_blocks = 0;