author     Vladimir Davydov <vdavydov@parallels.com>      2015-02-12 17:58:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org> 2015-02-12 21:54:08 -0500
commit     503c358cf1925853195ee39ec437e51138bbb7df
tree       14aebe291975ec4353f21068990ebfec503ed63f /fs/super.c
parent     10c1045f28e86ac90589a188f0be2d7a4347efdf
list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting in memcg is currently unusable, because it lacks slab shrinker
support. That means that when we hit the limit we get ENOMEM without any
chance to recover. What we should do in that case is call shrink_slab, which
would reclaim old inode/dentry caches from this cgroup. This is what this
patch set is intended to do.
Basically, it does two things. First, it introduces the notion of
per-memcg slab shrinker. A shrinker that wants to reclaim objects per
cgroup should mark itself as SHRINKER_MEMCG_AWARE. Then it will be
passed the memory cgroup to scan from in shrink_control->memcg. For
such shrinkers shrink_slab iterates over the whole cgroup subtree under
the target cgroup and calls the shrinker for each kmem-active memory
cgroup.
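To sketch what opting in might look like for a shrinker (illustrative only;
the callback names here are hypothetical, and SHRINKER_MEMCG_AWARE is the flag
added by this series):

    static struct shrinker my_shrinker = {
        .count_objects = my_count_objects, /* may consult sc->nid and sc->memcg */
        .scan_objects  = my_scan_objects,
        .seeks         = DEFAULT_SEEKS,
        .flags         = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
    };

    register_shrinker(&my_shrinker);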
Second, this patch set makes the list_lru structure per-memcg. This is done
transparently to list_lru users: all they have to do is tell list_lru_init
that they want a memcg-aware list_lru. The list_lru will then automatically
distribute objects among per-memcg lists based on which cgroup each object is
accounted to. This way, to make the FS shrinkers (icache, dcache) memcg-aware,
we only need to make them use a memcg-aware list_lru, and this is what this
patch set does.
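For a list_lru user that amounts to a one-line change at initialization time.
A sketch, assuming the memcg-aware init variant (list_lru_init_memcg) added by
the later patches in this series; illustrative, not part of this patch:

    struct list_lru my_lru;

    /* memcg-aware: objects get spread over per-memcg (and per-node) lists */
    int err = list_lru_init_memcg(&my_lru);
    if (err)
        return err;

    /* plain per-node behaviour remains list_lru_init(&my_lru) */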
As before, this patch set only enables per-memcg kmem reclaim when the
pressure comes from memory.limit, not from memory.kmem.limit. Handling
memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and
it is still unclear whether we will have this knob in the unified
hierarchy.
This patch (of 9):
NUMA aware slab shrinkers use the list_lru structure to distribute
objects coming from different NUMA nodes to different lists. Whenever
such a shrinker needs to count or scan objects from a particular node,
it issues commands like this:
    count = list_lru_count_node(lru, sc->nid);
    freed = list_lru_walk_node(lru, sc->nid, isolate_func,
                               isolate_arg, &sc->nr_to_scan);
where sc is an instance of the shrink_control structure passed to it
from vmscan.
To simplify this, let's add special list_lru functions to be used by
shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which
consolidate the nid and nr_to_scan arguments in the shrink_control
structure.
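These helpers are essentially thin wrappers over the existing per-node calls;
roughly, a sketch of what this patch adds to include/linux/list_lru.h:

    static inline unsigned long
    list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc)
    {
        return list_lru_count_node(lru, sc->nid);
    }

    static inline unsigned long
    list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
                         list_lru_walk_cb isolate, void *cb_arg)
    {
        return list_lru_walk_node(lru, sc->nid, isolate, cb_arg,
                                  &sc->nr_to_scan);
    }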
This will also allow us to avoid patching shrinkers that use list_lru
when we make shrink_slab() per-memcg - all we will have to do is extend
the shrink_control structure to include the target memcg and make
list_lru_shrink_{count,walk} handle this appropriately.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/super.c')
-rw-r--r--   fs/super.c   24
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/fs/super.c b/fs/super.c
index eae088f6aaae..4554ac257647 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -77,8 +77,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	if (sb->s_op->nr_cached_objects)
 		fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
 
-	inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
-	dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
+	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
+	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
 	total_objects = dentries + inodes + fs_objects + 1;
 	if (!total_objects)
 		total_objects = 1;
@@ -86,20 +86,20 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	/* proportion the scan between the caches */
 	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
 	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
+	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
 
 	/*
 	 * prune the dcache first as the icache is pinned by it, then
 	 * prune the icache, followed by the filesystem specific caches
 	 */
-	freed = prune_dcache_sb(sb, dentries, sc->nid);
-	freed += prune_icache_sb(sb, inodes, sc->nid);
+	sc->nr_to_scan = dentries;
+	freed = prune_dcache_sb(sb, sc);
+	sc->nr_to_scan = inodes;
+	freed += prune_icache_sb(sb, sc);
 
-	if (fs_objects) {
-		fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
-				       total_objects);
+	if (fs_objects)
 		freed += sb->s_op->free_cached_objects(sb, fs_objects,
 						       sc->nid);
-	}
 
 	drop_super(sb);
 	return freed;
@@ -118,17 +118,15 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	 * scalability bottleneck. The counts could get updated
 	 * between super_cache_count and super_cache_scan anyway.
 	 * Call to super_cache_count with shrinker_rwsem held
-	 * ensures the safety of call to list_lru_count_node() and
+	 * ensures the safety of call to list_lru_shrink_count() and
 	 * s_op->nr_cached_objects().
 	 */
 	if (sb->s_op && sb->s_op->nr_cached_objects)
 		total_objects = sb->s_op->nr_cached_objects(sb,
 						 sc->nid);
 
-	total_objects += list_lru_count_node(&sb->s_dentry_lru,
-						 sc->nid);
-	total_objects += list_lru_count_node(&sb->s_inode_lru,
-						 sc->nid);
+	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
+	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
 
 	total_objects = vfs_pressure_ratio(total_objects);
 	return total_objects;