author	Vladimir Davydov <vdavydov@parallels.com>	2015-02-12 17:58:54 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 21:54:09 -0500
commit	cb731d6c62bbc2f890b08ea3d0386d5dad887326 (patch)
tree	1c597a1018d8258585aa65b3c4872a9c5d177d46
parent	4101b624352fddb5ed72e7a1b6f8be8cffaa20fa (diff)
vmscan: per memory cgroup slab shrinkers
This patch adds SHRINKER_MEMCG_AWARE flag. If a shrinker has this flag
set, it will be called per memory cgroup. The memory cgroup to scan
objects from is passed in shrink_control->memcg. If the memory cgroup
is NULL, a memcg aware shrinker is supposed to scan objects from the
global list. Unaware shrinkers are only called on global pressure with
memcg=NULL.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
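For illustration only (not part of this commit): a minimal sketch of a shrinker that opts in to per-memcg invocation via the new flag. The my_cache_* callbacks and helpers are hypothetical placeholders for whatever per-node, per-memcg cache a real shrinker would drain; only struct shrinker, struct shrink_control, register_shrinker() and the SHRINKER_* flags are existing kernel interfaces.

/* Hypothetical memcg- and NUMA-aware shrinker (sketch only). */
static unsigned long my_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	/* sc->memcg is NULL under global pressure, non-NULL under memcg pressure */
	return my_cache_nr_objects(sc->nid, sc->memcg);
}

static unsigned long my_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	/* try to reclaim up to sc->nr_to_scan objects from that node/memcg */
	return my_cache_reclaim(sc->nid, sc->memcg, sc->nr_to_scan);
}

static struct shrinker my_cache_shrinker = {
	.count_objects	= my_cache_count,
	.scan_objects	= my_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

/* registered once at init time: register_shrinker(&my_cache_shrinker); */

A shrinker without SHRINKER_MEMCG_AWARE keeps its old behaviour: it is invoked only for global pressure, with sc->memcg == NULL.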
-rw-r--r--	fs/drop_caches.c	14
-rw-r--r--	include/linux/memcontrol.h	7
-rw-r--r--	include/linux/mm.h	5
-rw-r--r--	include/linux/shrinker.h	6
-rw-r--r--	mm/memcontrol.c	2
-rw-r--r--	mm/memory-failure.c	11
-rw-r--r--	mm/vmscan.c	85
7 files changed, 80 insertions(+), 50 deletions(-)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 2bc2c87f35e7..5718cb9f7273 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 	iput(toput_inode);
 }
 
-static void drop_slab(void)
-{
-	int nr_objects;
-
-	do {
-		int nid;
-
-		nr_objects = 0;
-		for_each_online_node(nid)
-			nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
-							1000, 1000);
-	} while (nr_objects > 10);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6cfd934c7c9b..54992fe0959f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
 	return static_key_false(&memcg_kmem_enabled_key);
 }
 
+bool memcg_kmem_is_active(struct mem_cgroup *memcg);
+
 /*
  * In general, we'll do everything in our power to not incur in any overhead
  * for non-memcg users for the kmem functions. Not even a function call, if we
@@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
 	return false;
 }
 
+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a4d24f3c5430..af4ff88a11e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index f4aee75f00b1..4fcacd915d45 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -20,6 +20,9 @@ struct shrink_control {
 
 	/* current node being shrunk (for NUMA aware shrinkers) */
 	int nid;
+
+	/* current memcg being shrunk (for memcg aware shrinkers) */
+	struct mem_cgroup *memcg;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -61,7 +64,8 @@ struct shrinker {
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 
 /* Flags */
 #define SHRINKER_NUMA_AWARE	(1 << 0)
+#define SHRINKER_MEMCG_AWARE	(1 << 1)
 
 extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 095c1f96fbec..3c2a1a8286ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
 	return memcg->kmemcg_id >= 0;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index feb803bf3443..1a735fad2a13 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
 	 * Only call shrink_node_slabs here (which would also shrink
 	 * other caches) if access is not potentially fatal.
 	 */
-	if (access) {
-		int nr;
-		int nid = page_to_nid(p);
-		do {
-			nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-			if (page_count(p) == 1)
-				break;
-		} while (nr > 10);
-	}
+	if (access)
+		drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8e645ee52045..803886b8e353 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-				  struct shrinker *shrinker,
-				  unsigned long nr_scanned,
-				  unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+				    struct shrinker *shrinker,
+				    unsigned long nr_scanned,
+				    unsigned long nr_eligible)
 {
 	unsigned long freed = 0;
 	unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned. Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+				 struct mem_cgroup *memcg,
+				 unsigned long nr_scanned,
+				 unsigned long nr_eligible)
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
 
+	if (memcg && !memcg_kmem_is_active(memcg))
+		return 0;
+
 	if (nr_scanned == 0)
 		nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 		struct shrink_control sc = {
 			.gfp_mask = gfp_mask,
 			.nid = nid,
+			.memcg = memcg,
 		};
 
+		if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+			continue;
+
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+		freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
 	}
 
 	up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ out:
 	return freed;
 }
 
+void drop_slab_node(int nid)
+{
+	unsigned long freed;
+
+	do {
+		struct mem_cgroup *memcg = NULL;
+
+		freed = 0;
+		do {
+			freed += shrink_slab(GFP_KERNEL, nid, memcg,
+					     1000, 1000);
+		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+	} while (freed > 10);
+}
+
+void drop_slab(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			bool is_classzone)
 {
+	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
 
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		memcg = mem_cgroup_iter(root, NULL, &reclaim);
 		do {
 			unsigned long lru_pages;
+			unsigned long scanned;
 			struct lruvec *lruvec;
 			int swappiness;
 
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
+			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
 			zone_lru_pages += lru_pages;
 
+			if (memcg && is_classzone)
+				shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+					    memcg, sc->nr_scanned - scanned,
+					    lru_pages);
+
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		 * Shrink the slab caches in the same proportion that
 		 * the eligible LRU pages were scanned.
 		 */
-		if (global_reclaim(sc) && is_classzone) {
-			struct reclaim_state *reclaim_state;
-
-			shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-					  sc->nr_scanned - nr_scanned,
-					  zone_lru_pages);
-
-			reclaim_state = current->reclaim_state;
-			if (reclaim_state) {
-				sc->nr_reclaimed +=
-					reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
+		if (global_reclaim(sc) && is_classzone)
+			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+				    sc->nr_scanned - nr_scanned,
+				    zone_lru_pages);
+
+		if (reclaim_state) {
+			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+			reclaim_state->reclaimed_slab = 0;
 		}
 
 		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,