author     Vladimir Davydov <vdavydov@parallels.com>   2015-02-12 17:58:54 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-02-12 21:54:09 -0500
commit     cb731d6c62bbc2f890b08ea3d0386d5dad887326
tree       1c597a1018d8258585aa65b3c4872a9c5d177d46
parent     4101b624352fddb5ed72e7a1b6f8be8cffaa20fa
vmscan: per memory cgroup slab shrinkers
This patch adds a SHRINKER_MEMCG_AWARE flag. If a shrinker has this flag
set, it will be called per memory cgroup. The memory cgroup to scan
objects from is passed in shrink_control->memcg. If the memory cgroup
is NULL, a memcg-aware shrinker is supposed to scan objects from the
global list. Unaware shrinkers are only called on global pressure, with
memcg=NULL.
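
For illustration only (this sketch is not part of the patch): a cache that
keeps per-node, per-memcg object lists could opt in to memcg-aware shrinking
roughly as below. The my_cache_* helpers are hypothetical stand-ins for the
cache's own bookkeeping.

#include <linux/shrinker.h>

struct mem_cgroup;

/* Hypothetical helpers (not a real kernel API). */
extern unsigned long my_cache_nr_objects(int nid, struct mem_cgroup *memcg);
extern unsigned long my_cache_reclaim(int nid, struct mem_cgroup *memcg,
                                      unsigned long nr_to_scan);

static unsigned long my_cache_count(struct shrinker *shrink,
                                    struct shrink_control *sc)
{
        /* sc->memcg is NULL on global pressure; count the global list then. */
        return my_cache_nr_objects(sc->nid, sc->memcg);
}

static unsigned long my_cache_scan(struct shrinker *shrink,
                                   struct shrink_control *sc)
{
        return my_cache_reclaim(sc->nid, sc->memcg, sc->nr_to_scan);
}

static struct shrinker my_cache_shrinker = {
        .count_objects = my_cache_count,
        .scan_objects  = my_cache_scan,
        .seeks         = DEFAULT_SEEKS,
        .flags         = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

Such a shrinker is registered once with register_shrinker(&my_cache_shrinker);
under memcg pressure it is then invoked once per memory cgroup with sc->memcg
set, and under global pressure with sc->memcg == NULL.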
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 fs/drop_caches.c            | 14
 include/linux/memcontrol.h  |  7
 include/linux/mm.h          |  5
 include/linux/shrinker.h    |  6
 mm/memcontrol.c             |  2
 mm/memory-failure.c         | 11
 mm/vmscan.c                 | 85
 7 files changed, 80 insertions(+), 50 deletions(-)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 2bc2c87f35e7..5718cb9f7273 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
         iput(toput_inode);
 }
 
-static void drop_slab(void)
-{
-        int nr_objects;
-
-        do {
-                int nid;
-
-                nr_objects = 0;
-                for_each_online_node(nid)
-                        nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
-                                                        1000, 1000);
-        } while (nr_objects > 10);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
 {
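
Note that drop_slab() does not go away: it is reimplemented in mm/vmscan.c
further down and exported through include/linux/mm.h, so the existing caller
in this file keeps working. A simplified view of that caller, for context
only (not one of the hunks in this patch):

        /* drop_caches_sysctl_handler(), simplified: echo 2 > /proc/sys/vm/drop_caches */
        if (sysctl_drop_caches & 2)
                drop_slab();    /* now walks every online node and every memcg */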
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6cfd934c7c9b..54992fe0959f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
         return static_key_false(&memcg_kmem_enabled_key);
 }
 
+bool memcg_kmem_is_active(struct mem_cgroup *memcg);
+
 /*
  * In general, we'll do everything in our power to not incur in any overhead
  * for non-memcg users for the kmem functions. Not even a function call, if we
@@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
         return false;
 }
 
+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+        return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a4d24f3c5430..af4ff88a11e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
                                 void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-                                unsigned long nr_scanned,
-                                unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index f4aee75f00b1..4fcacd915d45 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -20,6 +20,9 @@ struct shrink_control {
 
         /* current node being shrunk (for NUMA aware shrinkers) */
         int nid;
+
+        /* current memcg being shrunk (for memcg aware shrinkers) */
+        struct mem_cgroup *memcg;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -61,7 +64,8 @@ struct shrinker {
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 
 /* Flags */
-#define SHRINKER_NUMA_AWARE (1 << 0)
+#define SHRINKER_NUMA_AWARE     (1 << 0)
+#define SHRINKER_MEMCG_AWARE    (1 << 1)
 
 extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 095c1f96fbec..3c2a1a8286ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
         return memcg->kmemcg_id >= 0;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index feb803bf3443..1a735fad2a13 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
          * Only call shrink_node_slabs here (which would also shrink
          * other caches) if access is not potentially fatal.
          */
-        if (access) {
-                int nr;
-                int nid = page_to_nid(p);
-                do {
-                        nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-                        if (page_count(p) == 1)
-                                break;
-                } while (nr > 10);
-        }
+        if (access)
+                drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8e645ee52045..803886b8e353 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-                                  struct shrinker *shrinker,
-                                  unsigned long nr_scanned,
-                                  unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+                                    struct shrinker *shrinker,
+                                    unsigned long nr_scanned,
+                                    unsigned long nr_eligible)
 {
         unsigned long freed = 0;
         unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned. Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-                                unsigned long nr_scanned,
-                                unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+                                 struct mem_cgroup *memcg,
+                                 unsigned long nr_scanned,
+                                 unsigned long nr_eligible)
 {
         struct shrinker *shrinker;
         unsigned long freed = 0;
 
+        if (memcg && !memcg_kmem_is_active(memcg))
+                return 0;
+
         if (nr_scanned == 0)
                 nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
                 struct shrink_control sc = {
                         .gfp_mask = gfp_mask,
                         .nid = nid,
+                        .memcg = memcg,
                 };
 
+                if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+                        continue;
+
                 if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
                         sc.nid = 0;
 
-                freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+                freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
         }
 
         up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ out:
         return freed;
 }
 
+void drop_slab_node(int nid)
+{
+        unsigned long freed;
+
+        do {
+                struct mem_cgroup *memcg = NULL;
+
+                freed = 0;
+                do {
+                        freed += shrink_slab(GFP_KERNEL, nid, memcg,
+                                             1000, 1000);
+                } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+        } while (freed > 10);
+}
+
+void drop_slab(void)
+{
+        int nid;
+
+        for_each_online_node(nid)
+                drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
         /*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                         bool is_classzone)
 {
+        struct reclaim_state *reclaim_state = current->reclaim_state;
         unsigned long nr_reclaimed, nr_scanned;
         bool reclaimable = false;
 
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                 memcg = mem_cgroup_iter(root, NULL, &reclaim);
                 do {
                         unsigned long lru_pages;
+                        unsigned long scanned;
                         struct lruvec *lruvec;
                         int swappiness;
 
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
                         lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                         swappiness = mem_cgroup_swappiness(memcg);
+                        scanned = sc->nr_scanned;
 
                         shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
                         zone_lru_pages += lru_pages;
 
+                        if (memcg && is_classzone)
+                                shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+                                            memcg, sc->nr_scanned - scanned,
+                                            lru_pages);
+
                         /*
                          * Direct reclaim and kswapd have to scan all memory
                          * cgroups to fulfill the overall scan target for the
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                  * Shrink the slab caches in the same proportion that
                  * the eligible LRU pages were scanned.
                  */
-                if (global_reclaim(sc) && is_classzone) {
-                        struct reclaim_state *reclaim_state;
-
-                        shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-                                          sc->nr_scanned - nr_scanned,
-                                          zone_lru_pages);
-
-                        reclaim_state = current->reclaim_state;
-                        if (reclaim_state) {
-                                sc->nr_reclaimed +=
-                                        reclaim_state->reclaimed_slab;
-                                reclaim_state->reclaimed_slab = 0;
-                        }
+                if (global_reclaim(sc) && is_classzone)
+                        shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+                                    sc->nr_scanned - nr_scanned,
+                                    zone_lru_pages);
+
+                if (reclaim_state) {
+                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                        reclaim_state->reclaimed_slab = 0;
                 }
 
                 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,