author		Vladimir Davydov <vdavydov@parallels.com>	2015-02-12 17:58:54 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 21:54:09 -0500
commit		cb731d6c62bbc2f890b08ea3d0386d5dad887326 (patch)
tree		1c597a1018d8258585aa65b3c4872a9c5d177d46 /mm
parent		4101b624352fddb5ed72e7a1b6f8be8cffaa20fa (diff)
vmscan: per memory cgroup slab shrinkers
This patch adds SHRINKER_MEMCG_AWARE flag. If a shrinker has this flag
set, it will be called per memory cgroup. The memory cgroup to scan
objects from is passed in shrink_control->memcg. If the memory cgroup
is NULL, a memcg-aware shrinker is supposed to scan objects from the
global list. Shrinkers that are not memcg-aware are called only on
global pressure, with memcg=NULL.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
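
For illustration only (not part of this commit): a minimal sketch of how a cache might register a memcg-aware shrinker against the interface described above. The my_cache_* helpers and their bookkeeping are hypothetical; only the struct shrinker callbacks, the SHRINKER_NUMA_AWARE/SHRINKER_MEMCG_AWARE flags, the shrink_control fields (nid, memcg, nr_to_scan) and register_shrinker() reflect the real API.

static unsigned long my_cache_count(struct shrinker *shrink,
                                    struct shrink_control *sc)
{
        /* sc->memcg is NULL on global pressure, otherwise the cgroup to scan */
        return my_cache_count_objects(sc->nid, sc->memcg);     /* hypothetical helper */
}

static unsigned long my_cache_scan(struct shrinker *shrink,
                                   struct shrink_control *sc)
{
        /* free up to sc->nr_to_scan objects, return how many were freed */
        return my_cache_free_objects(sc->nid, sc->memcg, sc->nr_to_scan);      /* hypothetical helper */
}

static struct shrinker my_cache_shrinker = {
        .count_objects  = my_cache_count,
        .scan_objects   = my_cache_scan,
        .seeks          = DEFAULT_SEEKS,
        .flags          = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

/* registered once at cache init time: register_shrinker(&my_cache_shrinker); */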
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c		|  2
-rw-r--r--	mm/memory-failure.c	| 11
-rw-r--r--	mm/vmscan.c		| 85
3 files changed, 66 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 095c1f96fbec..3c2a1a8286ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
         return memcg->kmemcg_id >= 0;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index feb803bf3443..1a735fad2a13 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
          * Only call shrink_node_slabs here (which would also shrink
          * other caches) if access is not potentially fatal.
          */
-        if (access) {
-                int nr;
-                int nid = page_to_nid(p);
-                do {
-                        nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-                        if (page_count(p) == 1)
-                                break;
-                } while (nr > 10);
-        }
+        if (access)
+                drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8e645ee52045..803886b8e353 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-                                  struct shrinker *shrinker,
-                                  unsigned long nr_scanned,
-                                  unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+                                    struct shrinker *shrinker,
+                                    unsigned long nr_scanned,
+                                    unsigned long nr_eligible)
 {
         unsigned long freed = 0;
         unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned. Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-                                unsigned long nr_scanned,
-                                unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+                                 struct mem_cgroup *memcg,
+                                 unsigned long nr_scanned,
+                                 unsigned long nr_eligible)
 {
         struct shrinker *shrinker;
         unsigned long freed = 0;
 
+        if (memcg && !memcg_kmem_is_active(memcg))
+                return 0;
+
         if (nr_scanned == 0)
                 nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
                 struct shrink_control sc = {
                         .gfp_mask = gfp_mask,
                         .nid = nid,
+                        .memcg = memcg,
                 };
 
+                if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+                        continue;
+
                 if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
                         sc.nid = 0;
 
-                freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+                freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
         }
 
         up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ out:
         return freed;
 }
 
+void drop_slab_node(int nid)
+{
+        unsigned long freed;
+
+        do {
+                struct mem_cgroup *memcg = NULL;
+
+                freed = 0;
+                do {
+                        freed += shrink_slab(GFP_KERNEL, nid, memcg,
+                                             1000, 1000);
+                } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+        } while (freed > 10);
+}
+
+void drop_slab(void)
+{
+        int nid;
+
+        for_each_online_node(nid)
+                drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
         /*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                         bool is_classzone)
 {
+        struct reclaim_state *reclaim_state = current->reclaim_state;
         unsigned long nr_reclaimed, nr_scanned;
         bool reclaimable = false;
 
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
         memcg = mem_cgroup_iter(root, NULL, &reclaim);
         do {
                 unsigned long lru_pages;
+                unsigned long scanned;
                 struct lruvec *lruvec;
                 int swappiness;
 
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 
                 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                 swappiness = mem_cgroup_swappiness(memcg);
+                scanned = sc->nr_scanned;
 
                 shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
                 zone_lru_pages += lru_pages;
 
+                if (memcg && is_classzone)
+                        shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+                                    memcg, sc->nr_scanned - scanned,
+                                    lru_pages);
+
                 /*
                  * Direct reclaim and kswapd have to scan all memory
                  * cgroups to fulfill the overall scan target for the
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                  * Shrink the slab caches in the same proportion that
                  * the eligible LRU pages were scanned.
                  */
-                if (global_reclaim(sc) && is_classzone) {
-                        struct reclaim_state *reclaim_state;
-
-                        shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-                                          sc->nr_scanned - nr_scanned,
-                                          zone_lru_pages);
-
-                        reclaim_state = current->reclaim_state;
-                        if (reclaim_state) {
-                                sc->nr_reclaimed +=
-                                        reclaim_state->reclaimed_slab;
-                                reclaim_state->reclaimed_slab = 0;
-                        }
-                }
+                if (global_reclaim(sc) && is_classzone)
+                        shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+                                    sc->nr_scanned - nr_scanned,
+                                    zone_lru_pages);
+
+                if (reclaim_state) {
+                        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                        reclaim_state->reclaimed_slab = 0;
+                }
 
                 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,