aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Vladimir Davydov <vdavydov@parallels.com> 2015-02-12 17:58:54 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-12 21:54:09 -0500
commit: cb731d6c62bbc2f890b08ea3d0386d5dad887326 (patch)
tree: 1c597a1018d8258585aa65b3c4872a9c5d177d46 /mm
parent: 4101b624352fddb5ed72e7a1b6f8be8cffaa20fa (diff)
vmscan: per memory cgroup slab shrinkers
This patch adds the SHRINKER_MEMCG_AWARE flag. If a shrinker has this flag set, it will be called per memory cgroup. The memory cgroup to scan objects from is passed in shrink_control->memcg. If the memory cgroup is NULL, a memcg aware shrinker is supposed to scan objects from the global list. Unaware shrinkers are only called on global pressure with memcg=NULL.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/memcontrol.c 2
-rw-r--r-- mm/memory-failure.c 11
-rw-r--r-- mm/vmscan.c 85
3 files changed, 66 insertions, 32 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 095c1f96fbec..3c2a1a8286ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -352,7 +352,7 @@ struct mem_cgroup {
352}; 352};
353 353
354#ifdef CONFIG_MEMCG_KMEM 354#ifdef CONFIG_MEMCG_KMEM
355static bool memcg_kmem_is_active(struct mem_cgroup *memcg) 355bool memcg_kmem_is_active(struct mem_cgroup *memcg)
356{ 356{
357 return memcg->kmemcg_id >= 0; 357 return memcg->kmemcg_id >= 0;
358} 358}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index feb803bf3443..1a735fad2a13 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
242 * Only call shrink_node_slabs here (which would also shrink 242 * Only call shrink_node_slabs here (which would also shrink
243 * other caches) if access is not potentially fatal. 243 * other caches) if access is not potentially fatal.
244 */ 244 */
245 if (access) { 245 if (access)
246 int nr; 246 drop_slab_node(page_to_nid(p));
247 int nid = page_to_nid(p);
248 do {
249 nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
250 if (page_count(p) == 1)
251 break;
252 } while (nr > 10);
253 }
254} 247}
255EXPORT_SYMBOL_GPL(shake_page); 248EXPORT_SYMBOL_GPL(shake_page);
256 249
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8e645ee52045..803886b8e353 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
232 232
233#define SHRINK_BATCH 128 233#define SHRINK_BATCH 128
234 234
235static unsigned long shrink_slabs(struct shrink_control *shrinkctl, 235static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
236 struct shrinker *shrinker, 236 struct shrinker *shrinker,
237 unsigned long nr_scanned, 237 unsigned long nr_scanned,
238 unsigned long nr_eligible) 238 unsigned long nr_eligible)
239{ 239{
240 unsigned long freed = 0; 240 unsigned long freed = 0;
241 unsigned long long delta; 241 unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
344} 344}
345 345
346/** 346/**
347 * shrink_node_slabs - shrink slab caches of a given node 347 * shrink_slab - shrink slab caches
348 * @gfp_mask: allocation context 348 * @gfp_mask: allocation context
349 * @nid: node whose slab caches to target 349 * @nid: node whose slab caches to target
350 * @memcg: memory cgroup whose slab caches to target
350 * @nr_scanned: pressure numerator 351 * @nr_scanned: pressure numerator
351 * @nr_eligible: pressure denominator 352 * @nr_eligible: pressure denominator
352 * 353 *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
355 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set, 356 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
356 * unaware shrinkers will receive a node id of 0 instead. 357 * unaware shrinkers will receive a node id of 0 instead.
357 * 358 *
359 * @memcg specifies the memory cgroup to target. If it is not NULL,
360 * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
361 * objects from the memory cgroup specified. Otherwise all shrinkers
362 * are called, and memcg aware shrinkers are supposed to scan the
363 * global list then.
364 *
358 * @nr_scanned and @nr_eligible form a ratio that indicate how much of 365 * @nr_scanned and @nr_eligible form a ratio that indicate how much of
359 * the available objects should be scanned. Page reclaim for example 366 * the available objects should be scanned. Page reclaim for example
360 * passes the number of pages scanned and the number of pages on the 367 * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
365 * 372 *
366 * Returns the number of reclaimed slab objects. 373 * Returns the number of reclaimed slab objects.
367 */ 374 */
368unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid, 375static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
369 unsigned long nr_scanned, 376 struct mem_cgroup *memcg,
370 unsigned long nr_eligible) 377 unsigned long nr_scanned,
378 unsigned long nr_eligible)
371{ 379{
372 struct shrinker *shrinker; 380 struct shrinker *shrinker;
373 unsigned long freed = 0; 381 unsigned long freed = 0;
374 382
383 if (memcg && !memcg_kmem_is_active(memcg))
384 return 0;
385
375 if (nr_scanned == 0) 386 if (nr_scanned == 0)
376 nr_scanned = SWAP_CLUSTER_MAX; 387 nr_scanned = SWAP_CLUSTER_MAX;
377 388
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
390 struct shrink_control sc = { 401 struct shrink_control sc = {
391 .gfp_mask = gfp_mask, 402 .gfp_mask = gfp_mask,
392 .nid = nid, 403 .nid = nid,
404 .memcg = memcg,
393 }; 405 };
394 406
407 if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
408 continue;
409
395 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) 410 if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
396 sc.nid = 0; 411 sc.nid = 0;
397 412
398 freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible); 413 freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
399 } 414 }
400 415
401 up_read(&shrinker_rwsem); 416 up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ out:
404 return freed; 419 return freed;
405} 420}
406 421
422void drop_slab_node(int nid)
423{
424 unsigned long freed;
425
426 do {
427 struct mem_cgroup *memcg = NULL;
428
429 freed = 0;
430 do {
431 freed += shrink_slab(GFP_KERNEL, nid, memcg,
432 1000, 1000);
433 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
434 } while (freed > 10);
435}
436
437void drop_slab(void)
438{
439 int nid;
440
441 for_each_online_node(nid)
442 drop_slab_node(nid);
443}
444
407static inline int is_page_cache_freeable(struct page *page) 445static inline int is_page_cache_freeable(struct page *page)
408{ 446{
409 /* 447 /*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
2276static bool shrink_zone(struct zone *zone, struct scan_control *sc, 2314static bool shrink_zone(struct zone *zone, struct scan_control *sc,
2277 bool is_classzone) 2315 bool is_classzone)
2278{ 2316{
2317 struct reclaim_state *reclaim_state = current->reclaim_state;
2279 unsigned long nr_reclaimed, nr_scanned; 2318 unsigned long nr_reclaimed, nr_scanned;
2280 bool reclaimable = false; 2319 bool reclaimable = false;
2281 2320
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
2294 memcg = mem_cgroup_iter(root, NULL, &reclaim); 2333 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2295 do { 2334 do {
2296 unsigned long lru_pages; 2335 unsigned long lru_pages;
2336 unsigned long scanned;
2297 struct lruvec *lruvec; 2337 struct lruvec *lruvec;
2298 int swappiness; 2338 int swappiness;
2299 2339
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
2305 2345
2306 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2346 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2307 swappiness = mem_cgroup_swappiness(memcg); 2347 swappiness = mem_cgroup_swappiness(memcg);
2348 scanned = sc->nr_scanned;
2308 2349
2309 shrink_lruvec(lruvec, swappiness, sc, &lru_pages); 2350 shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
2310 zone_lru_pages += lru_pages; 2351 zone_lru_pages += lru_pages;
2311 2352
2353 if (memcg && is_classzone)
2354 shrink_slab(sc->gfp_mask, zone_to_nid(zone),
2355 memcg, sc->nr_scanned - scanned,
2356 lru_pages);
2357
2312 /* 2358 /*
2313 * Direct reclaim and kswapd have to scan all memory 2359 * Direct reclaim and kswapd have to scan all memory
2314 * cgroups to fulfill the overall scan target for the 2360 * cgroups to fulfill the overall scan target for the
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
2330 * Shrink the slab caches in the same proportion that 2376 * Shrink the slab caches in the same proportion that
2331 * the eligible LRU pages were scanned. 2377 * the eligible LRU pages were scanned.
2332 */ 2378 */
2333 if (global_reclaim(sc) && is_classzone) { 2379 if (global_reclaim(sc) && is_classzone)
2334 struct reclaim_state *reclaim_state; 2380 shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
2335 2381 sc->nr_scanned - nr_scanned,
2336 shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone), 2382 zone_lru_pages);
2337 sc->nr_scanned - nr_scanned, 2383
2338 zone_lru_pages); 2384 if (reclaim_state) {
2339 2385 sc->nr_reclaimed += reclaim_state->reclaimed_slab;
2340 reclaim_state = current->reclaim_state; 2386 reclaim_state->reclaimed_slab = 0;
2341 if (reclaim_state) {
2342 sc->nr_reclaimed +=
2343 reclaim_state->reclaimed_slab;
2344 reclaim_state->reclaimed_slab = 0;
2345 }
2346 } 2387 }
2347 2388
2348 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, 2389 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,