aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorYing Han <yinghan@google.com>2011-05-26 19:25:25 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 20:12:35 -0400
commit0ae5e89c60c9eb87da36a2614836bc434b0ec2ad (patch)
tree0d509fd83ac7e7d2f52dfcbba769c43aeeb68b5f /mm
parentf042e707ee671e4beb5389abeb9a1819a2cf5532 (diff)
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the cgroup. It breaks memory isolation since one cgroup can end up reclaiming pages from another cgroup. Instead we should rely on memcg-aware target reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under memory pressure. In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. This patch is the first step to skip shrink_zone() if soft_limit reclaim does enough work. This is part of the effort which tries to reduce reclaiming pages in global LRU in memcg. The per-memcg background reclaim patchset further enhances the per-cgroup targeting reclaim, which I should have V4 posted shortly. Try running multiple memory-intensive workloads within separate memcgs. Watch the counters of soft_steal in memory.stat. $ cat /dev/cgroup/A/memory.stat | grep 'soft' soft_steal 240000 soft_scan 240000 total_soft_steal 240000 total_soft_scan 240000 This patch: In the global background reclaim, we do soft reclaim before scanning the per-zone LRU. However, the return value is ignored. We would like to skip shrink_zone() if soft_limit reclaim does enough work. Also, we need to make the memory pressure balanced across per-memcg zones, like the logic in vm-core. This patch is the first step where we start with counting the nr_scanned and nr_reclaimed from soft_limit reclaim into the global scan_control. Signed-off-by: Ying Han <yinghan@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Rik van Riel <riel@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c29
-rw-r--r--mm/vmscan.c16
2 files changed, 33 insertions, 12 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc259926c170..e41a6c26f1e7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
1433static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, 1433static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1434 struct zone *zone, 1434 struct zone *zone,
1435 gfp_t gfp_mask, 1435 gfp_t gfp_mask,
1436 unsigned long reclaim_options) 1436 unsigned long reclaim_options,
1437 unsigned long *total_scanned)
1437{ 1438{
1438 struct mem_cgroup *victim; 1439 struct mem_cgroup *victim;
1439 int ret, total = 0; 1440 int ret, total = 0;
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1442 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; 1443 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1443 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1444 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1444 unsigned long excess; 1445 unsigned long excess;
1446 unsigned long nr_scanned;
1445 1447
1446 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1448 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
1447 1449
@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1484 continue; 1486 continue;
1485 } 1487 }
1486 /* we use swappiness of local cgroup */ 1488 /* we use swappiness of local cgroup */
1487 if (check_soft) 1489 if (check_soft) {
1488 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, 1490 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1489 noswap, get_swappiness(victim), zone); 1491 noswap, get_swappiness(victim), zone,
1490 else 1492 &nr_scanned);
1493 *total_scanned += nr_scanned;
1494 } else
1491 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, 1495 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1492 noswap, get_swappiness(victim)); 1496 noswap, get_swappiness(victim));
1493 css_put(&victim->css); 1497 css_put(&victim->css);
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
1928 return CHARGE_WOULDBLOCK; 1932 return CHARGE_WOULDBLOCK;
1929 1933
1930 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, 1934 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
1931 gfp_mask, flags); 1935 gfp_mask, flags, NULL);
1932 if (mem_cgroup_margin(mem_over_limit) >= nr_pages) 1936 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
1933 return CHARGE_RETRY; 1937 return CHARGE_RETRY;
1934 /* 1938 /*
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
3211 break; 3215 break;
3212 3216
3213 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, 3217 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
3214 MEM_CGROUP_RECLAIM_SHRINK); 3218 MEM_CGROUP_RECLAIM_SHRINK,
3219 NULL);
3215 curusage = res_counter_read_u64(&memcg->res, RES_USAGE); 3220 curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
3216 /* Usage is reduced ? */ 3221 /* Usage is reduced ? */
3217 if (curusage >= oldusage) 3222 if (curusage >= oldusage)
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
3271 3276
3272 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, 3277 mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
3273 MEM_CGROUP_RECLAIM_NOSWAP | 3278 MEM_CGROUP_RECLAIM_NOSWAP |
3274 MEM_CGROUP_RECLAIM_SHRINK); 3279 MEM_CGROUP_RECLAIM_SHRINK,
3280 NULL);
3275 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 3281 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
3276 /* Usage is reduced ? */ 3282 /* Usage is reduced ? */
3277 if (curusage >= oldusage) 3283 if (curusage >= oldusage)
@@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
3285} 3291}
3286 3292
3287unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, 3293unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3288 gfp_t gfp_mask) 3294 gfp_t gfp_mask,
3295 unsigned long *total_scanned)
3289{ 3296{
3290 unsigned long nr_reclaimed = 0; 3297 unsigned long nr_reclaimed = 0;
3291 struct mem_cgroup_per_zone *mz, *next_mz = NULL; 3298 struct mem_cgroup_per_zone *mz, *next_mz = NULL;
@@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3293 int loop = 0; 3300 int loop = 0;
3294 struct mem_cgroup_tree_per_zone *mctz; 3301 struct mem_cgroup_tree_per_zone *mctz;
3295 unsigned long long excess; 3302 unsigned long long excess;
3303 unsigned long nr_scanned;
3296 3304
3297 if (order > 0) 3305 if (order > 0)
3298 return 0; 3306 return 0;
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3311 if (!mz) 3319 if (!mz)
3312 break; 3320 break;
3313 3321
3322 nr_scanned = 0;
3314 reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, 3323 reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
3315 gfp_mask, 3324 gfp_mask,
3316 MEM_CGROUP_RECLAIM_SOFT); 3325 MEM_CGROUP_RECLAIM_SOFT,
3326 &nr_scanned);
3317 nr_reclaimed += reclaimed; 3327 nr_reclaimed += reclaimed;
3328 *total_scanned += nr_scanned;
3318 spin_lock(&mctz->lock); 3329 spin_lock(&mctz->lock);
3319 3330
3320 /* 3331 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7e0116150dc7..9ce6ec84328e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2171unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, 2171unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2172 gfp_t gfp_mask, bool noswap, 2172 gfp_t gfp_mask, bool noswap,
2173 unsigned int swappiness, 2173 unsigned int swappiness,
2174 struct zone *zone) 2174 struct zone *zone,
2175 unsigned long *nr_scanned)
2175{ 2176{
2176 struct scan_control sc = { 2177 struct scan_control sc = {
2178 .nr_scanned = 0,
2177 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2179 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2178 .may_writepage = !laptop_mode, 2180 .may_writepage = !laptop_mode,
2179 .may_unmap = 1, 2181 .may_unmap = 1,
@@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2182 .order = 0, 2184 .order = 0,
2183 .mem_cgroup = mem, 2185 .mem_cgroup = mem,
2184 }; 2186 };
2187
2185 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2188 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2186 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2189 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
2187 2190
@@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2200 2203
2201 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2204 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2202 2205
2206 *nr_scanned = sc.nr_scanned;
2203 return sc.nr_reclaimed; 2207 return sc.nr_reclaimed;
2204} 2208}
2205 2209
@@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2347 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 2351 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
2348 unsigned long total_scanned; 2352 unsigned long total_scanned;
2349 struct reclaim_state *reclaim_state = current->reclaim_state; 2353 struct reclaim_state *reclaim_state = current->reclaim_state;
2354 unsigned long nr_soft_reclaimed;
2355 unsigned long nr_soft_scanned;
2350 struct scan_control sc = { 2356 struct scan_control sc = {
2351 .gfp_mask = GFP_KERNEL, 2357 .gfp_mask = GFP_KERNEL,
2352 .may_unmap = 1, 2358 .may_unmap = 1,
@@ -2439,11 +2445,15 @@ loop_again:
2439 2445
2440 sc.nr_scanned = 0; 2446 sc.nr_scanned = 0;
2441 2447
2448 nr_soft_scanned = 0;
2442 /* 2449 /*
2443 * Call soft limit reclaim before calling shrink_zone. 2450 * Call soft limit reclaim before calling shrink_zone.
2444 * For now we ignore the return value
2445 */ 2451 */
2446 mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask); 2452 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
2453 order, sc.gfp_mask,
2454 &nr_soft_scanned);
2455 sc.nr_reclaimed += nr_soft_reclaimed;
2456 total_scanned += nr_soft_scanned;
2447 2457
2448 /* 2458 /*
2449 * We put equal pressure on every zone, unless 2459 * We put equal pressure on every zone, unless