diff options
author | Ying Han <yinghan@google.com> | 2011-05-26 19:25:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:35 -0400 |
commit | 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad (patch) | |
tree | 0d509fd83ac7e7d2f52dfcbba769c43aeeb68b5f /mm | |
parent | f042e707ee671e4beb5389abeb9a1819a2cf5532 (diff) |
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the
cgroup. It breaks memory isolation since one cgroup can end up reclaiming
pages from another cgroup. Instead we should rely on memcg-aware target
reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under
memory pressure.
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the first
step to skip shrink_zone() if soft_limit reclaim does enough work.
This is part of the effort which tries to reduce reclaiming pages in global
LRU in memcg. The per-memcg background reclaim patchset further enhances the
per-cgroup targeting reclaim, which I should have V4 posted shortly.
Try running multiple memory-intensive workloads within separate memcgs. Watch
the counters of soft_steal in memory.stat.
$ cat /dev/cgroup/A/memory.stat | grep 'soft'
soft_steal 240000
soft_scan 240000
total_soft_steal 240000
total_soft_scan 240000
This patch:
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored.
We would like to skip shrink_zone() if soft_limit reclaim does enough
work. Also, we need to make the memory pressure balanced across per-memcg
zones, like the logic in vm-core. This patch is the first step where we
start with counting the nr_scanned and nr_reclaimed from soft_limit
reclaim into the global scan_control.
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 29 | ||||
-rw-r--r-- | mm/vmscan.c | 16 |
2 files changed, 33 insertions, 12 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc259926c170..e41a6c26f1e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
1433 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | 1433 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, |
1434 | struct zone *zone, | 1434 | struct zone *zone, |
1435 | gfp_t gfp_mask, | 1435 | gfp_t gfp_mask, |
1436 | unsigned long reclaim_options) | 1436 | unsigned long reclaim_options, |
1437 | unsigned long *total_scanned) | ||
1437 | { | 1438 | { |
1438 | struct mem_cgroup *victim; | 1439 | struct mem_cgroup *victim; |
1439 | int ret, total = 0; | 1440 | int ret, total = 0; |
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1442 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1443 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
1443 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1444 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
1444 | unsigned long excess; | 1445 | unsigned long excess; |
1446 | unsigned long nr_scanned; | ||
1445 | 1447 | ||
1446 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1448 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
1447 | 1449 | ||
@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1484 | continue; | 1486 | continue; |
1485 | } | 1487 | } |
1486 | /* we use swappiness of local cgroup */ | 1488 | /* we use swappiness of local cgroup */ |
1487 | if (check_soft) | 1489 | if (check_soft) { |
1488 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, | 1490 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
1489 | noswap, get_swappiness(victim), zone); | 1491 | noswap, get_swappiness(victim), zone, |
1490 | else | 1492 | &nr_scanned); |
1493 | *total_scanned += nr_scanned; | ||
1494 | } else | ||
1491 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | 1495 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
1492 | noswap, get_swappiness(victim)); | 1496 | noswap, get_swappiness(victim)); |
1493 | css_put(&victim->css); | 1497 | css_put(&victim->css); |
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1928 | return CHARGE_WOULDBLOCK; | 1932 | return CHARGE_WOULDBLOCK; |
1929 | 1933 | ||
1930 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, | 1934 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, |
1931 | gfp_mask, flags); | 1935 | gfp_mask, flags, NULL); |
1932 | if (mem_cgroup_margin(mem_over_limit) >= nr_pages) | 1936 | if (mem_cgroup_margin(mem_over_limit) >= nr_pages) |
1933 | return CHARGE_RETRY; | 1937 | return CHARGE_RETRY; |
1934 | /* | 1938 | /* |
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
3211 | break; | 3215 | break; |
3212 | 3216 | ||
3213 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, | 3217 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
3214 | MEM_CGROUP_RECLAIM_SHRINK); | 3218 | MEM_CGROUP_RECLAIM_SHRINK, |
3219 | NULL); | ||
3215 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); | 3220 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); |
3216 | /* Usage is reduced ? */ | 3221 | /* Usage is reduced ? */ |
3217 | if (curusage >= oldusage) | 3222 | if (curusage >= oldusage) |
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
3271 | 3276 | ||
3272 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, | 3277 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
3273 | MEM_CGROUP_RECLAIM_NOSWAP | | 3278 | MEM_CGROUP_RECLAIM_NOSWAP | |
3274 | MEM_CGROUP_RECLAIM_SHRINK); | 3279 | MEM_CGROUP_RECLAIM_SHRINK, |
3280 | NULL); | ||
3275 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 3281 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
3276 | /* Usage is reduced ? */ | 3282 | /* Usage is reduced ? */ |
3277 | if (curusage >= oldusage) | 3283 | if (curusage >= oldusage) |
@@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
3285 | } | 3291 | } |
3286 | 3292 | ||
3287 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 3293 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
3288 | gfp_t gfp_mask) | 3294 | gfp_t gfp_mask, |
3295 | unsigned long *total_scanned) | ||
3289 | { | 3296 | { |
3290 | unsigned long nr_reclaimed = 0; | 3297 | unsigned long nr_reclaimed = 0; |
3291 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; | 3298 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; |
@@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3293 | int loop = 0; | 3300 | int loop = 0; |
3294 | struct mem_cgroup_tree_per_zone *mctz; | 3301 | struct mem_cgroup_tree_per_zone *mctz; |
3295 | unsigned long long excess; | 3302 | unsigned long long excess; |
3303 | unsigned long nr_scanned; | ||
3296 | 3304 | ||
3297 | if (order > 0) | 3305 | if (order > 0) |
3298 | return 0; | 3306 | return 0; |
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3311 | if (!mz) | 3319 | if (!mz) |
3312 | break; | 3320 | break; |
3313 | 3321 | ||
3322 | nr_scanned = 0; | ||
3314 | reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, | 3323 | reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, |
3315 | gfp_mask, | 3324 | gfp_mask, |
3316 | MEM_CGROUP_RECLAIM_SOFT); | 3325 | MEM_CGROUP_RECLAIM_SOFT, |
3326 | &nr_scanned); | ||
3317 | nr_reclaimed += reclaimed; | 3327 | nr_reclaimed += reclaimed; |
3328 | *total_scanned += nr_scanned; | ||
3318 | spin_lock(&mctz->lock); | 3329 | spin_lock(&mctz->lock); |
3319 | 3330 | ||
3320 | /* | 3331 | /* |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 7e0116150dc7..9ce6ec84328e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2171 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2171 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
2172 | gfp_t gfp_mask, bool noswap, | 2172 | gfp_t gfp_mask, bool noswap, |
2173 | unsigned int swappiness, | 2173 | unsigned int swappiness, |
2174 | struct zone *zone) | 2174 | struct zone *zone, |
2175 | unsigned long *nr_scanned) | ||
2175 | { | 2176 | { |
2176 | struct scan_control sc = { | 2177 | struct scan_control sc = { |
2178 | .nr_scanned = 0, | ||
2177 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2179 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2178 | .may_writepage = !laptop_mode, | 2180 | .may_writepage = !laptop_mode, |
2179 | .may_unmap = 1, | 2181 | .may_unmap = 1, |
@@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2182 | .order = 0, | 2184 | .order = 0, |
2183 | .mem_cgroup = mem, | 2185 | .mem_cgroup = mem, |
2184 | }; | 2186 | }; |
2187 | |||
2185 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2188 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2186 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2189 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
2187 | 2190 | ||
@@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2200 | 2203 | ||
2201 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2204 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2202 | 2205 | ||
2206 | *nr_scanned = sc.nr_scanned; | ||
2203 | return sc.nr_reclaimed; | 2207 | return sc.nr_reclaimed; |
2204 | } | 2208 | } |
2205 | 2209 | ||
@@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2347 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2351 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2348 | unsigned long total_scanned; | 2352 | unsigned long total_scanned; |
2349 | struct reclaim_state *reclaim_state = current->reclaim_state; | 2353 | struct reclaim_state *reclaim_state = current->reclaim_state; |
2354 | unsigned long nr_soft_reclaimed; | ||
2355 | unsigned long nr_soft_scanned; | ||
2350 | struct scan_control sc = { | 2356 | struct scan_control sc = { |
2351 | .gfp_mask = GFP_KERNEL, | 2357 | .gfp_mask = GFP_KERNEL, |
2352 | .may_unmap = 1, | 2358 | .may_unmap = 1, |
@@ -2439,11 +2445,15 @@ loop_again: | |||
2439 | 2445 | ||
2440 | sc.nr_scanned = 0; | 2446 | sc.nr_scanned = 0; |
2441 | 2447 | ||
2448 | nr_soft_scanned = 0; | ||
2442 | /* | 2449 | /* |
2443 | * Call soft limit reclaim before calling shrink_zone. | 2450 | * Call soft limit reclaim before calling shrink_zone. |
2444 | * For now we ignore the return value | ||
2445 | */ | 2451 | */ |
2446 | mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask); | 2452 | nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, |
2453 | order, sc.gfp_mask, | ||
2454 | &nr_soft_scanned); | ||
2455 | sc.nr_reclaimed += nr_soft_reclaimed; | ||
2456 | total_scanned += nr_soft_scanned; | ||
2447 | 2457 | ||
2448 | /* | 2458 | /* |
2449 | * We put equal pressure on every zone, unless | 2459 | * We put equal pressure on every zone, unless |