diff options
author | Ying Han <yinghan@google.com> | 2011-05-26 19:25:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:35 -0400 |
commit | 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad (patch) | |
tree | 0d509fd83ac7e7d2f52dfcbba769c43aeeb68b5f /mm/vmscan.c | |
parent | f042e707ee671e4beb5389abeb9a1819a2cf5532 (diff) |
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the
cgroup. It breaks memory isolation since one cgroup can end up reclaiming
pages from another cgroup. Instead we should rely on memcg-aware target
reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under
memory pressure.
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the first
step to skip shrink_zone() if soft_limit reclaim does enough work.
This is part of the effort to reduce the reclaiming of pages from the global
LRU in memcg. The per-memcg background reclaim patchset further enhances the
per-cgroup targeted reclaim; I should have V4 of it posted shortly.
Try running multiple memory-intensive workloads within separate memcgs. Watch
the counters of soft_steal in memory.stat.
$ cat /dev/cgroup/A/memory.stat | grep 'soft'
soft_steal 240000
soft_scan 240000
total_soft_steal 240000
total_soft_scan 240000
This patch:
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored.
We would like to skip shrink_zone() if soft_limit reclaim does enough
work. Also, we need to make the memory pressure balanced across per-memcg
zones, like the logic vm-core. This patch is the first step where we
start with counting the nr_scanned and nr_reclaimed from soft_limit
reclaim into the global scan_control.
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 7e0116150dc7..9ce6ec84328e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2171,9 +2171,11 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2171 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2171 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
2172 | gfp_t gfp_mask, bool noswap, | 2172 | gfp_t gfp_mask, bool noswap, |
2173 | unsigned int swappiness, | 2173 | unsigned int swappiness, |
2174 | struct zone *zone) | 2174 | struct zone *zone, |
2175 | unsigned long *nr_scanned) | ||
2175 | { | 2176 | { |
2176 | struct scan_control sc = { | 2177 | struct scan_control sc = { |
2178 | .nr_scanned = 0, | ||
2177 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2179 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2178 | .may_writepage = !laptop_mode, | 2180 | .may_writepage = !laptop_mode, |
2179 | .may_unmap = 1, | 2181 | .may_unmap = 1, |
@@ -2182,6 +2184,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2182 | .order = 0, | 2184 | .order = 0, |
2183 | .mem_cgroup = mem, | 2185 | .mem_cgroup = mem, |
2184 | }; | 2186 | }; |
2187 | |||
2185 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2188 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2186 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2189 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
2187 | 2190 | ||
@@ -2200,6 +2203,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2200 | 2203 | ||
2201 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2204 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2202 | 2205 | ||
2206 | *nr_scanned = sc.nr_scanned; | ||
2203 | return sc.nr_reclaimed; | 2207 | return sc.nr_reclaimed; |
2204 | } | 2208 | } |
2205 | 2209 | ||
@@ -2347,6 +2351,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2347 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2351 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2348 | unsigned long total_scanned; | 2352 | unsigned long total_scanned; |
2349 | struct reclaim_state *reclaim_state = current->reclaim_state; | 2353 | struct reclaim_state *reclaim_state = current->reclaim_state; |
2354 | unsigned long nr_soft_reclaimed; | ||
2355 | unsigned long nr_soft_scanned; | ||
2350 | struct scan_control sc = { | 2356 | struct scan_control sc = { |
2351 | .gfp_mask = GFP_KERNEL, | 2357 | .gfp_mask = GFP_KERNEL, |
2352 | .may_unmap = 1, | 2358 | .may_unmap = 1, |
@@ -2439,11 +2445,15 @@ loop_again: | |||
2439 | 2445 | ||
2440 | sc.nr_scanned = 0; | 2446 | sc.nr_scanned = 0; |
2441 | 2447 | ||
2448 | nr_soft_scanned = 0; | ||
2442 | /* | 2449 | /* |
2443 | * Call soft limit reclaim before calling shrink_zone. | 2450 | * Call soft limit reclaim before calling shrink_zone. |
2444 | * For now we ignore the return value | ||
2445 | */ | 2451 | */ |
2446 | mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask); | 2452 | nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, |
2453 | order, sc.gfp_mask, | ||
2454 | &nr_soft_scanned); | ||
2455 | sc.nr_reclaimed += nr_soft_reclaimed; | ||
2456 | total_scanned += nr_soft_scanned; | ||
2447 | 2457 | ||
2448 | /* | 2458 | /* |
2449 | * We put equal pressure on every zone, unless | 2459 | * We put equal pressure on every zone, unless |