diff options
author | Ying Han <yinghan@google.com> | 2011-05-26 19:25:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:35 -0400 |
commit | 0ae5e89c60c9eb87da36a2614836bc434b0ec2ad (patch) | |
tree | 0d509fd83ac7e7d2f52dfcbba769c43aeeb68b5f /mm/memcontrol.c | |
parent | f042e707ee671e4beb5389abeb9a1819a2cf5532 (diff) |
memcg: count the soft_limit reclaim in global background reclaim
The global kswapd scans per-zone LRU and reclaims pages regardless of the
cgroup. It breaks memory isolation since one cgroup can end up reclaiming
pages from another cgroup. Instead we should rely on memcg-aware target
reclaim including per-memcg kswapd and soft_limit hierarchical reclaim under
memory pressure.
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored. This patch is the first
step to skip shrink_zone() if soft_limit reclaim does enough work.
This is part of the effort which tries to reduce reclaiming pages in global
LRU in memcg. The per-memcg background reclaim patchset further enhances the
per-cgroup targeted reclaim; I should have V4 posted shortly.
Try running multiple memory-intensive workloads within separate memcgs. Watch
the counters of soft_steal in memory.stat.
$ cat /dev/cgroup/A/memory.stat | grep 'soft'
soft_steal 240000
soft_scan 240000
total_soft_steal 240000
total_soft_scan 240000
This patch:
In the global background reclaim, we do soft reclaim before scanning the
per-zone LRU. However, the return value is ignored.
We would like to skip shrink_zone() if soft_limit reclaim does enough
work. Also, we need to make the memory pressure balanced across per-memcg
zones, like the logic in vm-core. This patch is the first step where we
start with counting the nr_scanned and nr_reclaimed from soft_limit
reclaim into the global scan_control.
Signed-off-by: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc259926c170..e41a6c26f1e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1433,7 +1433,8 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
1433 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | 1433 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, |
1434 | struct zone *zone, | 1434 | struct zone *zone, |
1435 | gfp_t gfp_mask, | 1435 | gfp_t gfp_mask, |
1436 | unsigned long reclaim_options) | 1436 | unsigned long reclaim_options, |
1437 | unsigned long *total_scanned) | ||
1437 | { | 1438 | { |
1438 | struct mem_cgroup *victim; | 1439 | struct mem_cgroup *victim; |
1439 | int ret, total = 0; | 1440 | int ret, total = 0; |
@@ -1442,6 +1443,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1442 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1443 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
1443 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1444 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
1444 | unsigned long excess; | 1445 | unsigned long excess; |
1446 | unsigned long nr_scanned; | ||
1445 | 1447 | ||
1446 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1448 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
1447 | 1449 | ||
@@ -1484,10 +1486,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1484 | continue; | 1486 | continue; |
1485 | } | 1487 | } |
1486 | /* we use swappiness of local cgroup */ | 1488 | /* we use swappiness of local cgroup */ |
1487 | if (check_soft) | 1489 | if (check_soft) { |
1488 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, | 1490 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
1489 | noswap, get_swappiness(victim), zone); | 1491 | noswap, get_swappiness(victim), zone, |
1490 | else | 1492 | &nr_scanned); |
1493 | *total_scanned += nr_scanned; | ||
1494 | } else | ||
1491 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | 1495 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
1492 | noswap, get_swappiness(victim)); | 1496 | noswap, get_swappiness(victim)); |
1493 | css_put(&victim->css); | 1497 | css_put(&victim->css); |
@@ -1928,7 +1932,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1928 | return CHARGE_WOULDBLOCK; | 1932 | return CHARGE_WOULDBLOCK; |
1929 | 1933 | ||
1930 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, | 1934 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, |
1931 | gfp_mask, flags); | 1935 | gfp_mask, flags, NULL); |
1932 | if (mem_cgroup_margin(mem_over_limit) >= nr_pages) | 1936 | if (mem_cgroup_margin(mem_over_limit) >= nr_pages) |
1933 | return CHARGE_RETRY; | 1937 | return CHARGE_RETRY; |
1934 | /* | 1938 | /* |
@@ -3211,7 +3215,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
3211 | break; | 3215 | break; |
3212 | 3216 | ||
3213 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, | 3217 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
3214 | MEM_CGROUP_RECLAIM_SHRINK); | 3218 | MEM_CGROUP_RECLAIM_SHRINK, |
3219 | NULL); | ||
3215 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); | 3220 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); |
3216 | /* Usage is reduced ? */ | 3221 | /* Usage is reduced ? */ |
3217 | if (curusage >= oldusage) | 3222 | if (curusage >= oldusage) |
@@ -3271,7 +3276,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
3271 | 3276 | ||
3272 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, | 3277 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
3273 | MEM_CGROUP_RECLAIM_NOSWAP | | 3278 | MEM_CGROUP_RECLAIM_NOSWAP | |
3274 | MEM_CGROUP_RECLAIM_SHRINK); | 3279 | MEM_CGROUP_RECLAIM_SHRINK, |
3280 | NULL); | ||
3275 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 3281 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
3276 | /* Usage is reduced ? */ | 3282 | /* Usage is reduced ? */ |
3277 | if (curusage >= oldusage) | 3283 | if (curusage >= oldusage) |
@@ -3285,7 +3291,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
3285 | } | 3291 | } |
3286 | 3292 | ||
3287 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 3293 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
3288 | gfp_t gfp_mask) | 3294 | gfp_t gfp_mask, |
3295 | unsigned long *total_scanned) | ||
3289 | { | 3296 | { |
3290 | unsigned long nr_reclaimed = 0; | 3297 | unsigned long nr_reclaimed = 0; |
3291 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; | 3298 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; |
@@ -3293,6 +3300,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3293 | int loop = 0; | 3300 | int loop = 0; |
3294 | struct mem_cgroup_tree_per_zone *mctz; | 3301 | struct mem_cgroup_tree_per_zone *mctz; |
3295 | unsigned long long excess; | 3302 | unsigned long long excess; |
3303 | unsigned long nr_scanned; | ||
3296 | 3304 | ||
3297 | if (order > 0) | 3305 | if (order > 0) |
3298 | return 0; | 3306 | return 0; |
@@ -3311,10 +3319,13 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3311 | if (!mz) | 3319 | if (!mz) |
3312 | break; | 3320 | break; |
3313 | 3321 | ||
3322 | nr_scanned = 0; | ||
3314 | reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, | 3323 | reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, |
3315 | gfp_mask, | 3324 | gfp_mask, |
3316 | MEM_CGROUP_RECLAIM_SOFT); | 3325 | MEM_CGROUP_RECLAIM_SOFT, |
3326 | &nr_scanned); | ||
3317 | nr_reclaimed += reclaimed; | 3327 | nr_reclaimed += reclaimed; |
3328 | *total_scanned += nr_scanned; | ||
3318 | spin_lock(&mctz->lock); | 3329 | spin_lock(&mctz->lock); |
3319 | 3330 | ||
3320 | /* | 3331 | /* |