diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2009-01-07 21:08:18 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:08 -0500 |
commit | 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 (patch) | |
tree | a56edaa680c7c338a5a3043aa24897d7f668b6c9 | |
parent | 549927620b04a8f8073ce2ee2a8977f209af2ee5 (diff) |
memcg: add inactive_anon_is_low()
inactive_anon_is_low() is a key component of active/inactive anon
balancing on reclaim. However, the current inactive_anon_is_low() function
only considers global reclaim.
Therefore, we need the following ugly scan_global_lru() condition:
if (lru == LRU_ACTIVE_ANON &&
(!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
shrink_active_list(nr_to_scan, zone, sc, priority, file);
return 0;
This causes memcg reclaim to always deactivate pages whenever shrink_list()
is called. Add mem_cgroup_inactive_anon_is_low() to improve active/inactive
anon balancing of memcgroup.
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Pekka Enberg" <penberg@cs.helsinki.fi>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 9 | ||||
-rw-r--r-- | mm/memcontrol.c | 46 | ||||
-rw-r--r-- | mm/vmscan.c | 37 |
3 files changed, 77 insertions, 15 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 59ac95a64508..aad9377c9828 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, | |||
100 | 100 | ||
101 | extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, | 101 | extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, |
102 | int priority, enum lru_list lru); | 102 | int priority, enum lru_list lru); |
103 | int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, | ||
104 | struct zone *zone); | ||
103 | 105 | ||
104 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 106 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
105 | extern int do_swap_account; | 107 | extern int do_swap_account; |
@@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) | |||
251 | { | 253 | { |
252 | return false; | 254 | return false; |
253 | } | 255 | } |
256 | |||
257 | static inline int | ||
258 | mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | ||
259 | { | ||
260 | return 1; | ||
261 | } | ||
262 | |||
254 | #endif /* CONFIG_CGROUP_MEM_CONT */ | 263 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
255 | 264 | ||
256 | #endif /* _LINUX_MEMCONTROL_H */ | 265 | #endif /* _LINUX_MEMCONTROL_H */ |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 457d671029b8..6611328460e9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -156,6 +156,9 @@ struct mem_cgroup { | |||
156 | unsigned long last_oom_jiffies; | 156 | unsigned long last_oom_jiffies; |
157 | int obsolete; | 157 | int obsolete; |
158 | atomic_t refcnt; | 158 | atomic_t refcnt; |
159 | |||
160 | unsigned int inactive_ratio; | ||
161 | |||
159 | /* | 162 | /* |
160 | * statistics. This must be placed at the end of memcg. | 163 | * statistics. This must be placed at the end of memcg. |
161 | */ | 164 | */ |
@@ -431,6 +434,20 @@ long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, | |||
431 | return (nr_pages >> priority); | 434 | return (nr_pages >> priority); |
432 | } | 435 | } |
433 | 436 | ||
437 | int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | ||
438 | { | ||
439 | unsigned long active; | ||
440 | unsigned long inactive; | ||
441 | |||
442 | inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON); | ||
443 | active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON); | ||
444 | |||
445 | if (inactive * memcg->inactive_ratio < active) | ||
446 | return 1; | ||
447 | |||
448 | return 0; | ||
449 | } | ||
450 | |||
434 | unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | 451 | unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, |
435 | struct list_head *dst, | 452 | struct list_head *dst, |
436 | unsigned long *scanned, int order, | 453 | unsigned long *scanned, int order, |
@@ -1360,6 +1377,29 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | |||
1360 | return 0; | 1377 | return 0; |
1361 | } | 1378 | } |
1362 | 1379 | ||
1380 | /* | ||
1381 | * The inactive anon list should be small enough that the VM never has to | ||
1382 | * do too much work, but large enough that each inactive page has a chance | ||
1383 | * to be referenced again before it is swapped out. | ||
1384 | * | ||
1385 | * this calculation is straightforward porting from | ||
1386 | * page_alloc.c::setup_per_zone_inactive_ratio(). | ||
1387 | * it describe more detail. | ||
1388 | */ | ||
1389 | static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg) | ||
1390 | { | ||
1391 | unsigned int gb, ratio; | ||
1392 | |||
1393 | gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30; | ||
1394 | if (gb) | ||
1395 | ratio = int_sqrt(10 * gb); | ||
1396 | else | ||
1397 | ratio = 1; | ||
1398 | |||
1399 | memcg->inactive_ratio = ratio; | ||
1400 | |||
1401 | } | ||
1402 | |||
1363 | static DEFINE_MUTEX(set_limit_mutex); | 1403 | static DEFINE_MUTEX(set_limit_mutex); |
1364 | 1404 | ||
1365 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | 1405 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, |
@@ -1398,6 +1438,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
1398 | GFP_KERNEL, false); | 1438 | GFP_KERNEL, false); |
1399 | if (!progress) retry_count--; | 1439 | if (!progress) retry_count--; |
1400 | } | 1440 | } |
1441 | |||
1442 | if (!ret) | ||
1443 | mem_cgroup_set_inactive_ratio(memcg); | ||
1444 | |||
1401 | return ret; | 1445 | return ret; |
1402 | } | 1446 | } |
1403 | 1447 | ||
@@ -1982,7 +2026,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1982 | res_counter_init(&mem->res, NULL); | 2026 | res_counter_init(&mem->res, NULL); |
1983 | res_counter_init(&mem->memsw, NULL); | 2027 | res_counter_init(&mem->memsw, NULL); |
1984 | } | 2028 | } |
1985 | 2029 | mem_cgroup_set_inactive_ratio(mem); | |
1986 | mem->last_scanned_child = NULL; | 2030 | mem->last_scanned_child = NULL; |
1987 | 2031 | ||
1988 | return &mem->css; | 2032 | return &mem->css; |
diff --git a/mm/vmscan.c b/mm/vmscan.c index e2b31a522a66..b2bc06bffcfb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1310,14 +1310,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1310 | pagevec_release(&pvec); | 1310 | pagevec_release(&pvec); |
1311 | } | 1311 | } |
1312 | 1312 | ||
1313 | /** | 1313 | static int inactive_anon_is_low_global(struct zone *zone) |
1314 | * inactive_anon_is_low - check if anonymous pages need to be deactivated | ||
1315 | * @zone: zone to check | ||
1316 | * | ||
1317 | * Returns true if the zone does not have enough inactive anon pages, | ||
1318 | * meaning some active anon pages need to be deactivated. | ||
1319 | */ | ||
1320 | static int inactive_anon_is_low(struct zone *zone) | ||
1321 | { | 1314 | { |
1322 | unsigned long active, inactive; | 1315 | unsigned long active, inactive; |
1323 | 1316 | ||
@@ -1330,6 +1323,25 @@ static int inactive_anon_is_low(struct zone *zone) | |||
1330 | return 0; | 1323 | return 0; |
1331 | } | 1324 | } |
1332 | 1325 | ||
1326 | /** | ||
1327 | * inactive_anon_is_low - check if anonymous pages need to be deactivated | ||
1328 | * @zone: zone to check | ||
1329 | * @sc: scan control of this context | ||
1330 | * | ||
1331 | * Returns true if the zone does not have enough inactive anon pages, | ||
1332 | * meaning some active anon pages need to be deactivated. | ||
1333 | */ | ||
1334 | static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) | ||
1335 | { | ||
1336 | int low; | ||
1337 | |||
1338 | if (scan_global_lru(sc)) | ||
1339 | low = inactive_anon_is_low_global(zone); | ||
1340 | else | ||
1341 | low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); | ||
1342 | return low; | ||
1343 | } | ||
1344 | |||
1333 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, | 1345 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, |
1334 | struct zone *zone, struct scan_control *sc, int priority) | 1346 | struct zone *zone, struct scan_control *sc, int priority) |
1335 | { | 1347 | { |
@@ -1340,8 +1352,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, | |||
1340 | return 0; | 1352 | return 0; |
1341 | } | 1353 | } |
1342 | 1354 | ||
1343 | if (lru == LRU_ACTIVE_ANON && | 1355 | if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) { |
1344 | (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { | ||
1345 | shrink_active_list(nr_to_scan, zone, sc, priority, file); | 1356 | shrink_active_list(nr_to_scan, zone, sc, priority, file); |
1346 | return 0; | 1357 | return 0; |
1347 | } | 1358 | } |
@@ -1509,9 +1520,7 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1509 | * Even if we did not try to evict anon pages at all, we want to | 1520 | * Even if we did not try to evict anon pages at all, we want to |
1510 | * rebalance the anon lru active/inactive ratio. | 1521 | * rebalance the anon lru active/inactive ratio. |
1511 | */ | 1522 | */ |
1512 | if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) | 1523 | if (inactive_anon_is_low(zone, sc)) |
1513 | shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); | ||
1514 | else if (!scan_global_lru(sc)) | ||
1515 | shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); | 1524 | shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); |
1516 | 1525 | ||
1517 | throttle_vm_writeout(sc->gfp_mask); | 1526 | throttle_vm_writeout(sc->gfp_mask); |
@@ -1807,7 +1816,7 @@ loop_again: | |||
1807 | * Do some background aging of the anon list, to give | 1816 | * Do some background aging of the anon list, to give |
1808 | * pages a chance to be referenced before reclaiming. | 1817 | * pages a chance to be referenced before reclaiming. |
1809 | */ | 1818 | */ |
1810 | if (inactive_anon_is_low(zone)) | 1819 | if (inactive_anon_is_low(zone, &sc)) |
1811 | shrink_active_list(SWAP_CLUSTER_MAX, zone, | 1820 | shrink_active_list(SWAP_CLUSTER_MAX, zone, |
1812 | &sc, priority, 0); | 1821 | &sc, priority, 0); |
1813 | 1822 | ||