aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>2009-01-07 21:08:18 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-08 11:31:08 -0500
commit14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 (patch)
treea56edaa680c7c338a5a3043aa24897d7f668b6c9
parent549927620b04a8f8073ce2ee2a8977f209af2ee5 (diff)
memcg: add inactive_anon_is_low()
inactive_anon_is_low() is a key component of active/inactive anon balancing during reclaim. However, the current inactive_anon_is_low() function only considers global reclaim. Therefore, we need the following ugly scan_global_lru() condition: if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; } This causes memcg reclaim to always deactivate pages whenever shrink_list() is called. Adding mem_cgroup_inactive_anon_is_low() improves the active/inactive anon balancing of memory cgroups. Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: "Pekka Enberg" <penberg@cs.helsinki.fi> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Hugh Dickins <hugh@veritas.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/memcontrol.h9
-rw-r--r--mm/memcontrol.c46
-rw-r--r--mm/vmscan.c37
3 files changed, 77 insertions, 15 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 59ac95a64508..aad9377c9828 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
100 100
101extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, 101extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
102 int priority, enum lru_list lru); 102 int priority, enum lru_list lru);
103int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
104 struct zone *zone);
103 105
104#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 106#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
105extern int do_swap_account; 107extern int do_swap_account;
@@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task)
251{ 253{
252 return false; 254 return false;
253} 255}
256
/*
 * Fallback variant that sits in the section closed by
 * "#endif CONFIG_CGROUP_MEM_CONT" (presumably the build without the
 * memory controller -- confirm against the matching #ifdef).
 *
 * Both arguments are ignored and the result is always 1, i.e. the
 * inactive anon list is unconditionally reported as low.
 */
static inline int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
						  struct zone *zone)
{
	return 1;
}
262
254#endif /* CONFIG_CGROUP_MEM_CONT */ 263#endif /* CONFIG_CGROUP_MEM_CONT */
255 264
256#endif /* _LINUX_MEMCONTROL_H */ 265#endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 457d671029b8..6611328460e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -156,6 +156,9 @@ struct mem_cgroup {
156 unsigned long last_oom_jiffies; 156 unsigned long last_oom_jiffies;
157 int obsolete; 157 int obsolete;
158 atomic_t refcnt; 158 atomic_t refcnt;
159
160 unsigned int inactive_ratio;
161
159 /* 162 /*
160 * statistics. This must be placed at the end of memcg. 163 * statistics. This must be placed at the end of memcg.
161 */ 164 */
@@ -431,6 +434,20 @@ long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
431 return (nr_pages >> priority); 434 return (nr_pages >> priority);
432} 435}
433 436
437int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
438{
439 unsigned long active;
440 unsigned long inactive;
441
442 inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
443 active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
444
445 if (inactive * memcg->inactive_ratio < active)
446 return 1;
447
448 return 0;
449}
450
434unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 451unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
435 struct list_head *dst, 452 struct list_head *dst,
436 unsigned long *scanned, int order, 453 unsigned long *scanned, int order,
@@ -1360,6 +1377,29 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
1360 return 0; 1377 return 0;
1361} 1378}
1362 1379
1380/*
1381 * The inactive anon list should be small enough that the VM never has to
1382 * do too much work, but large enough that each inactive page has a chance
1383 * to be referenced again before it is swapped out.
1384 *
1385 * this calculation is straightforward porting from
1386 * page_alloc.c::setup_per_zone_inactive_ratio().
1387 * it describe more detail.
1388 */
1389static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg)
1390{
1391 unsigned int gb, ratio;
1392
1393 gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30;
1394 if (gb)
1395 ratio = int_sqrt(10 * gb);
1396 else
1397 ratio = 1;
1398
1399 memcg->inactive_ratio = ratio;
1400
1401}
1402
1363static DEFINE_MUTEX(set_limit_mutex); 1403static DEFINE_MUTEX(set_limit_mutex);
1364 1404
1365static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, 1405static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
@@ -1398,6 +1438,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
1398 GFP_KERNEL, false); 1438 GFP_KERNEL, false);
1399 if (!progress) retry_count--; 1439 if (!progress) retry_count--;
1400 } 1440 }
1441
1442 if (!ret)
1443 mem_cgroup_set_inactive_ratio(memcg);
1444
1401 return ret; 1445 return ret;
1402} 1446}
1403 1447
@@ -1982,7 +2026,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1982 res_counter_init(&mem->res, NULL); 2026 res_counter_init(&mem->res, NULL);
1983 res_counter_init(&mem->memsw, NULL); 2027 res_counter_init(&mem->memsw, NULL);
1984 } 2028 }
1985 2029 mem_cgroup_set_inactive_ratio(mem);
1986 mem->last_scanned_child = NULL; 2030 mem->last_scanned_child = NULL;
1987 2031
1988 return &mem->css; 2032 return &mem->css;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e2b31a522a66..b2bc06bffcfb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1310,14 +1310,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1310 pagevec_release(&pvec); 1310 pagevec_release(&pvec);
1311} 1311}
1312 1312
1313/** 1313static int inactive_anon_is_low_global(struct zone *zone)
1314 * inactive_anon_is_low - check if anonymous pages need to be deactivated
1315 * @zone: zone to check
1316 *
1317 * Returns true if the zone does not have enough inactive anon pages,
1318 * meaning some active anon pages need to be deactivated.
1319 */
1320static int inactive_anon_is_low(struct zone *zone)
1321{ 1314{
1322 unsigned long active, inactive; 1315 unsigned long active, inactive;
1323 1316
@@ -1330,6 +1323,25 @@ static int inactive_anon_is_low(struct zone *zone)
1330 return 0; 1323 return 0;
1331} 1324}
1332 1325
1326/**
1327 * inactive_anon_is_low - check if anonymous pages need to be deactivated
1328 * @zone: zone to check
1329 * @sc: scan control of this context
1330 *
1331 * Returns true if the zone does not have enough inactive anon pages,
1332 * meaning some active anon pages need to be deactivated.
1333 */
1334static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
1335{
1336 int low;
1337
1338 if (scan_global_lru(sc))
1339 low = inactive_anon_is_low_global(zone);
1340 else
1341 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
1342 return low;
1343}
1344
1333static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, 1345static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1334 struct zone *zone, struct scan_control *sc, int priority) 1346 struct zone *zone, struct scan_control *sc, int priority)
1335{ 1347{
@@ -1340,8 +1352,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1340 return 0; 1352 return 0;
1341 } 1353 }
1342 1354
1343 if (lru == LRU_ACTIVE_ANON && 1355 if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
1344 (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
1345 shrink_active_list(nr_to_scan, zone, sc, priority, file); 1356 shrink_active_list(nr_to_scan, zone, sc, priority, file);
1346 return 0; 1357 return 0;
1347 } 1358 }
@@ -1509,9 +1520,7 @@ static void shrink_zone(int priority, struct zone *zone,
1509 * Even if we did not try to evict anon pages at all, we want to 1520 * Even if we did not try to evict anon pages at all, we want to
1510 * rebalance the anon lru active/inactive ratio. 1521 * rebalance the anon lru active/inactive ratio.
1511 */ 1522 */
1512 if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) 1523 if (inactive_anon_is_low(zone, sc))
1513 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1514 else if (!scan_global_lru(sc))
1515 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); 1524 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
1516 1525
1517 throttle_vm_writeout(sc->gfp_mask); 1526 throttle_vm_writeout(sc->gfp_mask);
@@ -1807,7 +1816,7 @@ loop_again:
1807 * Do some background aging of the anon list, to give 1816 * Do some background aging of the anon list, to give
1808 * pages a chance to be referenced before reclaiming. 1817 * pages a chance to be referenced before reclaiming.
1809 */ 1818 */
1810 if (inactive_anon_is_low(zone)) 1819 if (inactive_anon_is_low(zone, &sc))
1811 shrink_active_list(SWAP_CLUSTER_MAX, zone, 1820 shrink_active_list(SWAP_CLUSTER_MAX, zone,
1812 &sc, priority, 0); 1821 &sc, priority, 0);
1813 1822