aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>2009-01-07 21:08:24 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-08 11:31:08 -0500
commita7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 (patch)
tree4f3ffaa399fbc16003cc1787228f10543dc9c3ef
parent2733c06ac864ed40b9dfbbd5270f3f16949bd4a1 (diff)
memcg: swappiness
Currently, /proc/sys/vm/swappiness can change the swappiness ratio for global reclaim. However, memcg reclaim has no tuning parameter of its own. In general, the optimal swappiness depends on the workload (e.g. an HPC workload needs a lower swappiness than others). A per-cgroup swappiness therefore improves administrator tunability. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Hugh Dickins <hugh@veritas.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/controllers/memory.txt9
-rw-r--r--include/linux/swap.h3
-rw-r--r--mm/memcontrol.c78
-rw-r--r--mm/vmscan.c7
4 files changed, 86 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index d71745cc2f00..e1501964df1e 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -314,6 +314,15 @@ will be charged as a new owner of it.
314 showing for better debug please see the code for meanings. 314 showing for better debug please see the code for meanings.
315 315
316 316
3175.3 swappiness
318 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
319
320 The swappiness of the following cgroups can't be changed:
321 - the root cgroup (it uses /proc/sys/vm/swappiness).
322 - a cgroup which uses hierarchy and has child cgroups.
323 - a cgroup which uses hierarchy and is not the root of the hierarchy.
324
325
3176. Hierarchy support 3266. Hierarchy support
318 327
319The memory controller supports a deep hierarchy and hierarchical accounting. 328The memory controller supports a deep hierarchy and hierarchical accounting.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index be938ce4895a..4ccca25d0f05 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page)
214extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 214extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
215 gfp_t gfp_mask); 215 gfp_t gfp_mask);
216extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 216extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
217 gfp_t gfp_mask, bool noswap); 217 gfp_t gfp_mask, bool noswap,
218 unsigned int swappiness);
218extern int __isolate_lru_page(struct page *page, int mode, int file); 219extern int __isolate_lru_page(struct page *page, int mode, int file);
219extern unsigned long shrink_all_memory(unsigned long nr_pages); 220extern unsigned long shrink_all_memory(unsigned long nr_pages);
220extern int vm_swappiness; 221extern int vm_swappiness;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 027c0dd7a83e..ab2ecbb95b8d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -164,6 +164,9 @@ struct mem_cgroup {
164 int obsolete; 164 int obsolete;
165 atomic_t refcnt; 165 atomic_t refcnt;
166 166
167 unsigned int swappiness;
168
169
167 unsigned int inactive_ratio; 170 unsigned int inactive_ratio;
168 171
169 /* 172 /*
@@ -636,6 +639,22 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
636 return false; 639 return false;
637} 640}
638 641
/*
 * Return the swappiness value to use when reclaiming from @memcg.
 *
 * A cgroup with no parent (the root) has no setting of its own here and
 * reports the global vm_swappiness; any other group reports its own
 * memcg->swappiness field.
 */
642static unsigned int get_swappiness(struct mem_cgroup *memcg)
643{
644 struct cgroup *cgrp = memcg->css.cgroup;
645 unsigned int swappiness;
646
647 /* root ? */
648 if (cgrp->parent == NULL)
649 return vm_swappiness;
650
/* Read under reclaim_param_lock, the lock mem_cgroup_swappiness_write() holds while storing. */
651 spin_lock(&memcg->reclaim_param_lock);
652 swappiness = memcg->swappiness;
653 spin_unlock(&memcg->reclaim_param_lock);
654
655 return swappiness;
656}
657
639/* 658/*
640 * Dance down the hierarchy if needed to reclaim memory. We remember the 659 * Dance down the hierarchy if needed to reclaim memory. We remember the
641 * last child we reclaimed from, so that we don't end up penalizing 660 * last child we reclaimed from, so that we don't end up penalizing
@@ -656,7 +675,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
656 * but there might be left over accounting, even after children 675 * but there might be left over accounting, even after children
657 * have left. 676 * have left.
658 */ 677 */
659 ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap); 678 ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
679 get_swappiness(root_mem));
660 if (mem_cgroup_check_under_limit(root_mem)) 680 if (mem_cgroup_check_under_limit(root_mem))
661 return 0; 681 return 0;
662 if (!root_mem->use_hierarchy) 682 if (!root_mem->use_hierarchy)
@@ -672,7 +692,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
672 cgroup_unlock(); 692 cgroup_unlock();
673 continue; 693 continue;
674 } 694 }
675 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap); 695 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
696 get_swappiness(next_mem));
676 if (mem_cgroup_check_under_limit(root_mem)) 697 if (mem_cgroup_check_under_limit(root_mem))
677 return 0; 698 return 0;
678 cgroup_lock(); 699 cgroup_lock();
@@ -1400,7 +1421,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
1400 rcu_read_unlock(); 1421 rcu_read_unlock();
1401 1422
1402 do { 1423 do {
1403 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true); 1424 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true,
1425 get_swappiness(mem));
1404 progress += mem_cgroup_check_under_limit(mem); 1426 progress += mem_cgroup_check_under_limit(mem);
1405 } while (!progress && --retry); 1427 } while (!progress && --retry);
1406 1428
@@ -1468,7 +1490,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
1468 break; 1490 break;
1469 1491
1470 progress = try_to_free_mem_cgroup_pages(memcg, 1492 progress = try_to_free_mem_cgroup_pages(memcg,
1471 GFP_KERNEL, false); 1493 GFP_KERNEL,
1494 false,
1495 get_swappiness(memcg));
1472 if (!progress) retry_count--; 1496 if (!progress) retry_count--;
1473 } 1497 }
1474 1498
@@ -1512,7 +1536,8 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
1512 break; 1536 break;
1513 1537
1514 oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 1538 oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
1515 try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true); 1539 try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true,
1540 get_swappiness(memcg));
1516 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); 1541 curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
1517 if (curusage >= oldusage) 1542 if (curusage >= oldusage)
1518 retry_count--; 1543 retry_count--;
@@ -1643,8 +1668,8 @@ try_to_free:
1643 ret = -EINTR; 1668 ret = -EINTR;
1644 goto out; 1669 goto out;
1645 } 1670 }
1646 progress = try_to_free_mem_cgroup_pages(mem, 1671 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
1647 GFP_KERNEL, false); 1672 false, get_swappiness(mem));
1648 if (!progress) { 1673 if (!progress) {
1649 nr_retries--; 1674 nr_retries--;
1650 /* maybe some writeback is necessary */ 1675 /* maybe some writeback is necessary */
@@ -1864,6 +1889,37 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
1864 return 0; 1889 return 0;
1865} 1890}
1866 1891
/*
 * read_u64 handler for the "swappiness" cgroup file.
 *
 * Looks up the mem_cgroup behind @cgrp and returns get_swappiness() for it,
 * so the root cgroup reports the global vm_swappiness. @cft is unused.
 */
1892static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
1893{
1894 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1895
1896 return get_swappiness(memcg);
1897}
1898
/*
 * write_u64 handler for the "swappiness" cgroup file.
 *
 * Stores @val as the swappiness of the mem_cgroup behind @cgrp.
 * Returns 0 on success, or -EINVAL when:
 *   - @val is greater than 100,
 *   - @cgrp is the root cgroup (it has no parent),
 *   - the parent group has use_hierarchy set, or
 *   - this group has use_hierarchy set and already has children.
 * @cft is unused.
 */
1899static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
1900 u64 val)
1901{
1902 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
1903 struct mem_cgroup *parent;
1904 if (val > 100)
1905 return -EINVAL;
1906
/* The root cgroup cannot be written; get_swappiness() returns vm_swappiness for it. */
1907 if (cgrp->parent == NULL)
1908 return -EINVAL;
1909
1910 parent = mem_cgroup_from_cont(cgrp->parent);
1911 /* If under hierarchy, only empty-root can set this value */
/*
 * i.e. reject a group whose parent uses hierarchy (it is not the top of
 * its hierarchy), and reject a hierarchy-using group that has children.
 */
1912 if ((parent->use_hierarchy) ||
1913 (memcg->use_hierarchy && !list_empty(&cgrp->children)))
1914 return -EINVAL;
1915
/* Store under reclaim_param_lock, the lock get_swappiness() takes when reading. */
1916 spin_lock(&memcg->reclaim_param_lock);
1917 memcg->swappiness = val;
1918 spin_unlock(&memcg->reclaim_param_lock);
1919
1920 return 0;
1921}
1922
1867 1923
1868static struct cftype mem_cgroup_files[] = { 1924static struct cftype mem_cgroup_files[] = {
1869 { 1925 {
@@ -1902,6 +1958,11 @@ static struct cftype mem_cgroup_files[] = {
1902 .write_u64 = mem_cgroup_hierarchy_write, 1958 .write_u64 = mem_cgroup_hierarchy_write,
1903 .read_u64 = mem_cgroup_hierarchy_read, 1959 .read_u64 = mem_cgroup_hierarchy_read,
1904 }, 1960 },
1961 {
1962 .name = "swappiness",
1963 .read_u64 = mem_cgroup_swappiness_read,
1964 .write_u64 = mem_cgroup_swappiness_write,
1965 },
1905}; 1966};
1906 1967
1907#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 1968#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -2093,6 +2154,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
2093 mem->last_scanned_child = NULL; 2154 mem->last_scanned_child = NULL;
2094 spin_lock_init(&mem->reclaim_param_lock); 2155 spin_lock_init(&mem->reclaim_param_lock);
2095 2156
2157 if (parent)
2158 mem->swappiness = get_swappiness(parent);
2159
2096 return &mem->css; 2160 return &mem->css;
2097free_out: 2161free_out:
2098 for_each_node_state(node, N_POSSIBLE) 2162 for_each_node_state(node, N_POSSIBLE)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f03c239440ad..ece2f405187f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1707,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
1707#ifdef CONFIG_CGROUP_MEM_RES_CTLR 1707#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1708 1708
1709unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 1709unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1710 gfp_t gfp_mask, 1710 gfp_t gfp_mask,
1711 bool noswap) 1711 bool noswap,
1712 unsigned int swappiness)
1712{ 1713{
1713 struct scan_control sc = { 1714 struct scan_control sc = {
1714 .may_writepage = !laptop_mode, 1715 .may_writepage = !laptop_mode,
1715 .may_swap = 1, 1716 .may_swap = 1,
1716 .swap_cluster_max = SWAP_CLUSTER_MAX, 1717 .swap_cluster_max = SWAP_CLUSTER_MAX,
1717 .swappiness = vm_swappiness, 1718 .swappiness = swappiness,
1718 .order = 0, 1719 .order = 0,
1719 .mem_cgroup = mem_cont, 1720 .mem_cgroup = mem_cont,
1720 .isolate_pages = mem_cgroup_isolate_pages, 1721 .isolate_pages = mem_cgroup_isolate_pages,