diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2009-01-07 21:08:24 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:08 -0500 |
commit | a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 (patch) | |
tree | 4f3ffaa399fbc16003cc1787228f10543dc9c3ef /mm | |
parent | 2733c06ac864ed40b9dfbbd5270f3f16949bd4a1 (diff) |
memcg: swappiness
Currently, /proc/sys/vm/swappiness can change the swappiness ratio for global
reclaim. However, memcg reclaim doesn't have a tuning parameter of its own.
In general, the optimal swappiness depends on the workload. (e.g. HPC
workloads need a lower swappiness than others.)
Per-cgroup swappiness therefore improves administrator tunability.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 78 | ||||
-rw-r--r-- | mm/vmscan.c | 7 |
2 files changed, 75 insertions, 10 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 027c0dd7a83e..ab2ecbb95b8d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -164,6 +164,9 @@ struct mem_cgroup { | |||
164 | int obsolete; | 164 | int obsolete; |
165 | atomic_t refcnt; | 165 | atomic_t refcnt; |
166 | 166 | ||
167 | unsigned int swappiness; | ||
168 | |||
169 | |||
167 | unsigned int inactive_ratio; | 170 | unsigned int inactive_ratio; |
168 | 171 | ||
169 | /* | 172 | /* |
@@ -636,6 +639,22 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | |||
636 | return false; | 639 | return false; |
637 | } | 640 | } |
638 | 641 | ||
642 | static unsigned int get_swappiness(struct mem_cgroup *memcg) | ||
643 | { | ||
644 | struct cgroup *cgrp = memcg->css.cgroup; | ||
645 | unsigned int swappiness; | ||
646 | |||
647 | /* root ? */ | ||
648 | if (cgrp->parent == NULL) | ||
649 | return vm_swappiness; | ||
650 | |||
651 | spin_lock(&memcg->reclaim_param_lock); | ||
652 | swappiness = memcg->swappiness; | ||
653 | spin_unlock(&memcg->reclaim_param_lock); | ||
654 | |||
655 | return swappiness; | ||
656 | } | ||
657 | |||
639 | /* | 658 | /* |
640 | * Dance down the hierarchy if needed to reclaim memory. We remember the | 659 | * Dance down the hierarchy if needed to reclaim memory. We remember the |
641 | * last child we reclaimed from, so that we don't end up penalizing | 660 | * last child we reclaimed from, so that we don't end up penalizing |
@@ -656,7 +675,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
656 | * but there might be left over accounting, even after children | 675 | * but there might be left over accounting, even after children |
657 | * have left. | 676 | * have left. |
658 | */ | 677 | */ |
659 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap); | 678 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, |
679 | get_swappiness(root_mem)); | ||
660 | if (mem_cgroup_check_under_limit(root_mem)) | 680 | if (mem_cgroup_check_under_limit(root_mem)) |
661 | return 0; | 681 | return 0; |
662 | if (!root_mem->use_hierarchy) | 682 | if (!root_mem->use_hierarchy) |
@@ -672,7 +692,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
672 | cgroup_unlock(); | 692 | cgroup_unlock(); |
673 | continue; | 693 | continue; |
674 | } | 694 | } |
675 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap); | 695 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
696 | get_swappiness(next_mem)); | ||
676 | if (mem_cgroup_check_under_limit(root_mem)) | 697 | if (mem_cgroup_check_under_limit(root_mem)) |
677 | return 0; | 698 | return 0; |
678 | cgroup_lock(); | 699 | cgroup_lock(); |
@@ -1400,7 +1421,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | |||
1400 | rcu_read_unlock(); | 1421 | rcu_read_unlock(); |
1401 | 1422 | ||
1402 | do { | 1423 | do { |
1403 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true); | 1424 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true, |
1425 | get_swappiness(mem)); | ||
1404 | progress += mem_cgroup_check_under_limit(mem); | 1426 | progress += mem_cgroup_check_under_limit(mem); |
1405 | } while (!progress && --retry); | 1427 | } while (!progress && --retry); |
1406 | 1428 | ||
@@ -1468,7 +1490,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
1468 | break; | 1490 | break; |
1469 | 1491 | ||
1470 | progress = try_to_free_mem_cgroup_pages(memcg, | 1492 | progress = try_to_free_mem_cgroup_pages(memcg, |
1471 | GFP_KERNEL, false); | 1493 | GFP_KERNEL, |
1494 | false, | ||
1495 | get_swappiness(memcg)); | ||
1472 | if (!progress) retry_count--; | 1496 | if (!progress) retry_count--; |
1473 | } | 1497 | } |
1474 | 1498 | ||
@@ -1512,7 +1536,8 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
1512 | break; | 1536 | break; |
1513 | 1537 | ||
1514 | oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 1538 | oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
1515 | try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true); | 1539 | try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true, |
1540 | get_swappiness(memcg)); | ||
1516 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 1541 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
1517 | if (curusage >= oldusage) | 1542 | if (curusage >= oldusage) |
1518 | retry_count--; | 1543 | retry_count--; |
@@ -1643,8 +1668,8 @@ try_to_free: | |||
1643 | ret = -EINTR; | 1668 | ret = -EINTR; |
1644 | goto out; | 1669 | goto out; |
1645 | } | 1670 | } |
1646 | progress = try_to_free_mem_cgroup_pages(mem, | 1671 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
1647 | GFP_KERNEL, false); | 1672 | false, get_swappiness(mem)); |
1648 | if (!progress) { | 1673 | if (!progress) { |
1649 | nr_retries--; | 1674 | nr_retries--; |
1650 | /* maybe some writeback is necessary */ | 1675 | /* maybe some writeback is necessary */ |
@@ -1864,6 +1889,37 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
1864 | return 0; | 1889 | return 0; |
1865 | } | 1890 | } |
1866 | 1891 | ||
1892 | static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) | ||
1893 | { | ||
1894 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
1895 | |||
1896 | return get_swappiness(memcg); | ||
1897 | } | ||
1898 | |||
1899 | static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | ||
1900 | u64 val) | ||
1901 | { | ||
1902 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
1903 | struct mem_cgroup *parent; | ||
1904 | if (val > 100) | ||
1905 | return -EINVAL; | ||
1906 | |||
1907 | if (cgrp->parent == NULL) | ||
1908 | return -EINVAL; | ||
1909 | |||
1910 | parent = mem_cgroup_from_cont(cgrp->parent); | ||
1911 | /* If under hierarchy, only empty-root can set this value */ | ||
1912 | if ((parent->use_hierarchy) || | ||
1913 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) | ||
1914 | return -EINVAL; | ||
1915 | |||
1916 | spin_lock(&memcg->reclaim_param_lock); | ||
1917 | memcg->swappiness = val; | ||
1918 | spin_unlock(&memcg->reclaim_param_lock); | ||
1919 | |||
1920 | return 0; | ||
1921 | } | ||
1922 | |||
1867 | 1923 | ||
1868 | static struct cftype mem_cgroup_files[] = { | 1924 | static struct cftype mem_cgroup_files[] = { |
1869 | { | 1925 | { |
@@ -1902,6 +1958,11 @@ static struct cftype mem_cgroup_files[] = { | |||
1902 | .write_u64 = mem_cgroup_hierarchy_write, | 1958 | .write_u64 = mem_cgroup_hierarchy_write, |
1903 | .read_u64 = mem_cgroup_hierarchy_read, | 1959 | .read_u64 = mem_cgroup_hierarchy_read, |
1904 | }, | 1960 | }, |
1961 | { | ||
1962 | .name = "swappiness", | ||
1963 | .read_u64 = mem_cgroup_swappiness_read, | ||
1964 | .write_u64 = mem_cgroup_swappiness_write, | ||
1965 | }, | ||
1905 | }; | 1966 | }; |
1906 | 1967 | ||
1907 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 1968 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
@@ -2093,6 +2154,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
2093 | mem->last_scanned_child = NULL; | 2154 | mem->last_scanned_child = NULL; |
2094 | spin_lock_init(&mem->reclaim_param_lock); | 2155 | spin_lock_init(&mem->reclaim_param_lock); |
2095 | 2156 | ||
2157 | if (parent) | ||
2158 | mem->swappiness = get_swappiness(parent); | ||
2159 | |||
2096 | return &mem->css; | 2160 | return &mem->css; |
2097 | free_out: | 2161 | free_out: |
2098 | for_each_node_state(node, N_POSSIBLE) | 2162 | for_each_node_state(node, N_POSSIBLE) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index f03c239440ad..ece2f405187f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1707,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
1707 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 1707 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
1708 | 1708 | ||
1709 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 1709 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
1710 | gfp_t gfp_mask, | 1710 | gfp_t gfp_mask, |
1711 | bool noswap) | 1711 | bool noswap, |
1712 | unsigned int swappiness) | ||
1712 | { | 1713 | { |
1713 | struct scan_control sc = { | 1714 | struct scan_control sc = { |
1714 | .may_writepage = !laptop_mode, | 1715 | .may_writepage = !laptop_mode, |
1715 | .may_swap = 1, | 1716 | .may_swap = 1, |
1716 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1717 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
1717 | .swappiness = vm_swappiness, | 1718 | .swappiness = swappiness, |
1718 | .order = 0, | 1719 | .order = 0, |
1719 | .mem_cgroup = mem_cont, | 1720 | .mem_cgroup = mem_cont, |
1720 | .isolate_pages = mem_cgroup_isolate_pages, | 1721 | .isolate_pages = mem_cgroup_isolate_pages, |