diff options
-rw-r--r-- | Documentation/controllers/memory.txt | 9 | ||||
-rw-r--r-- | include/linux/swap.h | 3 | ||||
-rw-r--r-- | mm/memcontrol.c | 78 | ||||
-rw-r--r-- | mm/vmscan.c | 7 |
4 files changed, 86 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index d71745cc2f00..e1501964df1e 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt | |||
@@ -314,6 +314,15 @@ will be charged as a new owner of it. | |||
314 | showing for better debug please see the code for meanings. | 314 | showing for better debug please see the code for meanings. |
315 | 315 | ||
316 | 316 | ||
317 | 5.3 swappiness | ||
318 | Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. | ||
319 | |||
320 | Following cgroups' swappiness can't be changed. | ||
321 | - root cgroup (uses /proc/sys/vm/swappiness). | ||
322 | - a cgroup which uses hierarchy and has child cgroups. | ||
323 | - a cgroup which uses hierarchy and is not the root of the hierarchy. | ||
324 | |||
325 | |||
317 | 6. Hierarchy support | 326 | 6. Hierarchy support |
318 | 327 | ||
319 | The memory controller supports a deep hierarchy and hierarchical accounting. | 328 | The memory controller supports a deep hierarchy and hierarchical accounting. |
diff --git a/include/linux/swap.h b/include/linux/swap.h index be938ce4895a..4ccca25d0f05 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page) | |||
214 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 214 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
215 | gfp_t gfp_mask); | 215 | gfp_t gfp_mask); |
216 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | 216 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, |
217 | gfp_t gfp_mask, bool noswap); | 217 | gfp_t gfp_mask, bool noswap, |
218 | unsigned int swappiness); | ||
218 | extern int __isolate_lru_page(struct page *page, int mode, int file); | 219 | extern int __isolate_lru_page(struct page *page, int mode, int file); |
219 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | 220 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
220 | extern int vm_swappiness; | 221 | extern int vm_swappiness; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 027c0dd7a83e..ab2ecbb95b8d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -164,6 +164,9 @@ struct mem_cgroup { | |||
164 | int obsolete; | 164 | int obsolete; |
165 | atomic_t refcnt; | 165 | atomic_t refcnt; |
166 | 166 | ||
167 | unsigned int swappiness; | ||
168 | |||
169 | |||
167 | unsigned int inactive_ratio; | 170 | unsigned int inactive_ratio; |
168 | 171 | ||
169 | /* | 172 | /* |
@@ -636,6 +639,22 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | |||
636 | return false; | 639 | return false; |
637 | } | 640 | } |
638 | 641 | ||
642 | static unsigned int get_swappiness(struct mem_cgroup *memcg) | ||
643 | { | ||
644 | struct cgroup *cgrp = memcg->css.cgroup; | ||
645 | unsigned int swappiness; | ||
646 | |||
647 | /* root ? */ | ||
648 | if (cgrp->parent == NULL) | ||
649 | return vm_swappiness; | ||
650 | |||
651 | spin_lock(&memcg->reclaim_param_lock); | ||
652 | swappiness = memcg->swappiness; | ||
653 | spin_unlock(&memcg->reclaim_param_lock); | ||
654 | |||
655 | return swappiness; | ||
656 | } | ||
657 | |||
639 | /* | 658 | /* |
640 | * Dance down the hierarchy if needed to reclaim memory. We remember the | 659 | * Dance down the hierarchy if needed to reclaim memory. We remember the |
641 | * last child we reclaimed from, so that we don't end up penalizing | 660 | * last child we reclaimed from, so that we don't end up penalizing |
@@ -656,7 +675,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
656 | * but there might be left over accounting, even after children | 675 | * but there might be left over accounting, even after children |
657 | * have left. | 676 | * have left. |
658 | */ | 677 | */ |
659 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap); | 678 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, |
679 | get_swappiness(root_mem)); | ||
660 | if (mem_cgroup_check_under_limit(root_mem)) | 680 | if (mem_cgroup_check_under_limit(root_mem)) |
661 | return 0; | 681 | return 0; |
662 | if (!root_mem->use_hierarchy) | 682 | if (!root_mem->use_hierarchy) |
@@ -672,7 +692,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
672 | cgroup_unlock(); | 692 | cgroup_unlock(); |
673 | continue; | 693 | continue; |
674 | } | 694 | } |
675 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap); | 695 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
696 | get_swappiness(next_mem)); | ||
676 | if (mem_cgroup_check_under_limit(root_mem)) | 697 | if (mem_cgroup_check_under_limit(root_mem)) |
677 | return 0; | 698 | return 0; |
678 | cgroup_lock(); | 699 | cgroup_lock(); |
@@ -1400,7 +1421,8 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | |||
1400 | rcu_read_unlock(); | 1421 | rcu_read_unlock(); |
1401 | 1422 | ||
1402 | do { | 1423 | do { |
1403 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true); | 1424 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true, |
1425 | get_swappiness(mem)); | ||
1404 | progress += mem_cgroup_check_under_limit(mem); | 1426 | progress += mem_cgroup_check_under_limit(mem); |
1405 | } while (!progress && --retry); | 1427 | } while (!progress && --retry); |
1406 | 1428 | ||
@@ -1468,7 +1490,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
1468 | break; | 1490 | break; |
1469 | 1491 | ||
1470 | progress = try_to_free_mem_cgroup_pages(memcg, | 1492 | progress = try_to_free_mem_cgroup_pages(memcg, |
1471 | GFP_KERNEL, false); | 1493 | GFP_KERNEL, |
1494 | false, | ||
1495 | get_swappiness(memcg)); | ||
1472 | if (!progress) retry_count--; | 1496 | if (!progress) retry_count--; |
1473 | } | 1497 | } |
1474 | 1498 | ||
@@ -1512,7 +1536,8 @@ int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
1512 | break; | 1536 | break; |
1513 | 1537 | ||
1514 | oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 1538 | oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
1515 | try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true); | 1539 | try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true, |
1540 | get_swappiness(memcg)); | ||
1516 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 1541 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
1517 | if (curusage >= oldusage) | 1542 | if (curusage >= oldusage) |
1518 | retry_count--; | 1543 | retry_count--; |
@@ -1643,8 +1668,8 @@ try_to_free: | |||
1643 | ret = -EINTR; | 1668 | ret = -EINTR; |
1644 | goto out; | 1669 | goto out; |
1645 | } | 1670 | } |
1646 | progress = try_to_free_mem_cgroup_pages(mem, | 1671 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
1647 | GFP_KERNEL, false); | 1672 | false, get_swappiness(mem)); |
1648 | if (!progress) { | 1673 | if (!progress) { |
1649 | nr_retries--; | 1674 | nr_retries--; |
1650 | /* maybe some writeback is necessary */ | 1675 | /* maybe some writeback is necessary */ |
@@ -1864,6 +1889,37 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
1864 | return 0; | 1889 | return 0; |
1865 | } | 1890 | } |
1866 | 1891 | ||
1892 | static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) | ||
1893 | { | ||
1894 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
1895 | |||
1896 | return get_swappiness(memcg); | ||
1897 | } | ||
1898 | |||
1899 | static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | ||
1900 | u64 val) | ||
1901 | { | ||
1902 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | ||
1903 | struct mem_cgroup *parent; | ||
1904 | if (val > 100) | ||
1905 | return -EINVAL; | ||
1906 | |||
1907 | if (cgrp->parent == NULL) | ||
1908 | return -EINVAL; | ||
1909 | |||
1910 | parent = mem_cgroup_from_cont(cgrp->parent); | ||
1911 | /* If under hierarchy, only empty-root can set this value */ | ||
1912 | if ((parent->use_hierarchy) || | ||
1913 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) | ||
1914 | return -EINVAL; | ||
1915 | |||
1916 | spin_lock(&memcg->reclaim_param_lock); | ||
1917 | memcg->swappiness = val; | ||
1918 | spin_unlock(&memcg->reclaim_param_lock); | ||
1919 | |||
1920 | return 0; | ||
1921 | } | ||
1922 | |||
1867 | 1923 | ||
1868 | static struct cftype mem_cgroup_files[] = { | 1924 | static struct cftype mem_cgroup_files[] = { |
1869 | { | 1925 | { |
@@ -1902,6 +1958,11 @@ static struct cftype mem_cgroup_files[] = { | |||
1902 | .write_u64 = mem_cgroup_hierarchy_write, | 1958 | .write_u64 = mem_cgroup_hierarchy_write, |
1903 | .read_u64 = mem_cgroup_hierarchy_read, | 1959 | .read_u64 = mem_cgroup_hierarchy_read, |
1904 | }, | 1960 | }, |
1961 | { | ||
1962 | .name = "swappiness", | ||
1963 | .read_u64 = mem_cgroup_swappiness_read, | ||
1964 | .write_u64 = mem_cgroup_swappiness_write, | ||
1965 | }, | ||
1905 | }; | 1966 | }; |
1906 | 1967 | ||
1907 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 1968 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
@@ -2093,6 +2154,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
2093 | mem->last_scanned_child = NULL; | 2154 | mem->last_scanned_child = NULL; |
2094 | spin_lock_init(&mem->reclaim_param_lock); | 2155 | spin_lock_init(&mem->reclaim_param_lock); |
2095 | 2156 | ||
2157 | if (parent) | ||
2158 | mem->swappiness = get_swappiness(parent); | ||
2159 | |||
2096 | return &mem->css; | 2160 | return &mem->css; |
2097 | free_out: | 2161 | free_out: |
2098 | for_each_node_state(node, N_POSSIBLE) | 2162 | for_each_node_state(node, N_POSSIBLE) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index f03c239440ad..ece2f405187f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1707,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
1707 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 1707 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
1708 | 1708 | ||
1709 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 1709 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
1710 | gfp_t gfp_mask, | 1710 | gfp_t gfp_mask, |
1711 | bool noswap) | 1711 | bool noswap, |
1712 | unsigned int swappiness) | ||
1712 | { | 1713 | { |
1713 | struct scan_control sc = { | 1714 | struct scan_control sc = { |
1714 | .may_writepage = !laptop_mode, | 1715 | .may_writepage = !laptop_mode, |
1715 | .may_swap = 1, | 1716 | .may_swap = 1, |
1716 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1717 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
1717 | .swappiness = vm_swappiness, | 1718 | .swappiness = swappiness, |
1718 | .order = 0, | 1719 | .order = 0, |
1719 | .mem_cgroup = mem_cont, | 1720 | .mem_cgroup = mem_cont, |
1720 | .isolate_pages = mem_cgroup_isolate_pages, | 1721 | .isolate_pages = mem_cgroup_isolate_pages, |