aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michal Hocko <mhocko@suse.cz> 2014-06-06 17:38:15 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-06 19:08:17 -0400
commit: 688eb988d15af55c1d1b70b1ca9f6ce58f277c20 (patch)
tree: 5dac60260d3c7e0f97f82f4787a046d31c75ebf5
parent: 722773afd83209d4088d30946bc274f547528a0b (diff)
vmscan: memcg: always use swappiness of the reclaimed memcg
Memory reclaim always uses swappiness of the reclaim target memcg (origin of the memory pressure) or vm_swappiness for global memory reclaim. This behavior was consistent (except for the difference between global and hard limit reclaim) because swappiness was enforced to be consistent within each memcg hierarchy. After "mm: memcontrol: remove hierarchy restrictions for swappiness and oom_control" each memcg can have its own swappiness independent of its hierarchical parents, though, so the consistency guarantee is gone. This can lead to unexpected behavior. Say that a group is explicitly configured to not swap out by memory.swappiness=0, but its memory gets swapped out anyway when the memory pressure comes from its parent with a non-zero swappiness. It is also unexpected that the knob is meaningless without setting the hard limit which would trigger the reclaim and enforce the swappiness. There are setups where the hard limit is configured higher in the hierarchy by an administrator and children groups are under the control of somebody else who is interested in the swapout behavior but not necessarily in the memory limit. From a semantic point of view, swappiness is an attribute defining anon vs. file proportional scanning of the LRU, which is memcg specific (unlike charges, which are propagated up the hierarchy), so it should be applied to the particular memcg's LRU regardless of where the memory pressure comes from. This patch removes vmscan_swappiness() and stores the swappiness in the scan_control structure. mem_cgroup_swappiness() is then used to provide the correct value before shrink_lruvec() is called. The global vm_swappiness is used for the root memcg.
[hughd@google.com: oopses immediately when booted with cgroup_disable=memory] Signed-off-by: Michal Hocko <mhocko@suse.cz> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/cgroups/memory.txt 15
-rw-r--r-- mm/memcontrol.c 2
-rw-r--r-- mm/vmscan.c 18
3 files changed, 16 insertions, 19 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 4937e6fff9b4..b3429aec444c 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -540,14 +540,13 @@ Note:
540 540
5415.3 swappiness 5415.3 swappiness
542 542
543Similar to /proc/sys/vm/swappiness, but only affecting reclaim that is 543Overrides /proc/sys/vm/swappiness for the particular group. The tunable
544triggered by this cgroup's hard limit. The tunable in the root cgroup 544in the root cgroup corresponds to the global swappiness setting.
545corresponds to the global swappiness setting. 545
546 546Please note that unlike during the global reclaim, limit reclaim
547Please note that unlike the global swappiness, memcg knob set to 0 547enforces that 0 swappiness really prevents from any swapping even if
548really prevents from any swapping even if there is a swap storage 548there is a swap storage available. This might lead to memcg OOM killer
549available. This might lead to memcg OOM killer if there are no file 549if there are no file pages to reclaim.
550pages to reclaim.
551 550
5525.4 failcnt 5515.4 failcnt
553 552
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a500cb0594c4..9bf8a84bcaae 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1550,7 +1550,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1550int mem_cgroup_swappiness(struct mem_cgroup *memcg) 1550int mem_cgroup_swappiness(struct mem_cgroup *memcg)
1551{ 1551{
1552 /* root ? */ 1552 /* root ? */
1553 if (!css_parent(&memcg->css)) 1553 if (mem_cgroup_disabled() || !css_parent(&memcg->css))
1554 return vm_swappiness; 1554 return vm_swappiness;
1555 1555
1556 return memcg->swappiness; 1556 return memcg->swappiness;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 05d41c0d7f6c..f44476a41544 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -83,6 +83,9 @@ struct scan_control {
83 /* Scan (total_size >> priority) pages at once */ 83 /* Scan (total_size >> priority) pages at once */
84 int priority; 84 int priority;
85 85
86 /* anon vs. file LRUs scanning "ratio" */
87 int swappiness;
88
86 /* 89 /*
87 * The memory cgroup that hit its limit and as a result is the 90 * The memory cgroup that hit its limit and as a result is the
88 * primary target of this reclaim invocation. 91 * primary target of this reclaim invocation.
@@ -1845,13 +1848,6 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
1845 return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); 1848 return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
1846} 1849}
1847 1850
1848static int vmscan_swappiness(struct scan_control *sc)
1849{
1850 if (global_reclaim(sc))
1851 return vm_swappiness;
1852 return mem_cgroup_swappiness(sc->target_mem_cgroup);
1853}
1854
1855enum scan_balance { 1851enum scan_balance {
1856 SCAN_EQUAL, 1852 SCAN_EQUAL,
1857 SCAN_FRACT, 1853 SCAN_FRACT,
@@ -1912,7 +1908,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1912 * using the memory controller's swap limit feature would be 1908 * using the memory controller's swap limit feature would be
1913 * too expensive. 1909 * too expensive.
1914 */ 1910 */
1915 if (!global_reclaim(sc) && !vmscan_swappiness(sc)) { 1911 if (!global_reclaim(sc) && !sc->swappiness) {
1916 scan_balance = SCAN_FILE; 1912 scan_balance = SCAN_FILE;
1917 goto out; 1913 goto out;
1918 } 1914 }
@@ -1922,7 +1918,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1922 * system is close to OOM, scan both anon and file equally 1918 * system is close to OOM, scan both anon and file equally
1923 * (unless the swappiness setting disagrees with swapping). 1919 * (unless the swappiness setting disagrees with swapping).
1924 */ 1920 */
1925 if (!sc->priority && vmscan_swappiness(sc)) { 1921 if (!sc->priority && sc->swappiness) {
1926 scan_balance = SCAN_EQUAL; 1922 scan_balance = SCAN_EQUAL;
1927 goto out; 1923 goto out;
1928 } 1924 }
@@ -1965,7 +1961,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1965 * With swappiness at 100, anonymous and file have the same priority. 1961 * With swappiness at 100, anonymous and file have the same priority.
1966 * This scanning priority is essentially the inverse of IO cost. 1962 * This scanning priority is essentially the inverse of IO cost.
1967 */ 1963 */
1968 anon_prio = vmscan_swappiness(sc); 1964 anon_prio = sc->swappiness;
1969 file_prio = 200 - anon_prio; 1965 file_prio = 200 - anon_prio;
1970 1966
1971 /* 1967 /*
@@ -2265,6 +2261,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
2265 2261
2266 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2262 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2267 2263
2264 sc->swappiness = mem_cgroup_swappiness(memcg);
2268 shrink_lruvec(lruvec, sc); 2265 shrink_lruvec(lruvec, sc);
2269 2266
2270 /* 2267 /*
@@ -2731,6 +2728,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
2731 .may_swap = !noswap, 2728 .may_swap = !noswap,
2732 .order = 0, 2729 .order = 0,
2733 .priority = 0, 2730 .priority = 0,
2731 .swappiness = mem_cgroup_swappiness(memcg),
2734 .target_mem_cgroup = memcg, 2732 .target_mem_cgroup = memcg,
2735 }; 2733 };
2736 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2734 struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);