author     Roman Gushchin <guro@fb.com>  2018-06-07 20:07:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-07 20:34:36 -0400
commit     bf8d5d52ffe89aac5b46ddb39dd1a4351fae5df4 (patch)
tree       e0b0457ddf128b0562eb403762b2f2de2292e8b1
parent     fb52bbaee598f58352d8732637ebe7013b2df79f (diff)
memcg: introduce memory.min
The memory controller implements the memory.low best-effort memory protection mechanism, which works well in many cases and allows protecting the working sets of important workloads from sudden reclaim. But its semantics have a significant limitation: it works only as long as there is a supply of reclaimable memory. This makes it pretty useless against any sort of slow memory leak or memory usage increase, especially on swapless systems. If swap is enabled, memory soft protection effectively postpones the problem, allowing a leaking application to fill the entire swap area, which makes no sense. The only effective way to guarantee memory protection in this case is to invoke the OOM killer.

It's possible to handle this case in userspace by reacting to MEMCG_LOW events, but there is still a place for a fail-safe in-kernel mechanism that provides stronger guarantees.

This patch introduces the memory.min interface for the cgroup v2 memory controller. It works very similarly to memory.low (sharing the same hierarchical behavior), except that it is not disabled when there is no more reclaimable memory in the system. If a cgroup is not populated, its memory.min is ignored, because otherwise even the OOM killer would not be able to reclaim the protected memory and the system could stall.

[guro@fb.com: s/low/min/ in docs]
Link: http://lkml.kernel.org/r/20180510130758.GA9129@castle.DHCP.thefacebook.com
Link: http://lkml.kernel.org/r/20180509180734.GA4856@castle.DHCP.thefacebook.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
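For illustration only, here is a minimal userspace sketch of how a workload's memory.min could be set through the interface this patch adds. The cgroup path is hypothetical, and the program simply writes a byte value to the new read-write control file described in the documentation hunk below; "max" would be accepted as well.

/*
 * Illustrative only: set memory.min for a hypothetical cgroup.
 * Assumes cgroup v2 is mounted at /sys/fs/cgroup and that a
 * "workload" cgroup exists; adjust the path for a real system.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/workload/memory.min", "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}

	/* Protect 512M of the workload's memory from reclaim. */
	fprintf(f, "%llu\n", 512ULL << 20);
	fclose(f);
	return EXIT_SUCCESS;
}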
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst  27
-rw-r--r--  include/linux/memcontrol.h  15
-rw-r--r--  include/linux/page_counter.h  11
-rw-r--r--  mm/memcontrol.c  118
-rw-r--r--  mm/page_counter.c  63
-rw-r--r--  mm/vmscan.c  18
6 files changed, 202 insertions(+), 50 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 7b56ca80e37a..e34d3c938729 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1001,6 +1001,29 @@ PAGE_SIZE multiple when read back.
 	The total amount of memory currently being used by the cgroup
 	and its descendants.
 
+  memory.min
+	A read-write single value file which exists on non-root
+	cgroups. The default is "0".
+
+	Hard memory protection. If the memory usage of a cgroup
+	is within its effective min boundary, the cgroup's memory
+	won't be reclaimed under any conditions. If there is no
+	unprotected reclaimable memory available, OOM killer
+	is invoked.
+
+	Effective min boundary is limited by memory.min values of
+	all ancestor cgroups. If there is memory.min overcommitment
+	(child cgroup or cgroups are requiring more protected memory
+	than parent will allow), then each child cgroup will get
+	the part of parent's protection proportional to its
+	actual memory usage below memory.min.
+
+	Putting more memory than generally available under this
+	protection is discouraged and may lead to constant OOMs.
+
+	If a memory cgroup is not populated with processes,
+	its memory.min is ignored.
+
   memory.low
 	A read-write single value file which exists on non-root
 	cgroups. The default is "0".
@@ -1012,9 +1035,9 @@ PAGE_SIZE multiple when read back.
 
 	Effective low boundary is limited by memory.low values of
 	all ancestor cgroups. If there is memory.low overcommitment
-	(child cgroup or cgroups are requiring more protected memory,
+	(child cgroup or cgroups are requiring more protected memory
 	than parent will allow), then each child cgroup will get
-	the part of parent's protection proportional to the its
+	the part of parent's protection proportional to its
 	actual memory usage below memory.low.
 
 	Putting more memory than generally available under this
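As an editorial aside, the proportional distribution described in the hunk above can be illustrated with a small stand-alone sketch. The numbers, the min_ul() helper and the 4 KiB page size are assumptions for illustration only; the kernel performs the equivalent arithmetic in mem_cgroup_protected() later in this patch.

/*
 * Illustrative only: two children overcommit their parent's protection.
 * Parent's effective min: 100M (25600 pages of 4K).
 * Child A: memory.min = 75M, usage = 50M.  Child B: memory.min = 75M, usage = 100M.
 * Each child receives a share of the parent's protection proportional to
 * its own usage below memory.min, mirroring
 *   emin = min(emin, parent_emin * min_usage / siblings_min_usage)
 */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long parent_emin = 25600;                   /* 100M */
	unsigned long a_min_usage = min_ul(12800, 19200);    /* min(usage, min) = 50M */
	unsigned long b_min_usage = min_ul(25600, 19200);    /* min(usage, min) = 75M */
	unsigned long siblings = a_min_usage + b_min_usage;  /* tracked by the parent */

	/* Prints 10240 (40M) for A and 15360 (60M) for B: the parent's
	 * 100M of protection split 2:3, matching the usage ratio. */
	printf("A: %lu pages, B: %lu pages\n",
	       min_ul(19200, parent_emin * a_min_usage / siblings),
	       min_ul(19200, parent_emin * b_min_usage / siblings));
	return 0;
}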
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 10d741e8fe51..9c04cf8e6487 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -58,6 +58,12 @@ enum memcg_memory_event {
 	MEMCG_NR_MEMORY_EVENTS,
 };
 
+enum mem_cgroup_protection {
+	MEMCG_PROT_NONE,
+	MEMCG_PROT_LOW,
+	MEMCG_PROT_MIN,
+};
+
 struct mem_cgroup_reclaim_cookie {
 	pg_data_t *pgdat;
 	int priority;
@@ -289,7 +295,8 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
+enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
+						struct mem_cgroup *memcg);
 
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
@@ -734,10 +741,10 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
 {
 }
 
-static inline bool mem_cgroup_low(struct mem_cgroup *root,
-				  struct mem_cgroup *memcg)
+static inline enum mem_cgroup_protection mem_cgroup_protected(
+	struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
-	return false;
+	return MEMCG_PROT_NONE;
 }
 
 static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 7902a727d3b6..bab7e57f659b 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -8,10 +8,16 @@
 
 struct page_counter {
 	atomic_long_t usage;
-	unsigned long max;
+	unsigned long min;
 	unsigned long low;
+	unsigned long max;
 	struct page_counter *parent;
 
+	/* effective memory.min and memory.min usage tracking */
+	unsigned long emin;
+	atomic_long_t min_usage;
+	atomic_long_t children_min_usage;
+
 	/* effective memory.low and memory.low usage tracking */
 	unsigned long elow;
 	atomic_long_t low_usage;
@@ -47,8 +53,9 @@ bool page_counter_try_charge(struct page_counter *counter,
 			     unsigned long nr_pages,
 			     struct page_counter **fail);
 void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
-int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
+void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages);
 void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages);
+int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
 int page_counter_memparse(const char *buf, const char *max,
 			  unsigned long *nr_pages);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e6de0d6a3a8d..e3d56927a724 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4275,6 +4275,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
+	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 
 	memcg_offline_kmem(memcg);
@@ -4329,6 +4330,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	page_counter_set_max(&memcg->memsw, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
 	page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
+	page_counter_set_min(&memcg->memory, 0);
 	page_counter_set_low(&memcg->memory, 0);
 	memcg->high = PAGE_COUNTER_MAX;
 	memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5066,6 +5068,36 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
 	return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
 }
 
+static int memory_min_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	unsigned long min = READ_ONCE(memcg->memory.min);
+
+	if (min == PAGE_COUNTER_MAX)
+		seq_puts(m, "max\n");
+	else
+		seq_printf(m, "%llu\n", (u64)min * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_min_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long min;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "max", &min);
+	if (err)
+		return err;
+
+	page_counter_set_min(&memcg->memory, min);
+
+	return nbytes;
+}
+
 static int memory_low_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5301,6 +5333,12 @@ static struct cftype memory_files[] = {
 		.read_u64 = memory_current_read,
 	},
 	{
+		.name = "min",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_min_show,
+		.write = memory_min_write,
+	},
+	{
 		.name = "low",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_low_show,
@@ -5349,19 +5387,24 @@ struct cgroup_subsys memory_cgrp_subsys = {
 };
 
 /**
- * mem_cgroup_low - check if memory consumption is in the normal range
+ * mem_cgroup_protected - check if memory consumption is in the normal range
  * @root: the top ancestor of the sub-tree being checked
 * @memcg: the memory cgroup to check
 *
 * WARNING: This function is not stateless! It can only be used as part
 * of a top-down tree iteration, not for isolated queries.
 *
- * Returns %true if memory consumption of @memcg is in the normal range.
+ * Returns one of the following:
+ *   MEMCG_PROT_NONE: cgroup memory is not protected
+ *   MEMCG_PROT_LOW: cgroup memory is protected as long there is
+ *     an unprotected supply of reclaimable memory from other cgroups.
+ *   MEMCG_PROT_MIN: cgroup memory is protected
 *
- * @root is exclusive; it is never low when looked at directly
+ * @root is exclusive; it is never protected when looked at directly
 *
- * To provide a proper hierarchical behavior, effective memory.low value
- * is used.
+ * To provide a proper hierarchical behavior, effective memory.min/low values
+ * are used. Below is the description of how effective memory.low is calculated.
+ * Effective memory.min values is calculated in the same way.
 *
 * Effective memory.low is always equal or less than the original memory.low.
 * If there is no memory.low overcommittment (which is always true for
@@ -5406,51 +5449,78 @@ struct cgroup_subsys memory_cgrp_subsys = {
 *    E/memory.current = 0
 *
 * These calculations require constant tracking of the actual low usages
- * (see propagate_low_usage()), as well as recursive calculation of
- * effective memory.low values. But as we do call mem_cgroup_low()
+ * (see propagate_protected_usage()), as well as recursive calculation of
+ * effective memory.low values. But as we do call mem_cgroup_protected()
 * path for each memory cgroup top-down from the reclaim,
 * it's possible to optimize this part, and save calculated elow
 * for next usage. This part is intentionally racy, but it's ok,
 * as memory.low is a best-effort mechanism.
 */
-bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
+enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
+						struct mem_cgroup *memcg)
 {
-	unsigned long usage, low_usage, siblings_low_usage;
-	unsigned long elow, parent_elow;
 	struct mem_cgroup *parent;
+	unsigned long emin, parent_emin;
+	unsigned long elow, parent_elow;
+	unsigned long usage;
 
 	if (mem_cgroup_disabled())
-		return false;
+		return MEMCG_PROT_NONE;
 
 	if (!root)
 		root = root_mem_cgroup;
 	if (memcg == root)
-		return false;
+		return MEMCG_PROT_NONE;
 
-	elow = memcg->memory.low;
 	usage = page_counter_read(&memcg->memory);
-	parent = parent_mem_cgroup(memcg);
+	if (!usage)
+		return MEMCG_PROT_NONE;
+
+	emin = memcg->memory.min;
+	elow = memcg->memory.low;
 
+	parent = parent_mem_cgroup(memcg);
 	if (parent == root)
 		goto exit;
 
+	parent_emin = READ_ONCE(parent->memory.emin);
+	emin = min(emin, parent_emin);
+	if (emin && parent_emin) {
+		unsigned long min_usage, siblings_min_usage;
+
+		min_usage = min(usage, memcg->memory.min);
+		siblings_min_usage = atomic_long_read(
+			&parent->memory.children_min_usage);
+
+		if (min_usage && siblings_min_usage)
+			emin = min(emin, parent_emin * min_usage /
+				   siblings_min_usage);
+	}
+
 	parent_elow = READ_ONCE(parent->memory.elow);
 	elow = min(elow, parent_elow);
+	if (elow && parent_elow) {
+		unsigned long low_usage, siblings_low_usage;
 
-	if (!elow || !parent_elow)
-		goto exit;
+		low_usage = min(usage, memcg->memory.low);
+		siblings_low_usage = atomic_long_read(
+			&parent->memory.children_low_usage);
 
-	low_usage = min(usage, memcg->memory.low);
-	siblings_low_usage = atomic_long_read(
-		&parent->memory.children_low_usage);
+		if (low_usage && siblings_low_usage)
+			elow = min(elow, parent_elow * low_usage /
+				   siblings_low_usage);
+	}
 
-	if (!low_usage || !siblings_low_usage)
-		goto exit;
-
-	elow = min(elow, parent_elow * low_usage / siblings_low_usage);
 exit:
+	memcg->memory.emin = emin;
 	memcg->memory.elow = elow;
-	return usage && usage <= elow;
+
+	if (usage <= emin)
+		return MEMCG_PROT_MIN;
+	else if (usage <= elow)
+		return MEMCG_PROT_LOW;
+	else
+		return MEMCG_PROT_NONE;
 }
 
 /**
diff --git a/mm/page_counter.c b/mm/page_counter.c
index a5ff4cbc355a..de31470655f6 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -13,26 +13,38 @@
 #include <linux/bug.h>
 #include <asm/page.h>
 
-static void propagate_low_usage(struct page_counter *c, unsigned long usage)
+static void propagate_protected_usage(struct page_counter *c,
+				      unsigned long usage)
 {
-	unsigned long low_usage, old;
+	unsigned long protected, old_protected;
 	long delta;
 
 	if (!c->parent)
 		return;
 
-	if (!c->low && !atomic_long_read(&c->low_usage))
-		return;
+	if (c->min || atomic_long_read(&c->min_usage)) {
+		if (usage <= c->min)
+			protected = usage;
+		else
+			protected = 0;
+
+		old_protected = atomic_long_xchg(&c->min_usage, protected);
+		delta = protected - old_protected;
+		if (delta)
+			atomic_long_add(delta, &c->parent->children_min_usage);
+	}
 
-	if (usage <= c->low)
-		low_usage = usage;
-	else
-		low_usage = 0;
+	if (c->low || atomic_long_read(&c->low_usage)) {
+		if (usage <= c->low)
+			protected = usage;
+		else
+			protected = 0;
 
-	old = atomic_long_xchg(&c->low_usage, low_usage);
-	delta = low_usage - old;
-	if (delta)
-		atomic_long_add(delta, &c->parent->children_low_usage);
+		old_protected = atomic_long_xchg(&c->low_usage, protected);
+		delta = protected - old_protected;
+		if (delta)
+			atomic_long_add(delta, &c->parent->children_low_usage);
+	}
 }
 
 /**
@@ -45,7 +57,7 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
 	long new;
 
 	new = atomic_long_sub_return(nr_pages, &counter->usage);
-	propagate_low_usage(counter, new);
+	propagate_protected_usage(counter, new);
 	/* More uncharges than charges? */
 	WARN_ON_ONCE(new < 0);
 }
@@ -65,7 +77,7 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
 		long new;
 
 		new = atomic_long_add_return(nr_pages, &c->usage);
-		propagate_low_usage(counter, new);
+		propagate_protected_usage(counter, new);
 		/*
 		 * This is indeed racy, but we can live with some
 		 * inaccuracy in the watermark.
@@ -109,7 +121,7 @@ bool page_counter_try_charge(struct page_counter *counter,
 		new = atomic_long_add_return(nr_pages, &c->usage);
 		if (new > c->max) {
 			atomic_long_sub(nr_pages, &c->usage);
-			propagate_low_usage(counter, new);
+			propagate_protected_usage(counter, new);
 			/*
 			 * This is racy, but we can live with some
 			 * inaccuracy in the failcnt.
@@ -118,7 +130,7 @@ bool page_counter_try_charge(struct page_counter *counter,
 			*fail = c;
 			goto failed;
 		}
-		propagate_low_usage(counter, new);
+		propagate_protected_usage(counter, new);
 		/*
 		 * Just like with failcnt, we can live with some
 		 * inaccuracy in the watermark.
@@ -191,6 +203,23 @@ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
 }
 
 /**
+ * page_counter_set_min - set the amount of protected memory
+ * @counter: counter
+ * @nr_pages: value to set
+ *
+ * The caller must serialize invocations on the same counter.
+ */
+void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
+{
+	struct page_counter *c;
+
+	counter->min = nr_pages;
+
+	for (c = counter; c; c = c->parent)
+		propagate_protected_usage(c, atomic_long_read(&c->usage));
+}
+
+/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
@@ -204,7 +233,7 @@ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
 	counter->low = nr_pages;
 
 	for (c = counter; c; c = c->parent)
-		propagate_low_usage(c, atomic_long_read(&c->usage));
+		propagate_protected_usage(c, atomic_long_read(&c->usage));
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0e67b477ecef..03822f86f288 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2544,12 +2544,28 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		unsigned long reclaimed;
 		unsigned long scanned;
 
-		if (mem_cgroup_low(root, memcg)) {
+		switch (mem_cgroup_protected(root, memcg)) {
+		case MEMCG_PROT_MIN:
+			/*
+			 * Hard protection.
+			 * If there is no reclaimable memory, OOM.
+			 */
+			continue;
+		case MEMCG_PROT_LOW:
+			/*
+			 * Soft protection.
+			 * Respect the protection only as long as
+			 * there is an unprotected supply
+			 * of reclaimable memory from other cgroups.
+			 */
 			if (!sc->memcg_low_reclaim) {
 				sc->memcg_low_skipped = 1;
 				continue;
 			}
 			memcg_memory_event(memcg, MEMCG_LOW);
+			break;
+		case MEMCG_PROT_NONE:
+			break;
 		}
 
 		reclaimed = sc->nr_reclaimed;