aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorAnton Vorontsov <anton.vorontsov@linaro.org>2013-04-29 18:08:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 18:54:38 -0400
commit70ddf637eebe47e61fb2be08a59315581b6d2f38 (patch)
tree7fdb9e04da11c191daa225cad2314e440effc176 /mm
parent84d96d897671cfb386e722acbefdb3a79e115a8a (diff)
memcg: add memory.pressure_level events
With this patch userland applications that want to maintain the interactivity/memory allocation cost can use the pressure level notifications. The levels are defined like this: The "low" level means that the system is reclaiming memory for new allocations. Monitoring this reclaiming activity might be useful for maintaining cache level. Upon notification, the program (typically "Activity Manager") might analyze vmstat and act in advance (i.e. prematurely shut down unimportant services). The "medium" level means that the system is experiencing medium memory pressure, the system might be swapping, paging out active file caches, etc. Upon this event applications may decide to further analyze vmstat/zoneinfo/memcg or internal memory usage statistics and free any resources that can be easily reconstructed or re-read from a disk. The "critical" level means that the system is actively thrashing, it is about to run out of memory (OOM) or even the in-kernel OOM killer is on its way to trigger. Applications should do whatever they can to help the system. It might be too late to consult with vmstat or any other statistics, so it's advisable to take immediate action. The events are propagated upward until the event is handled, i.e. the events are not pass-through. Here is what this means: for example you have three cgroups: A->B->C. Now you set up an event listener on cgroups A, B and C, and suppose group C experiences some pressure. In this situation, only group C will receive the notification, i.e. groups A and B will not receive it. This is done to avoid excessive "broadcasting" of messages, which disturbs the system and which is especially bad if we are low on memory or thrashing. So, organize the cgroups wisely, or propagate the events manually (or, ask us to implement the pass-through events, explaining why you would need them).
Performance-wise, the memory pressure notifications feature itself is lightweight and does not require much bookkeeping, in contrast to the rest of memcg features. Unfortunately, as of the current memcg implementation, page accounting is an inseparable part and cannot be turned off. The good news is that there are some efforts[1] to improve the situation; plus, implementing the same, fully API-compatible[2] interface for CONFIG_MEMCG=n case (e.g. embedded) is also a viable option, so it will not require any changes on the userland side. [1] http://permalink.gmane.org/gmane.linux.kernel.cgroups/6291 [2] http://lkml.org/lkml/2013/2/21/454 [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix CONFIG_CGROUPS=n warnings] Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org> Acked-by: Kirill A. Shutemov <kirill@shutemov.name> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Tejun Heo <tj@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Glauber Costa <glommer@parallels.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Luiz Capitulino <lcapitulino@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Leonid Moiseichuk <leonid.moiseichuk@nokia.com> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com> Cc: John Stultz <john.stultz@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile2
-rw-r--r--mm/memcontrol.c29
-rw-r--r--mm/vmpressure.c374
-rw-r--r--mm/vmscan.c8
4 files changed, 412 insertions, 1 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 3a4628751f89..72c5acb9345f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -50,7 +50,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
50obj-$(CONFIG_MIGRATION) += migrate.o 50obj-$(CONFIG_MIGRATION) += migrate.o
51obj-$(CONFIG_QUICKLIST) += quicklist.o 51obj-$(CONFIG_QUICKLIST) += quicklist.o
52obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o 52obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
53obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o 53obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
54obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o 54obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
55obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o 55obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
56obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o 56obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7e5bc43c2d1f..360464f40e96 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -49,6 +49,7 @@
49#include <linux/fs.h> 49#include <linux/fs.h>
50#include <linux/seq_file.h> 50#include <linux/seq_file.h>
51#include <linux/vmalloc.h> 51#include <linux/vmalloc.h>
52#include <linux/vmpressure.h>
52#include <linux/mm_inline.h> 53#include <linux/mm_inline.h>
53#include <linux/page_cgroup.h> 54#include <linux/page_cgroup.h>
54#include <linux/cpu.h> 55#include <linux/cpu.h>
@@ -261,6 +262,9 @@ struct mem_cgroup {
261 */ 262 */
262 struct res_counter res; 263 struct res_counter res;
263 264
265 /* vmpressure notifications */
266 struct vmpressure vmpressure;
267
264 union { 268 union {
265 /* 269 /*
266 * the counter to account for mem+swap usage. 270 * the counter to account for mem+swap usage.
@@ -359,6 +363,7 @@ struct mem_cgroup {
359 atomic_t numainfo_events; 363 atomic_t numainfo_events;
360 atomic_t numainfo_updating; 364 atomic_t numainfo_updating;
361#endif 365#endif
366
362 /* 367 /*
363 * Per cgroup active and inactive list, similar to the 368 * Per cgroup active and inactive list, similar to the
364 * per zone LRU lists. 369 * per zone LRU lists.
@@ -510,6 +515,24 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
510 return container_of(s, struct mem_cgroup, css); 515 return container_of(s, struct mem_cgroup, css);
511} 516}
512 517
518/* Some nice accessors for the vmpressure. */
519struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
520{
521 if (!memcg)
522 memcg = root_mem_cgroup;
523 return &memcg->vmpressure;
524}
525
526struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
527{
528 return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
529}
530
531struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css)
532{
533 return &mem_cgroup_from_css(css)->vmpressure;
534}
535
513static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) 536static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
514{ 537{
515 return (memcg == root_mem_cgroup); 538 return (memcg == root_mem_cgroup);
@@ -5907,6 +5930,11 @@ static struct cftype mem_cgroup_files[] = {
5907 .unregister_event = mem_cgroup_oom_unregister_event, 5930 .unregister_event = mem_cgroup_oom_unregister_event,
5908 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), 5931 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
5909 }, 5932 },
5933 {
5934 .name = "pressure_level",
5935 .register_event = vmpressure_register_event,
5936 .unregister_event = vmpressure_unregister_event,
5937 },
5910#ifdef CONFIG_NUMA 5938#ifdef CONFIG_NUMA
5911 { 5939 {
5912 .name = "numa_stat", 5940 .name = "numa_stat",
@@ -6188,6 +6216,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
6188 memcg->move_charge_at_immigrate = 0; 6216 memcg->move_charge_at_immigrate = 0;
6189 mutex_init(&memcg->thresholds_lock); 6217 mutex_init(&memcg->thresholds_lock);
6190 spin_lock_init(&memcg->move_lock); 6218 spin_lock_init(&memcg->move_lock);
6219 vmpressure_init(&memcg->vmpressure);
6191 6220
6192 return &memcg->css; 6221 return &memcg->css;
6193 6222
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
new file mode 100644
index 000000000000..736a6011c2c8
--- /dev/null
+++ b/mm/vmpressure.c
@@ -0,0 +1,374 @@
1/*
2 * Linux VM pressure
3 *
4 * Copyright 2012 Linaro Ltd.
5 * Anton Vorontsov <anton.vorontsov@linaro.org>
6 *
7 * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro,
8 * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as published
12 * by the Free Software Foundation.
13 */
14
15#include <linux/cgroup.h>
16#include <linux/fs.h>
17#include <linux/log2.h>
18#include <linux/sched.h>
19#include <linux/mm.h>
20#include <linux/vmstat.h>
21#include <linux/eventfd.h>
22#include <linux/swap.h>
23#include <linux/printk.h>
24#include <linux/vmpressure.h>
25
26/*
27 * The window size (vmpressure_win) is the number of scanned pages before
28 * we try to analyze scanned/reclaimed ratio. So the window is used as a
29 * rate-limit tunable for the "low" level notification, and also for
30 * averaging the ratio for medium/critical levels. Using small window
31 * sizes can cause lot of false positives, but too big window size will
32 * delay the notifications.
33 *
34 * As the vmscan reclaimer logic works with chunks which are multiple of
35 * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well.
36 *
37 * TODO: Make the window size depend on machine size, as we do for vmstat
38 * thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
39 */
40static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
41
/*
 * Thresholds applied to the pressure value derived from the
 * scanned/reclaimed ratio. They are, in essence, percentages: the higher
 * the value, the larger the share of unsuccessful reclaim attempts. The
 * current values were chosen empirically.
 */
static const unsigned int vmpressure_level_med = 60;		/* "medium" starts here */
static const unsigned int vmpressure_level_critical = 95;	/* "critical" starts here */
50
51/*
52 * When there are too little pages left to scan, vmpressure() may miss the
53 * critical pressure as number of pages will be less than "window size".
54 * However, in that case the vmscan priority will raise fast as the
55 * reclaimer will try to scan LRUs more deeply.
56 *
57 * The vmscan logic considers these special priorities:
58 *
59 * prio == DEF_PRIORITY (12): reclaimer starts with that value
60 * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed
61 * prio == 0 : close to OOM, kernel scans every page in an lru
62 *
63 * Any value in this range is acceptable for this tunable (i.e. from 12 to
64 * 0). Current value for the vmpressure_level_critical_prio is chosen
65 * empirically, but the number, in essence, means that we consider
66 * critical level when scanning depth is ~10% of the lru size (vmscan
67 * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one
68 * eights).
69 */
70static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10);
71
72static struct vmpressure *work_to_vmpressure(struct work_struct *work)
73{
74 return container_of(work, struct vmpressure, work);
75}
76
77static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
78{
79 return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id));
80}
81
82static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
83{
84 struct cgroup *cg = vmpressure_to_css(vmpr)->cgroup;
85 struct mem_cgroup *memcg = mem_cgroup_from_cont(cg);
86
87 memcg = parent_mem_cgroup(memcg);
88 if (!memcg)
89 return NULL;
90 return memcg_to_vmpressure(memcg);
91}
92
/*
 * Pressure levels, ordered by increasing severity. The numeric order
 * matters: levels are compared with relational operators elsewhere in this
 * file, and the values index vmpressure_str_levels[].
 */
enum vmpressure_levels {
	VMPRESSURE_LOW = 0,
	VMPRESSURE_MEDIUM,
	VMPRESSURE_CRITICAL,
	VMPRESSURE_NUM_LEVELS,	/* number of levels, not a level itself */
};

/* Names of the levels as accepted from userland, indexed by level. */
static const char * const vmpressure_str_levels[] = {
	[VMPRESSURE_LOW]	= "low",
	[VMPRESSURE_MEDIUM]	= "medium",
	[VMPRESSURE_CRITICAL]	= "critical",
};
105
106static enum vmpressure_levels vmpressure_level(unsigned long pressure)
107{
108 if (pressure >= vmpressure_level_critical)
109 return VMPRESSURE_CRITICAL;
110 else if (pressure >= vmpressure_level_med)
111 return VMPRESSURE_MEDIUM;
112 return VMPRESSURE_LOW;
113}
114
115static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
116 unsigned long reclaimed)
117{
118 unsigned long scale = scanned + reclaimed;
119 unsigned long pressure;
120
121 /*
122 * We calculate the ratio (in percents) of how many pages were
123 * scanned vs. reclaimed in a given time frame (window). Note that
124 * time is in VM reclaimer's "ticks", i.e. number of pages
125 * scanned. This makes it possible to set desired reaction time
126 * and serves as a ratelimit.
127 */
128 pressure = scale - (reclaimed * scale / scanned);
129 pressure = pressure * 100 / scale;
130
131 pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure,
132 scanned, reclaimed);
133
134 return vmpressure_level(pressure);
135}
136
137struct vmpressure_event {
138 struct eventfd_ctx *efd;
139 enum vmpressure_levels level;
140 struct list_head node;
141};
142
143static bool vmpressure_event(struct vmpressure *vmpr,
144 unsigned long scanned, unsigned long reclaimed)
145{
146 struct vmpressure_event *ev;
147 enum vmpressure_levels level;
148 bool signalled = false;
149
150 level = vmpressure_calc_level(scanned, reclaimed);
151
152 mutex_lock(&vmpr->events_lock);
153
154 list_for_each_entry(ev, &vmpr->events, node) {
155 if (level >= ev->level) {
156 eventfd_signal(ev->efd, 1);
157 signalled = true;
158 }
159 }
160
161 mutex_unlock(&vmpr->events_lock);
162
163 return signalled;
164}
165
166static void vmpressure_work_fn(struct work_struct *work)
167{
168 struct vmpressure *vmpr = work_to_vmpressure(work);
169 unsigned long scanned;
170 unsigned long reclaimed;
171
172 /*
173 * Several contexts might be calling vmpressure(), so it is
174 * possible that the work was rescheduled again before the old
175 * work context cleared the counters. In that case we will run
176 * just after the old work returns, but then scanned might be zero
177 * here. No need for any locks here since we don't care if
178 * vmpr->reclaimed is in sync.
179 */
180 if (!vmpr->scanned)
181 return;
182
183 mutex_lock(&vmpr->sr_lock);
184 scanned = vmpr->scanned;
185 reclaimed = vmpr->reclaimed;
186 vmpr->scanned = 0;
187 vmpr->reclaimed = 0;
188 mutex_unlock(&vmpr->sr_lock);
189
190 do {
191 if (vmpressure_event(vmpr, scanned, reclaimed))
192 break;
193 /*
194 * If not handled, propagate the event upward into the
195 * hierarchy.
196 */
197 } while ((vmpr = vmpressure_parent(vmpr)));
198}
199
200/**
201 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
202 * @gfp: reclaimer's gfp mask
203 * @memcg: cgroup memory controller handle
204 * @scanned: number of pages scanned
205 * @reclaimed: number of pages reclaimed
206 *
207 * This function should be called from the vmscan reclaim path to account
208 * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
209 * pressure index is then further refined and averaged over time.
210 *
211 * This function does not return any value.
212 */
213void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
214 unsigned long scanned, unsigned long reclaimed)
215{
216 struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
217
218 /*
219 * Here we only want to account pressure that userland is able to
220 * help us with. For example, suppose that DMA zone is under
221 * pressure; if we notify userland about that kind of pressure,
222 * then it will be mostly a waste as it will trigger unnecessary
223 * freeing of memory by userland (since userland is more likely to
224 * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That
225 * is why we include only movable, highmem and FS/IO pages.
226 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
227 * we account it too.
228 */
229 if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
230 return;
231
232 /*
233 * If we got here with no pages scanned, then that is an indicator
234 * that reclaimer was unable to find any shrinkable LRUs at the
235 * current scanning depth. But it does not mean that we should
236 * report the critical pressure, yet. If the scanning priority
237 * (scanning depth) goes too high (deep), we will be notified
238 * through vmpressure_prio(). But so far, keep calm.
239 */
240 if (!scanned)
241 return;
242
243 mutex_lock(&vmpr->sr_lock);
244 vmpr->scanned += scanned;
245 vmpr->reclaimed += reclaimed;
246 scanned = vmpr->scanned;
247 mutex_unlock(&vmpr->sr_lock);
248
249 if (scanned < vmpressure_win || work_pending(&vmpr->work))
250 return;
251 schedule_work(&vmpr->work);
252}
253
254/**
255 * vmpressure_prio() - Account memory pressure through reclaimer priority level
256 * @gfp: reclaimer's gfp mask
257 * @memcg: cgroup memory controller handle
258 * @prio: reclaimer's priority
259 *
260 * This function should be called from the reclaim path every time when
261 * the vmscan's reclaiming priority (scanning depth) changes.
262 *
263 * This function does not return any value.
264 */
265void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
266{
267 /*
268 * We only use prio for accounting critical level. For more info
269 * see comment for vmpressure_level_critical_prio variable above.
270 */
271 if (prio > vmpressure_level_critical_prio)
272 return;
273
274 /*
275 * OK, the prio is below the threshold, updating vmpressure
276 * information before shrinker dives into long shrinking of long
277 * range vmscan. Passing scanned = vmpressure_win, reclaimed = 0
278 * to the vmpressure() basically means that we signal 'critical'
279 * level.
280 */
281 vmpressure(gfp, memcg, vmpressure_win, 0);
282}
283
284/**
285 * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
286 * @cg: cgroup that is interested in vmpressure notifications
287 * @cft: cgroup control files handle
288 * @eventfd: eventfd context to link notifications with
289 * @args: event arguments (used to set up a pressure level threshold)
290 *
291 * This function associates eventfd context with the vmpressure
292 * infrastructure, so that the notifications will be delivered to the
293 * @eventfd. The @args parameter is a string that denotes pressure level
294 * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
295 * "critical").
296 *
297 * This function should not be used directly, just pass it to (struct
298 * cftype).register_event, and then cgroup core will handle everything by
299 * itself.
300 */
301int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
302 struct eventfd_ctx *eventfd, const char *args)
303{
304 struct vmpressure *vmpr = cg_to_vmpressure(cg);
305 struct vmpressure_event *ev;
306 int level;
307
308 for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) {
309 if (!strcmp(vmpressure_str_levels[level], args))
310 break;
311 }
312
313 if (level >= VMPRESSURE_NUM_LEVELS)
314 return -EINVAL;
315
316 ev = kzalloc(sizeof(*ev), GFP_KERNEL);
317 if (!ev)
318 return -ENOMEM;
319
320 ev->efd = eventfd;
321 ev->level = level;
322
323 mutex_lock(&vmpr->events_lock);
324 list_add(&ev->node, &vmpr->events);
325 mutex_unlock(&vmpr->events_lock);
326
327 return 0;
328}
329
330/**
331 * vmpressure_unregister_event() - Unbind eventfd from vmpressure
332 * @cg: cgroup handle
333 * @cft: cgroup control files handle
334 * @eventfd: eventfd context that was used to link vmpressure with the @cg
335 *
336 * This function does internal manipulations to detach the @eventfd from
337 * the vmpressure notifications, and then frees internal resources
338 * associated with the @eventfd (but the @eventfd itself is not freed).
339 *
340 * This function should not be used directly, just pass it to (struct
341 * cftype).unregister_event, and then cgroup core will handle everything
342 * by itself.
343 */
344void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
345 struct eventfd_ctx *eventfd)
346{
347 struct vmpressure *vmpr = cg_to_vmpressure(cg);
348 struct vmpressure_event *ev;
349
350 mutex_lock(&vmpr->events_lock);
351 list_for_each_entry(ev, &vmpr->events, node) {
352 if (ev->efd != eventfd)
353 continue;
354 list_del(&ev->node);
355 kfree(ev);
356 break;
357 }
358 mutex_unlock(&vmpr->events_lock);
359}
360
361/**
362 * vmpressure_init() - Initialize vmpressure control structure
363 * @vmpr: Structure to be initialized
364 *
365 * This function should be called on every allocated vmpressure structure
366 * before any usage.
367 */
368void vmpressure_init(struct vmpressure *vmpr)
369{
370 mutex_init(&vmpr->sr_lock);
371 mutex_init(&vmpr->events_lock);
372 INIT_LIST_HEAD(&vmpr->events);
373 INIT_WORK(&vmpr->work, vmpressure_work_fn);
374}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e03a00b09da9..e53e49584cf3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -19,6 +19,7 @@
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/highmem.h> 21#include <linux/highmem.h>
22#include <linux/vmpressure.h>
22#include <linux/vmstat.h> 23#include <linux/vmstat.h>
23#include <linux/file.h> 24#include <linux/file.h>
24#include <linux/writeback.h> 25#include <linux/writeback.h>
@@ -1982,6 +1983,11 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
1982 } 1983 }
1983 memcg = mem_cgroup_iter(root, memcg, &reclaim); 1984 memcg = mem_cgroup_iter(root, memcg, &reclaim);
1984 } while (memcg); 1985 } while (memcg);
1986
1987 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
1988 sc->nr_scanned - nr_scanned,
1989 sc->nr_reclaimed - nr_reclaimed);
1990
1985 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, 1991 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
1986 sc->nr_scanned - nr_scanned, sc)); 1992 sc->nr_scanned - nr_scanned, sc));
1987} 1993}
@@ -2167,6 +2173,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2167 count_vm_event(ALLOCSTALL); 2173 count_vm_event(ALLOCSTALL);
2168 2174
2169 do { 2175 do {
2176 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
2177 sc->priority);
2170 sc->nr_scanned = 0; 2178 sc->nr_scanned = 0;
2171 aborted_reclaim = shrink_zones(zonelist, sc); 2179 aborted_reclaim = shrink_zones(zonelist, sc);
2172 2180