author	Vladimir Davydov <vdavydov@virtuozzo.com>	2016-10-07 19:57:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-07 21:46:26 -0400
commit	7c5f64f84483bd13886348edda8b3e7b799a7fdb (patch)
tree	426501049f6999c5f1a8c95ad17926198003c466
parent	48e509ece97e00b68e52d1d18e3e4b809c5b3991 (diff)
mm: oom: deduplicate victim selection code for memcg and global oom

When selecting an oom victim, we use the same heuristic for both memory
cgroup and global oom.  The only difference is the scope of tasks to
select the victim from.  So we could just export an iterator over all
memcg tasks and keep all oom related logic in oom_kill.c, but instead we
duplicate pieces of it in memcontrol.c reusing some initially private
functions of oom_kill.c in order to not duplicate all of it.  That looks
ugly and error prone, because any modification of select_bad_process
should also be propagated to mem_cgroup_out_of_memory.

Let's rework this as follows: keep all oom heuristic related code private
to oom_kill.c and make oom_kill.c use exported memcg functions when it's
really necessary (like in case of iterating over memcg tasks).

Link: http://lkml.kernel.org/r/1470056933-7505-1-git-send-email-vdavydov@virtuozzo.com
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/memcontrol.h	 15
-rw-r--r--	include/linux/oom.h	 43
-rw-r--r--	mm/memcontrol.c	114
-rw-r--r--	mm/oom_kill.c	200
4 files changed, 167 insertions, 205 deletions
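In outline, the reworked flow looks like the sketch below (condensed from the hunks that follow; is_memcg_oom() and oom_evaluate_task() are introduced by this patch). memcontrol.c exports a single task iterator, and oom_kill.c keeps the victim-evaluation heuristic private, feeding the same callback to both the memcg and the global scan:

	/* mm/memcontrol.c: exported iterator over all tasks in a memcg hierarchy */
	int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
				  int (*fn)(struct task_struct *, void *), void *arg);

	/* mm/oom_kill.c: one private selection heuristic for both oom paths */
	static void select_bad_process(struct oom_control *oc)
	{
		if (is_memcg_oom(oc))
			mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
		else {
			struct task_struct *p;

			rcu_read_lock();
			for_each_process(p)
				if (oom_evaluate_task(p, oc))
					break;
			rcu_read_unlock();
		}

		oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
	}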
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5d8ca6e02e39..0710143723bc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 				   struct mem_cgroup *,
 				   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+int mem_cgroup_scan_tasks(struct mem_cgroup *,
+			  int (*)(struct task_struct *, void *), void *);
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 
 void mem_cgroup_handle_over_high(void);
 
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
+
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 				struct task_struct *p);
 
@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
 {
 }
 
+static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+		int (*fn)(struct task_struct *, void *), void *arg)
+{
+	return 0;
+}
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
 	return 0;
@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 	return 0;
 }
 
+static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+{
+	return 0;
+}
+
 static inline void
 mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bc0457ee3a8..17946e5121b6 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -34,23 +34,11 @@ struct oom_control {
 	 * for display purposes.
 	 */
 	const int order;
-};
 
-/*
- * Types of limitations to the nodes from which allocations may occur
- */
-enum oom_constraint {
-	CONSTRAINT_NONE,
-	CONSTRAINT_CPUSET,
-	CONSTRAINT_MEMORY_POLICY,
-	CONSTRAINT_MEMCG,
-};
-
-enum oom_scan_t {
-	OOM_SCAN_OK,		/* scan thread and find its badness */
-	OOM_SCAN_CONTINUE,	/* do not consider thread for oom kill */
-	OOM_SCAN_ABORT,		/* abort the iteration and return */
-	OOM_SCAN_SELECT,	/* always select this thread first */
-};
+	/* Used by oom implementation, do not set */
+	unsigned long totalpages;
+	struct task_struct *chosen;
+	unsigned long chosen_points;
+};
 
 extern struct mutex oom_lock;
@@ -70,30 +58,10 @@ static inline bool oom_task_origin(const struct task_struct *p)
 	return p->signal->oom_flag_origin;
 }
 
-extern void mark_oom_victim(struct task_struct *tsk);
-
-#ifdef CONFIG_MMU
-extern void wake_oom_reaper(struct task_struct *tsk);
-#else
-static inline void wake_oom_reaper(struct task_struct *tsk)
-{
-}
-#endif
-
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
 
-extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-			     unsigned int points, unsigned long totalpages,
-			     const char *message);
-
-extern void check_panic_on_oom(struct oom_control *oc,
-			       enum oom_constraint constraint);
-
-extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-					       struct task_struct *task);
-
 extern bool out_of_memory(struct oom_control *oc);
 
 extern void exit_oom_victim(struct task_struct *tsk);
@@ -101,14 +69,11 @@ extern void exit_oom_victim(struct task_struct *tsk);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
-extern bool oom_killer_disabled;
 extern bool oom_killer_disable(void);
 extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
-bool task_will_free_mem(struct task_struct *task);
-
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4be518d4e68a..48747ef5b88f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -921,6 +921,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
 /**
+ * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
+ * @memcg: hierarchy root
+ * @fn: function to call for each task
+ * @arg: argument passed to @fn
+ *
+ * This function iterates over tasks attached to @memcg or to any of its
+ * descendants and calls @fn for each task. If @fn returns a non-zero
+ * value, the function breaks the iteration loop and returns the value.
+ * Otherwise, it will iterate over all tasks and return 0.
+ *
+ * This function must not be called for the root memory cgroup.
+ */
+int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+			  int (*fn)(struct task_struct *, void *), void *arg)
+{
+	struct mem_cgroup *iter;
+	int ret = 0;
+
+	BUG_ON(memcg == root_mem_cgroup);
+
+	for_each_mem_cgroup_tree(iter, memcg) {
+		struct css_task_iter it;
+		struct task_struct *task;
+
+		css_task_iter_start(&iter->css, &it);
+		while (!ret && (task = css_task_iter_next(&it)))
+			ret = fn(task, arg);
+		css_task_iter_end(&it);
+		if (ret) {
+			mem_cgroup_iter_break(memcg, iter);
+			break;
+		}
+	}
+	return ret;
+}
+
+/**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
  * @zone: zone of the page
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
 	unsigned long limit;
 
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		.gfp_mask = gfp_mask,
 		.order = order,
 	};
-	struct mem_cgroup *iter;
-	unsigned long chosen_points = 0;
-	unsigned long totalpages;
-	unsigned int points = 0;
-	struct task_struct *chosen = NULL;
+	bool ret;
 
 	mutex_lock(&oom_lock);
-
-	/*
-	 * If current has a pending SIGKILL or is exiting, then automatically
-	 * select it.  The goal is to allow it to allocate so that it may
-	 * quickly exit and free its memory.
-	 */
-	if (task_will_free_mem(current)) {
-		mark_oom_victim(current);
-		wake_oom_reaper(current);
-		goto unlock;
-	}
-
-	check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
-	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
-	for_each_mem_cgroup_tree(iter, memcg) {
-		struct css_task_iter it;
-		struct task_struct *task;
-
-		css_task_iter_start(&iter->css, &it);
-		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(&oc, task)) {
-			case OOM_SCAN_SELECT:
-				if (chosen)
-					put_task_struct(chosen);
-				chosen = task;
-				chosen_points = ULONG_MAX;
-				get_task_struct(chosen);
-				/* fall through */
-			case OOM_SCAN_CONTINUE:
-				continue;
-			case OOM_SCAN_ABORT:
-				css_task_iter_end(&it);
-				mem_cgroup_iter_break(memcg, iter);
-				if (chosen)
-					put_task_struct(chosen);
-				/* Set a dummy value to return "true". */
-				chosen = (void *) 1;
-				goto unlock;
-			case OOM_SCAN_OK:
-				break;
-			};
-			points = oom_badness(task, memcg, NULL, totalpages);
-			if (!points || points < chosen_points)
-				continue;
-			/* Prefer thread group leaders for display purposes */
-			if (points == chosen_points &&
-			    thread_group_leader(chosen))
-				continue;
-
-			if (chosen)
-				put_task_struct(chosen);
-			chosen = task;
-			chosen_points = points;
-			get_task_struct(chosen);
-		}
-		css_task_iter_end(&it);
-	}
-
-	if (chosen) {
-		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(&oc, chosen, points, totalpages,
-				 "Memory cgroup out of memory");
-	}
-unlock:
+	ret = out_of_memory(&oc);
 	mutex_unlock(&oom_lock);
-	return chosen;
+	return ret;
 }
 
 #if MAX_NUMNODES > 1
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle || oom_killer_disabled)
+	if (!handle)
 		goto cleanup;
 
 	owait.memcg = memcg;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d53a9aa00977..ef175518f05f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -132,6 +132,11 @@ static inline bool is_sysrq_oom(struct oom_control *oc)
 	return oc->order == -1;
 }
 
+static inline bool is_memcg_oom(struct oom_control *oc)
+{
+	return oc->memcg != NULL;
+}
+
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask)
@@ -213,12 +218,17 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	return points > 0 ? points : 1;
 }
 
+enum oom_constraint {
+	CONSTRAINT_NONE,
+	CONSTRAINT_CPUSET,
+	CONSTRAINT_MEMORY_POLICY,
+	CONSTRAINT_MEMCG,
+};
+
 /*
  * Determine the type of allocation constraint.
  */
-#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-					     unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc)
 {
 	struct zone *zone;
 	struct zoneref *z;
@@ -226,8 +236,16 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
 	bool cpuset_limited = false;
 	int nid;
 
+	if (is_memcg_oom(oc)) {
+		oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1;
+		return CONSTRAINT_MEMCG;
+	}
+
 	/* Default to all available memory */
-	*totalpages = totalram_pages + total_swap_pages;
+	oc->totalpages = totalram_pages + total_swap_pages;
+
+	if (!IS_ENABLED(CONFIG_NUMA))
+		return CONSTRAINT_NONE;
 
 	if (!oc->zonelist)
 		return CONSTRAINT_NONE;
@@ -246,9 +264,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
 	 */
 	if (oc->nodemask &&
 	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
-		*totalpages = total_swap_pages;
+		oc->totalpages = total_swap_pages;
 		for_each_node_mask(nid, *oc->nodemask)
-			*totalpages += node_spanned_pages(nid);
+			oc->totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_MEMORY_POLICY;
 	}
 
@@ -259,27 +277,21 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
-		*totalpages = total_swap_pages;
+		oc->totalpages = total_swap_pages;
 		for_each_node_mask(nid, cpuset_current_mems_allowed)
-			*totalpages += node_spanned_pages(nid);
+			oc->totalpages += node_spanned_pages(nid);
 		return CONSTRAINT_CPUSET;
 	}
 	return CONSTRAINT_NONE;
 }
-#else
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-					     unsigned long *totalpages)
-{
-	*totalpages = totalram_pages + total_swap_pages;
-	return CONSTRAINT_NONE;
-}
-#endif
 
-enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-					struct task_struct *task)
+static int oom_evaluate_task(struct task_struct *task, void *arg)
 {
+	struct oom_control *oc = arg;
+	unsigned long points;
+
 	if (oom_unkillable_task(task, NULL, oc->nodemask))
-		return OOM_SCAN_CONTINUE;
+		goto next;
 
 	/*
 	 * This task already has access to memory reserves and is being killed.
@@ -289,68 +301,67 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
 	 */
 	if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
 		struct task_struct *p = find_lock_task_mm(task);
-		enum oom_scan_t ret = OOM_SCAN_ABORT;
+		bool reaped = false;
 
 		if (p) {
-			if (test_bit(MMF_OOM_REAPED, &p->mm->flags))
-				ret = OOM_SCAN_CONTINUE;
+			reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags);
 			task_unlock(p);
 		}
-
-		return ret;
+		if (reaped)
+			goto next;
+		goto abort;
 	}
 
 	/*
 	 * If task is allocating a lot of memory and has been marked to be
 	 * killed first if it triggers an oom, then select it.
 	 */
-	if (oom_task_origin(task))
-		return OOM_SCAN_SELECT;
+	if (oom_task_origin(task)) {
+		points = ULONG_MAX;
+		goto select;
+	}
 
-	return OOM_SCAN_OK;
+	points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
+	if (!points || points < oc->chosen_points)
+		goto next;
+
+	/* Prefer thread group leaders for display purposes */
+	if (points == oc->chosen_points && thread_group_leader(oc->chosen))
+		goto next;
+select:
+	if (oc->chosen)
+		put_task_struct(oc->chosen);
+	get_task_struct(task);
+	oc->chosen = task;
+	oc->chosen_points = points;
+next:
+	return 0;
+abort:
+	if (oc->chosen)
+		put_task_struct(oc->chosen);
+	oc->chosen = (void *)-1UL;
+	return 1;
 }
 
 /*
- * Simple selection loop. We chose the process with the highest
- * number of 'points'. Returns -1 on scan abort.
+ * Simple selection loop. We choose the process with the highest number of
+ * 'points'. In case scan was aborted, oc->chosen is set to -1.
  */
-static struct task_struct *select_bad_process(struct oom_control *oc,
-		unsigned int *ppoints, unsigned long totalpages)
+static void select_bad_process(struct oom_control *oc)
 {
-	struct task_struct *p;
-	struct task_struct *chosen = NULL;
-	unsigned long chosen_points = 0;
-
-	rcu_read_lock();
-	for_each_process(p) {
-		unsigned int points;
-
-		switch (oom_scan_process_thread(oc, p)) {
-		case OOM_SCAN_SELECT:
-			chosen = p;
-			chosen_points = ULONG_MAX;
-			/* fall through */
-		case OOM_SCAN_CONTINUE:
-			continue;
-		case OOM_SCAN_ABORT:
-			rcu_read_unlock();
-			return (struct task_struct *)(-1UL);
-		case OOM_SCAN_OK:
-			break;
-		};
-		points = oom_badness(p, NULL, oc->nodemask, totalpages);
-		if (!points || points < chosen_points)
-			continue;
+	if (is_memcg_oom(oc))
+		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
+	else {
+		struct task_struct *p;
 
-		chosen = p;
-		chosen_points = points;
+		rcu_read_lock();
+		for_each_process(p)
+			if (oom_evaluate_task(p, oc))
+				break;
+		rcu_read_unlock();
 	}
-	if (chosen)
-		get_task_struct(chosen);
-	rcu_read_unlock();
 
-	*ppoints = chosen_points * 1000 / totalpages;
-	return chosen;
+	oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
 }
 
 /**
@@ -419,7 +430,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
 static atomic_t oom_victims = ATOMIC_INIT(0);
 static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-bool oom_killer_disabled __read_mostly;
+static bool oom_killer_disabled __read_mostly;
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
@@ -627,7 +638,7 @@ static int oom_reaper(void *unused)
 	return 0;
 }
 
-void wake_oom_reaper(struct task_struct *tsk)
+static void wake_oom_reaper(struct task_struct *tsk)
 {
 	if (!oom_reaper_th)
 		return;
@@ -656,7 +667,11 @@ static int __init oom_init(void)
 	return 0;
 }
 subsys_initcall(oom_init)
-#endif
+#else
+static inline void wake_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_MMU */
 
 /**
  * mark_oom_victim - mark the given task as OOM victim
@@ -665,7 +680,7 @@ subsys_initcall(oom_init)
  * Has to be called with oom_lock held and never after
  * oom has been disabled already.
  */
-void mark_oom_victim(struct task_struct *tsk)
+static void mark_oom_victim(struct task_struct *tsk)
 {
 	WARN_ON(oom_killer_disabled);
 	/* OOM killer might race with memcg OOM */
@@ -760,7 +775,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task)
 {
 	struct mm_struct *mm = task->mm;
 	struct task_struct *p;
@@ -806,14 +821,10 @@ bool task_will_free_mem(struct task_struct *task)
 	return ret;
 }
 
-/*
- * Must be called while holding a reference to p, which will be released upon
- * returning.
- */
-void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-		      unsigned int points, unsigned long totalpages,
-		      const char *message)
+static void oom_kill_process(struct oom_control *oc, const char *message)
 {
+	struct task_struct *p = oc->chosen;
+	unsigned int points = oc->chosen_points;
 	struct task_struct *victim = p;
 	struct task_struct *child;
 	struct task_struct *t;
@@ -860,7 +871,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 	 * oom_badness() returns 0 if the thread is unkillable
 	 */
 	child_points = oom_badness(child,
-			oc->memcg, oc->nodemask, totalpages);
+			oc->memcg, oc->nodemask, oc->totalpages);
 	if (child_points > victim_points) {
 		put_task_struct(victim);
 		victim = child;
@@ -942,7 +953,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint)
+static void check_panic_on_oom(struct oom_control *oc,
+			       enum oom_constraint constraint)
 {
 	if (likely(!sysctl_panic_on_oom))
 		return;
@@ -988,19 +1000,18 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  */
 bool out_of_memory(struct oom_control *oc)
 {
-	struct task_struct *p;
-	unsigned long totalpages;
 	unsigned long freed = 0;
-	unsigned int uninitialized_var(points);
 	enum oom_constraint constraint = CONSTRAINT_NONE;
 
 	if (oom_killer_disabled)
 		return false;
 
-	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
-	if (freed > 0)
-		/* Got some memory back in the last second. */
-		return true;
+	if (!is_memcg_oom(oc)) {
+		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+		if (freed > 0)
+			/* Got some memory back in the last second. */
+			return true;
+	}
 
 	/*
 	 * If current has a pending SIGKILL or is exiting, then automatically
@@ -1024,37 +1035,38 @@ bool out_of_memory(struct oom_control *oc)
 
 	/*
 	 * Check if there were limitations on the allocation (only relevant for
-	 * NUMA) that may require different handling.
+	 * NUMA and memcg) that may require different handling.
 	 */
-	constraint = constrained_alloc(oc, &totalpages);
+	constraint = constrained_alloc(oc);
 	if (constraint != CONSTRAINT_MEMORY_POLICY)
 		oc->nodemask = NULL;
 	check_panic_on_oom(oc, constraint);
 
-	if (sysctl_oom_kill_allocating_task && current->mm &&
-	    !oom_unkillable_task(current, NULL, oc->nodemask) &&
+	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
+	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
 		get_task_struct(current);
-		oom_kill_process(oc, current, 0, totalpages,
-				 "Out of memory (oom_kill_allocating_task)");
+		oc->chosen = current;
+		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
 		return true;
 	}
 
-	p = select_bad_process(oc, &points, totalpages);
+	select_bad_process(oc);
 	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p && !is_sysrq_oom(oc)) {
+	if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
 		dump_header(oc, NULL);
 		panic("Out of memory and no killable processes...\n");
 	}
-	if (p && p != (void *)-1UL) {
-		oom_kill_process(oc, p, points, totalpages, "Out of memory");
+	if (oc->chosen && oc->chosen != (void *)-1UL) {
+		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
+				 "Memory cgroup out of memory");
 		/*
 		 * Give the killed process a good chance to exit before trying
 		 * to allocate memory again.
 		 */
 		schedule_timeout_killable(1);
 	}
-	return true;
+	return !!oc->chosen;
 }
 
 /*