-rw-r--r--  include/linux/memcontrol.h |  15
-rw-r--r--  include/linux/oom.h        |  43
-rw-r--r--  mm/memcontrol.c            | 114
-rw-r--r--  mm/oom_kill.c              | 200
4 files changed, 167 insertions, 205 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5d8ca6e02e39..0710143723bc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -366,6 +366,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
                                   struct mem_cgroup *,
                                   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+int mem_cgroup_scan_tasks(struct mem_cgroup *,
+                          int (*)(struct task_struct *, void *), void *);
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
@@ -446,6 +448,8 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 
 void mem_cgroup_handle_over_high(void);
 
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg);
+
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                struct task_struct *p);
 
@@ -639,6 +643,12 @@ static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
 {
 }
 
+static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+                int (*fn)(struct task_struct *, void *), void *arg)
+{
+        return 0;
+}
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
         return 0;
@@ -669,6 +679,11 @@ mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
         return 0;
 }
 
+static inline unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+{
+        return 0;
+}
+
 static inline void
 mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bc0457ee3a8..17946e5121b6 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -34,23 +34,11 @@ struct oom_control {
          * for display purposes.
          */
         const int order;
-};
 
-/*
- * Types of limitations to the nodes from which allocations may occur
- */
-enum oom_constraint {
-        CONSTRAINT_NONE,
-        CONSTRAINT_CPUSET,
-        CONSTRAINT_MEMORY_POLICY,
-        CONSTRAINT_MEMCG,
-};
-
-enum oom_scan_t {
-        OOM_SCAN_OK,            /* scan thread and find its badness */
-        OOM_SCAN_CONTINUE,      /* do not consider thread for oom kill */
-        OOM_SCAN_ABORT,         /* abort the iteration and return */
-        OOM_SCAN_SELECT,        /* always select this thread first */
+        /* Used by oom implementation, do not set */
+        unsigned long totalpages;
+        struct task_struct *chosen;
+        unsigned long chosen_points;
 };
 
 extern struct mutex oom_lock;
@@ -70,30 +58,10 @@ static inline bool oom_task_origin(const struct task_struct *p)
         return p->signal->oom_flag_origin;
 }
 
-extern void mark_oom_victim(struct task_struct *tsk);
-
-#ifdef CONFIG_MMU
-extern void wake_oom_reaper(struct task_struct *tsk);
-#else
-static inline void wake_oom_reaper(struct task_struct *tsk)
-{
-}
-#endif
-
 extern unsigned long oom_badness(struct task_struct *p,
                 struct mem_cgroup *memcg, const nodemask_t *nodemask,
                 unsigned long totalpages);
 
-extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-                             unsigned int points, unsigned long totalpages,
-                             const char *message);
-
-extern void check_panic_on_oom(struct oom_control *oc,
-                               enum oom_constraint constraint);
-
-extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-                struct task_struct *task);
-
 extern bool out_of_memory(struct oom_control *oc);
 
 extern void exit_oom_victim(struct task_struct *tsk);
@@ -101,14 +69,11 @@ extern void exit_oom_victim(struct task_struct *tsk);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
-extern bool oom_killer_disabled;
 extern bool oom_killer_disable(void);
 extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
-bool task_will_free_mem(struct task_struct *task);
-
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4be518d4e68a..48747ef5b88f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -921,6 +921,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
              iter = mem_cgroup_iter(NULL, iter, NULL))
 
 /**
+ * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
+ * @memcg: hierarchy root
+ * @fn: function to call for each task
+ * @arg: argument passed to @fn
+ *
+ * This function iterates over tasks attached to @memcg or to any of its
+ * descendants and calls @fn for each task. If @fn returns a non-zero
+ * value, the function breaks the iteration loop and returns the value.
+ * Otherwise, it will iterate over all tasks and return 0.
+ *
+ * This function must not be called for the root memory cgroup.
+ */
+int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+                          int (*fn)(struct task_struct *, void *), void *arg)
+{
+        struct mem_cgroup *iter;
+        int ret = 0;
+
+        BUG_ON(memcg == root_mem_cgroup);
+
+        for_each_mem_cgroup_tree(iter, memcg) {
+                struct css_task_iter it;
+                struct task_struct *task;
+
+                css_task_iter_start(&iter->css, &it);
+                while (!ret && (task = css_task_iter_next(&it)))
+                        ret = fn(task, arg);
+                css_task_iter_end(&it);
+                if (ret) {
+                        mem_cgroup_iter_break(memcg, iter);
+                        break;
+                }
+        }
+        return ret;
+}
+
+/**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
  * @zone: zone of the page
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
         unsigned long limit;
 
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                 .gfp_mask = gfp_mask,
                 .order = order,
         };
-        struct mem_cgroup *iter;
-        unsigned long chosen_points = 0;
-        unsigned long totalpages;
-        unsigned int points = 0;
-        struct task_struct *chosen = NULL;
+        bool ret;
 
         mutex_lock(&oom_lock);
-
-        /*
-         * If current has a pending SIGKILL or is exiting, then automatically
-         * select it. The goal is to allow it to allocate so that it may
-         * quickly exit and free its memory.
-         */
-        if (task_will_free_mem(current)) {
-                mark_oom_victim(current);
-                wake_oom_reaper(current);
-                goto unlock;
-        }
-
-        check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
-        totalpages = mem_cgroup_get_limit(memcg) ? : 1;
-        for_each_mem_cgroup_tree(iter, memcg) {
-                struct css_task_iter it;
-                struct task_struct *task;
-
-                css_task_iter_start(&iter->css, &it);
-                while ((task = css_task_iter_next(&it))) {
-                        switch (oom_scan_process_thread(&oc, task)) {
-                        case OOM_SCAN_SELECT:
-                                if (chosen)
-                                        put_task_struct(chosen);
-                                chosen = task;
-                                chosen_points = ULONG_MAX;
-                                get_task_struct(chosen);
-                                /* fall through */
-                        case OOM_SCAN_CONTINUE:
-                                continue;
-                        case OOM_SCAN_ABORT:
-                                css_task_iter_end(&it);
-                                mem_cgroup_iter_break(memcg, iter);
-                                if (chosen)
-                                        put_task_struct(chosen);
-                                /* Set a dummy value to return "true". */
-                                chosen = (void *) 1;
-                                goto unlock;
-                        case OOM_SCAN_OK:
-                                break;
-                        };
-                        points = oom_badness(task, memcg, NULL, totalpages);
-                        if (!points || points < chosen_points)
-                                continue;
-                        /* Prefer thread group leaders for display purposes */
-                        if (points == chosen_points &&
-                            thread_group_leader(chosen))
-                                continue;
-
-                        if (chosen)
-                                put_task_struct(chosen);
-                        chosen = task;
-                        chosen_points = points;
-                        get_task_struct(chosen);
-                }
-                css_task_iter_end(&it);
-        }
-
-        if (chosen) {
-                points = chosen_points * 1000 / totalpages;
-                oom_kill_process(&oc, chosen, points, totalpages,
-                                 "Memory cgroup out of memory");
-        }
-unlock:
+        ret = out_of_memory(&oc);
         mutex_unlock(&oom_lock);
-        return chosen;
+        return ret;
 }
 
 #if MAX_NUMNODES > 1
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
         if (!memcg)
                 return false;
 
-        if (!handle || oom_killer_disabled)
+        if (!handle)
                 goto cleanup;
 
         owait.memcg = memcg;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d53a9aa00977..ef175518f05f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -132,6 +132,11 @@ static inline bool is_sysrq_oom(struct oom_control *oc)
         return oc->order == -1;
 }
 
+static inline bool is_memcg_oom(struct oom_control *oc)
+{
+        return oc->memcg != NULL;
+}
+
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
                 struct mem_cgroup *memcg, const nodemask_t *nodemask)
@@ -213,12 +218,17 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         return points > 0 ? points : 1;
 }
 
+enum oom_constraint {
+        CONSTRAINT_NONE,
+        CONSTRAINT_CPUSET,
+        CONSTRAINT_MEMORY_POLICY,
+        CONSTRAINT_MEMCG,
+};
+
 /*
  * Determine the type of allocation constraint.
  */
-#ifdef CONFIG_NUMA
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-                                             unsigned long *totalpages)
+static enum oom_constraint constrained_alloc(struct oom_control *oc)
 {
         struct zone *zone;
         struct zoneref *z;
@@ -226,8 +236,16 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
         bool cpuset_limited = false;
         int nid;
 
+        if (is_memcg_oom(oc)) {
+                oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1;
+                return CONSTRAINT_MEMCG;
+        }
+
         /* Default to all available memory */
-        *totalpages = totalram_pages + total_swap_pages;
+        oc->totalpages = totalram_pages + total_swap_pages;
+
+        if (!IS_ENABLED(CONFIG_NUMA))
+                return CONSTRAINT_NONE;
 
         if (!oc->zonelist)
                 return CONSTRAINT_NONE;
@@ -246,9 +264,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
          */
         if (oc->nodemask &&
             !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
-                *totalpages = total_swap_pages;
+                oc->totalpages = total_swap_pages;
                 for_each_node_mask(nid, *oc->nodemask)
-                        *totalpages += node_spanned_pages(nid);
+                        oc->totalpages += node_spanned_pages(nid);
                 return CONSTRAINT_MEMORY_POLICY;
         }
 
@@ -259,27 +277,21 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc,
                         cpuset_limited = true;
 
         if (cpuset_limited) {
-                *totalpages = total_swap_pages;
+                oc->totalpages = total_swap_pages;
                 for_each_node_mask(nid, cpuset_current_mems_allowed)
-                        *totalpages += node_spanned_pages(nid);
+                        oc->totalpages += node_spanned_pages(nid);
                 return CONSTRAINT_CPUSET;
         }
         return CONSTRAINT_NONE;
 }
-#else
-static enum oom_constraint constrained_alloc(struct oom_control *oc,
-                                             unsigned long *totalpages)
-{
-        *totalpages = totalram_pages + total_swap_pages;
-        return CONSTRAINT_NONE;
-}
-#endif
 
-enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
-                                        struct task_struct *task)
+static int oom_evaluate_task(struct task_struct *task, void *arg)
 {
+        struct oom_control *oc = arg;
+        unsigned long points;
+
         if (oom_unkillable_task(task, NULL, oc->nodemask))
-                return OOM_SCAN_CONTINUE;
+                goto next;
 
         /*
          * This task already has access to memory reserves and is being killed.
@@ -289,68 +301,67 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc,
          */
         if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
                 struct task_struct *p = find_lock_task_mm(task);
-                enum oom_scan_t ret = OOM_SCAN_ABORT;
+                bool reaped = false;
 
                 if (p) {
-                        if (test_bit(MMF_OOM_REAPED, &p->mm->flags))
-                                ret = OOM_SCAN_CONTINUE;
+                        reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags);
                         task_unlock(p);
                 }
-
-                return ret;
+                if (reaped)
+                        goto next;
+                goto abort;
         }
 
         /*
          * If task is allocating a lot of memory and has been marked to be
          * killed first if it triggers an oom, then select it.
          */
-        if (oom_task_origin(task))
-                return OOM_SCAN_SELECT;
+        if (oom_task_origin(task)) {
+                points = ULONG_MAX;
+                goto select;
+        }
 
-        return OOM_SCAN_OK;
+        points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
+        if (!points || points < oc->chosen_points)
+                goto next;
+
+        /* Prefer thread group leaders for display purposes */
+        if (points == oc->chosen_points && thread_group_leader(oc->chosen))
+                goto next;
+select:
+        if (oc->chosen)
+                put_task_struct(oc->chosen);
+        get_task_struct(task);
+        oc->chosen = task;
+        oc->chosen_points = points;
+next:
+        return 0;
+abort:
+        if (oc->chosen)
+                put_task_struct(oc->chosen);
+        oc->chosen = (void *)-1UL;
+        return 1;
 }
 
 /*
- * Simple selection loop. We chose the process with the highest
- * number of 'points'. Returns -1 on scan abort.
+ * Simple selection loop. We choose the process with the highest number of
+ * 'points'. In case scan was aborted, oc->chosen is set to -1.
  */
-static struct task_struct *select_bad_process(struct oom_control *oc,
-                unsigned int *ppoints, unsigned long totalpages)
+static void select_bad_process(struct oom_control *oc)
 {
-        struct task_struct *p;
-        struct task_struct *chosen = NULL;
-        unsigned long chosen_points = 0;
-
-        rcu_read_lock();
-        for_each_process(p) {
-                unsigned int points;
-
-                switch (oom_scan_process_thread(oc, p)) {
-                case OOM_SCAN_SELECT:
-                        chosen = p;
-                        chosen_points = ULONG_MAX;
-                        /* fall through */
-                case OOM_SCAN_CONTINUE:
-                        continue;
-                case OOM_SCAN_ABORT:
-                        rcu_read_unlock();
-                        return (struct task_struct *)(-1UL);
-                case OOM_SCAN_OK:
-                        break;
-                };
-                points = oom_badness(p, NULL, oc->nodemask, totalpages);
-                if (!points || points < chosen_points)
-                        continue;
+        if (is_memcg_oom(oc))
+                mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
+        else {
+                struct task_struct *p;
 
-                chosen = p;
-                chosen_points = points;
+                rcu_read_lock();
+                for_each_process(p)
+                        if (oom_evaluate_task(p, oc))
+                                break;
+                rcu_read_unlock();
         }
-        if (chosen)
-                get_task_struct(chosen);
-        rcu_read_unlock();
 
-        *ppoints = chosen_points * 1000 / totalpages;
-        return chosen;
+        oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
 }
 
 /**
@@ -419,7 +430,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
 static atomic_t oom_victims = ATOMIC_INIT(0);
 static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-bool oom_killer_disabled __read_mostly;
+static bool oom_killer_disabled __read_mostly;
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
@@ -627,7 +638,7 @@ static int oom_reaper(void *unused)
         return 0;
 }
 
-void wake_oom_reaper(struct task_struct *tsk)
+static void wake_oom_reaper(struct task_struct *tsk)
 {
         if (!oom_reaper_th)
                 return;
@@ -656,7 +667,11 @@ static int __init oom_init(void)
         return 0;
 }
 subsys_initcall(oom_init)
-#endif
+#else
+static inline void wake_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_MMU */
 
 /**
  * mark_oom_victim - mark the given task as OOM victim
@@ -665,7 +680,7 @@ subsys_initcall(oom_init)
  * Has to be called with oom_lock held and never after
  * oom has been disabled already.
  */
-void mark_oom_victim(struct task_struct *tsk)
+static void mark_oom_victim(struct task_struct *tsk)
 {
         WARN_ON(oom_killer_disabled);
         /* OOM killer might race with memcg OOM */
@@ -760,7 +775,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
  * Caller has to make sure that task->mm is stable (hold task_lock or
  * it operates on the current).
  */
-bool task_will_free_mem(struct task_struct *task)
+static bool task_will_free_mem(struct task_struct *task)
 {
         struct mm_struct *mm = task->mm;
         struct task_struct *p;
@@ -806,14 +821,10 @@ bool task_will_free_mem(struct task_struct *task)
         return ret;
 }
 
-/*
- * Must be called while holding a reference to p, which will be released upon
- * returning.
- */
-void oom_kill_process(struct oom_control *oc, struct task_struct *p,
-                      unsigned int points, unsigned long totalpages,
-                      const char *message)
+static void oom_kill_process(struct oom_control *oc, const char *message)
 {
+        struct task_struct *p = oc->chosen;
+        unsigned int points = oc->chosen_points;
         struct task_struct *victim = p;
         struct task_struct *child;
         struct task_struct *t;
@@ -860,7 +871,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
                          * oom_badness() returns 0 if the thread is unkillable
                          */
                         child_points = oom_badness(child,
-                                        oc->memcg, oc->nodemask, totalpages);
+                                        oc->memcg, oc->nodemask, oc->totalpages);
                         if (child_points > victim_points) {
                                 put_task_struct(victim);
                                 victim = child;
@@ -942,7 +953,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint)
+static void check_panic_on_oom(struct oom_control *oc,
+                               enum oom_constraint constraint)
 {
         if (likely(!sysctl_panic_on_oom))
                 return;
@@ -988,19 +1000,18 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  */
 bool out_of_memory(struct oom_control *oc)
 {
-        struct task_struct *p;
-        unsigned long totalpages;
         unsigned long freed = 0;
-        unsigned int uninitialized_var(points);
         enum oom_constraint constraint = CONSTRAINT_NONE;
 
         if (oom_killer_disabled)
                 return false;
 
-        blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
-        if (freed > 0)
-                /* Got some memory back in the last second. */
-                return true;
+        if (!is_memcg_oom(oc)) {
+                blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+                if (freed > 0)
+                        /* Got some memory back in the last second. */
+                        return true;
+        }
 
         /*
          * If current has a pending SIGKILL or is exiting, then automatically
@@ -1024,37 +1035,38 @@ bool out_of_memory(struct oom_control *oc)
 
         /*
          * Check if there were limitations on the allocation (only relevant for
-         * NUMA) that may require different handling.
+         * NUMA and memcg) that may require different handling.
          */
-        constraint = constrained_alloc(oc, &totalpages);
+        constraint = constrained_alloc(oc);
         if (constraint != CONSTRAINT_MEMORY_POLICY)
                 oc->nodemask = NULL;
         check_panic_on_oom(oc, constraint);
 
-        if (sysctl_oom_kill_allocating_task && current->mm &&
-            !oom_unkillable_task(current, NULL, oc->nodemask) &&
+        if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
+            current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
             current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
                 get_task_struct(current);
-                oom_kill_process(oc, current, 0, totalpages,
-                                 "Out of memory (oom_kill_allocating_task)");
+                oc->chosen = current;
+                oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
                 return true;
         }
 
-        p = select_bad_process(oc, &points, totalpages);
+        select_bad_process(oc);
         /* Found nothing?!?! Either we hang forever, or we panic. */
-        if (!p && !is_sysrq_oom(oc)) {
+        if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
                 dump_header(oc, NULL);
                 panic("Out of memory and no killable processes...\n");
         }
-        if (p && p != (void *)-1UL) {
-                oom_kill_process(oc, p, points, totalpages, "Out of memory");
+        if (oc->chosen && oc->chosen != (void *)-1UL) {
+                oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
+                                 "Memory cgroup out of memory");
                 /*
                  * Give the killed process a good chance to exit before trying
                  * to allocate memory again.
                  */
                 schedule_timeout_killable(1);
         }
-        return true;
+        return !!oc->chosen;
 }
 
 /*