Diffstat (limited to 'mm/oom_kill.c')
 -rw-r--r--  mm/oom_kill.c |  89
 1 file changed, 42 insertions(+), 47 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7dcca55ede7c..83fb72c108b7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -31,6 +31,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mempolicy.h>
 #include <linux/security.h>
+#include <linux/ptrace.h>
 
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
@@ -83,24 +84,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 #endif /* CONFIG_NUMA */
 
 /*
- * If this is a system OOM (not a memcg OOM) and the task selected to be
- * killed is not already running at high (RT) priorities, speed up the
- * recovery by boosting the dying task to the lowest FIFO priority.
- * That helps with the recovery and avoids interfering with RT tasks.
- */
-static void boost_dying_task_prio(struct task_struct *p,
-				  struct mem_cgroup *mem)
-{
-	struct sched_param param = { .sched_priority = 1 };
-
-	if (mem)
-		return;
-
-	if (!rt_task(p))
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-}
-
-/*
  * The process p may have detached its own ->mm while exiting or through
  * use_mm(), but one or more of its subthreads may still have a valid
  * pointer. Return p, or any of its subthreads with a valid ->mm, with
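
The deleted boost_dying_task_prio() promoted a non-realtime OOM victim to SCHED_FIFO priority 1, the lowest realtime priority, so it would preempt every SCHED_NORMAL task and, with luck, exit and release its memory sooner. For reference only, the same boost expressed through the POSIX scheduling API from userspace (a hypothetical helper, not part of this patch):

	#include <sched.h>

	/* Give pid the lowest RT priority: above all SCHED_NORMAL tasks,
	 * below every other SCHED_FIFO/SCHED_RR task. */
	int boost_to_lowest_fifo(pid_t pid)
	{
		struct sched_param param = { .sched_priority = 1 };
		return sched_setscheduler(pid, SCHED_FIFO, &param);
	}

The in-kernel variant used sched_setscheduler_nocheck() to bypass the capability check, and skipped the boost for memcg OOMs (mem != NULL), as the deleted lines show.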
@@ -292,13 +275,15 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		unsigned long totalpages, struct mem_cgroup *mem,
 		const nodemask_t *nodemask)
 {
-	struct task_struct *p;
+	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
 	*ppoints = 0;
 
-	for_each_process(p) {
+	do_each_thread(g, p) {
 		unsigned int points;
 
+		if (!p->mm)
+			continue;
 		if (oom_unkillable_task(p, mem, nodemask))
 			continue;
 
@@ -314,22 +299,29 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		if (test_tsk_thread_flag(p, TIF_MEMDIE))
 			return ERR_PTR(-1UL);
 
-		/*
-		 * This is in the process of releasing memory so wait for it
-		 * to finish before killing some other task by mistake.
-		 *
-		 * However, if p is the current task, we allow the 'kill' to
-		 * go ahead if it is exiting: this will simply set TIF_MEMDIE,
-		 * which will allow it to gain access to memory reserves in
-		 * the process of exiting and releasing its resources.
-		 * Otherwise we could get an easy OOM deadlock.
-		 */
-		if (thread_group_empty(p) && (p->flags & PF_EXITING) && p->mm) {
-			if (p != current)
-				return ERR_PTR(-1UL);
-
-			chosen = p;
-			*ppoints = 1000;
+		if (p->flags & PF_EXITING) {
+			/*
+			 * If p is the current task and is in the process of
+			 * releasing memory, we allow the "kill" to set
+			 * TIF_MEMDIE, which will allow it to gain access to
+			 * memory reserves. Otherwise, it may stall forever.
+			 *
+			 * The loop isn't broken here, however, in case other
+			 * threads are found to have already been oom killed.
+			 */
+			if (p == current) {
+				chosen = p;
+				*ppoints = 1000;
+			} else {
+				/*
+				 * If this task is not being ptraced on exit,
+				 * then wait for it to finish before killing
+				 * some other task unnecessarily.
+				 */
+				if (!(task_ptrace(p->group_leader) &
+							PT_TRACE_EXIT))
+					return ERR_PTR(-1UL);
+			}
 		}
 
 		points = oom_badness(p, mem, nodemask, totalpages);
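
The new else-branch defers to an exiting task only if it is not ptraced on exit: task_ptrace() in kernels of this era simply returns the task's ->ptrace flags, and PT_TRACE_EXIT is set when a tracer has requested PTRACE_O_TRACEEXIT. Such a tracee can be held stopped at its exit event indefinitely, so it may never finish releasing memory and is not worth waiting for. A minimal userspace sketch of how a tracer creates exactly that situation (hypothetical example, not from this patch):

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	/* Ask to be notified when the tracee starts exiting. Until the
	 * tracer resumes it, the tracee sits stopped in its exit path
	 * with PF_EXITING set, never completing exit(). */
	void trace_exit(pid_t tracee)
	{
		ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
		waitpid(tracee, NULL, 0);
		ptrace(PTRACE_SETOPTIONS, tracee, NULL,
		       (void *)PTRACE_O_TRACEEXIT);
		ptrace(PTRACE_CONT, tracee, NULL, NULL);
	}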
@@ -337,7 +329,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			chosen = p;
 			*ppoints = points;
 		}
-	}
+	} while_each_thread(g, p);
 
 	return chosen;
 }
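
The switch from for_each_process() to the do_each_thread()/while_each_thread() pair makes the scan visit every thread rather than only thread-group leaders, which is why a thread without an ->mm can now simply be skipped. The pair is a split macro around a do/while loop, defined in include/linux/sched.h of this vintage roughly as:

	#define do_each_thread(g, t) \
		for (g = t = &init_task; (g = t = next_task(g)) != &init_task; ) do

	#define while_each_thread(g, t) \
		while ((t = next_thread(t)) != g)

That expansion is why the loop's closing brace above must be followed by while_each_thread(g, p); like for_each_process(), the iteration requires tasklist_lock (or RCU) to be held.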
@@ -396,7 +388,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(current);
 	dump_stack();
 	mem_cgroup_print_oom_info(mem, p);
-	show_mem();
+	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(mem, nodemask);
 }
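
show_mem() grew a filter argument in this series; SHOW_MEM_FILTER_NODES suppresses per-zone statistics for nodes that current is not allowed to allocate from (due to cpusets or mempolicy), which keeps OOM reports on large NUMA machines readable. The interface as it appeared in include/linux/mm.h at the time, quoted from memory and worth double-checking against your tree:

	extern void show_mem(unsigned int flags);
	#define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */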
@@ -442,13 +434,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 	force_sig(SIGKILL, p);
 
-	/*
-	 * We give our sacrificial lamb high priority and access to
-	 * all the memory it needs. That way it should be able to
-	 * exit() and clear out its resources quickly...
-	 */
-	boost_dying_task_prio(p, mem);
-
 	return 0;
 }
 #undef K
@@ -472,7 +457,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 */
 	if (p->flags & PF_EXITING) {
 		set_tsk_thread_flag(p, TIF_MEMDIE);
-		boost_dying_task_prio(p, mem);
 		return 0;
 	}
 
@@ -491,6 +475,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	list_for_each_entry(child, &t->children, sibling) {
 		unsigned int child_points;
 
+		if (child->mm == p->mm)
+			continue;
 		/*
 		 * oom_badness() returns 0 if the thread is unkillable
 		 */
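
When the OOM killer prefers to sacrifice a child of the chosen victim, killing a child that shares the victim's address space frees no memory, so such children are now skipped. Parent/child mm sharing arises from clone() with CLONE_VM (vfork() being the common case); a hypothetical userspace sketch of how such a pair comes about:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <unistd.h>

	static char stack[64 * 1024];

	static int child_fn(void *arg)
	{
		pause();	/* idles in the parent's address space */
		return 0;
	}

	int main(void)
	{
		/* CLONE_VM: the child is a separate process but shares this
		 * process's mm, so killing it would free no memory. */
		clone(child_fn, stack + sizeof(stack), CLONE_VM | SIGCHLD, NULL);
		pause();
		return 0;
	}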
@@ -537,6 +523,16 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 	unsigned int points = 0;
 	struct task_struct *p;
 
+	/*
+	 * If current has a pending SIGKILL, then automatically select it. The
+	 * goal is to allow it to allocate so that it may quickly exit and free
+	 * its memory.
+	 */
+	if (fatal_signal_pending(current)) {
+		set_thread_flag(TIF_MEMDIE);
+		return;
+	}
+
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
 	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
 	read_lock(&tasklist_lock);
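
Setting TIF_MEMDIE is what actually lets the dying task make progress: the page allocator treats a TIF_MEMDIE task as entitled to dip into the emergency reserves by ignoring zone watermarks. Paraphrased from gfp_to_alloc_flags() in mm/page_alloc.c of this era (the exact form varies by kernel version):

	/* A task picked by the OOM killer may allocate below the min
	 * watermark so it can finish exiting and return its memory. */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		alloc_flags |= ALLOC_NO_WATERMARKS;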
@@ -689,7 +685,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 */
 	if (fatal_signal_pending(current)) {
 		set_thread_flag(TIF_MEMDIE);
-		boost_dying_task_prio(current, NULL);
 		return;
 	}
 