Diffstat (limited to 'mm/oom_kill.c')

-rw-r--r--  mm/oom_kill.c  116
1 file changed, 76 insertions, 40 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ea2147dabba6..b68e802a7a7d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -18,6 +18,7 @@
 #include <linux/oom.h>
 #include <linux/mm.h>
 #include <linux/err.h>
+#include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/swap.h>
 #include <linux/timex.h>
@@ -196,27 +197,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 /*
  * Determine the type of allocation constraint.
  */
-static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-                                                    gfp_t gfp_mask)
-{
 #ifdef CONFIG_NUMA
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+                                gfp_t gfp_mask, nodemask_t *nodemask)
+{
         struct zone *zone;
         struct zoneref *z;
         enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-        nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
-        for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-                if (cpuset_zone_allowed_softwall(zone, gfp_mask))
-                        node_clear(zone_to_nid(zone), nodes);
-                else
-                        return CONSTRAINT_CPUSET;
+        /*
+         * We reach here only when __GFP_NOFAIL is used, so we should avoid
+         * killing current; a random task gets killed instead. Ideally this
+         * would be CONSTRAINT_THISNODE, but there is no way to handle it yet.
+         */
+        if (gfp_mask & __GFP_THISNODE)
+                return CONSTRAINT_NONE;
 
-        if (!nodes_empty(nodes))
+        /*
+         * The nodemask here is the nodemask passed to alloc_pages(). Cpuset
+         * doesn't use this nodemask for its hardwall/softwall/hierarchy
+         * features; mempolicy is currently its only user. Check whether
+         * mempolicy's nodemask contains all of N_HIGH_MEMORY.
+         */
+        if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
                 return CONSTRAINT_MEMORY_POLICY;
-#endif
+
+        /* Check whether this allocation failure was caused by cpuset's wall function */
+        for_each_zone_zonelist_nodemask(zone, z, zonelist,
+                        high_zoneidx, nodemask)
+                if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
+                        return CONSTRAINT_CPUSET;
 
         return CONSTRAINT_NONE;
 }
+#else
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+                                gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        return CONSTRAINT_NONE;
+}
+#endif
 
 /*
  * Simple selection loop. We chose the process with the highest
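The rewritten constrained_alloc() no longer accumulates allowed nodes; it classifies the failure directly from the mempolicy nodemask via nodes_subset(). A minimal userspace sketch of that subset test, modelling nodemasks as plain bitmasks (the node counts and masks below are invented for illustration):

    #include <stdio.h>

    typedef unsigned long nodemask_t;       /* stand-in for the kernel type */

    /* Analogue of nodes_subset(a, b): true iff every node set in a is also set in b. */
    static int nodes_subset(nodemask_t a, nodemask_t b)
    {
            return (a & ~b) == 0;
    }

    int main(void)
    {
            nodemask_t n_high_memory = 0xf; /* assume nodes 0-3 have memory */
            nodemask_t policy_mask = 0x3;   /* mempolicy bound to nodes 0-1 */

            /* Mirrors the patch: if the policy mask does not cover every
             * memory node, the OOM is mempolicy-constrained. */
            if (!nodes_subset(n_high_memory, policy_mask))
                    printf("CONSTRAINT_MEMORY_POLICY\n");
            return 0;
    }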
@@ -337,6 +357,24 @@ static void dump_tasks(const struct mem_cgroup *mem)
         } while_each_thread(g, p);
 }
 
+static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
+                                                        struct mem_cgroup *mem)
+{
+        pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
+                "oom_adj=%d\n",
+                current->comm, gfp_mask, order, current->signal->oom_adj);
+        task_lock(current);
+        cpuset_print_task_mems_allowed(current);
+        task_unlock(current);
+        dump_stack();
+        mem_cgroup_print_oom_info(mem, p);
+        show_mem();
+        if (sysctl_oom_dump_tasks)
+                dump_tasks(mem);
+}
+
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
 /*
  * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
  * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
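The new K() macro converts a page count to kilobytes with a shift: a page is 2^PAGE_SHIFT bytes, so dividing by 1024 leaves a shift of PAGE_SHIFT - 10. A standalone sketch, assuming 4 KiB pages (the real PAGE_SHIFT is architecture-dependent):

    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* assumed: 4 KiB pages */
    #define K(x) ((x) << (PAGE_SHIFT-10))   /* pages -> kilobytes */

    int main(void)
    {
            unsigned long total_vm = 25600; /* hypothetical mapping size, in pages */

            printf("vsz:%lukB\n", K(total_vm));     /* 25600 * 4 kB = 102400 kB */
            return 0;
    }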
@@ -350,15 +388,23 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
                 return;
         }
 
+        task_lock(p);
         if (!p->mm) {
                 WARN_ON(1);
-                printk(KERN_WARNING "tried to kill an mm-less task!\n");
+                printk(KERN_WARNING "tried to kill an mm-less task %d (%s)!\n",
+                        task_pid_nr(p), p->comm);
+                task_unlock(p);
                 return;
         }
 
         if (verbose)
-                printk(KERN_ERR "Killed process %d (%s)\n",
-                                task_pid_nr(p), p->comm);
+                printk(KERN_ERR "Killed process %d (%s) "
+                        "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
+                        task_pid_nr(p), p->comm,
+                        K(p->mm->total_vm),
+                        K(get_mm_counter(p->mm, MM_ANONPAGES)),
+                        K(get_mm_counter(p->mm, MM_FILEPAGES)));
+        task_unlock(p);
 
         /*
          * We give our sacrificial lamb high priority and access to
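Taking task_lock(p) before testing p->mm keeps the mm from being detached under us while the counters are read, and every return path must pair it with task_unlock(p). A userspace analogue of that lock/check/use/unlock discipline, with a pthread mutex standing in for the task lock (all names here are invented for the illustration):

    #include <pthread.h>
    #include <stdio.h>

    struct task {
            pthread_mutex_t lock;   /* plays the role of task_lock()/task_unlock() */
            unsigned long *mm;      /* may be detached by another thread */
    };

    static void report(struct task *t)
    {
            pthread_mutex_lock(&t->lock);
            if (!t->mm) {
                    pthread_mutex_unlock(&t->lock); /* unlock on every exit path */
                    return;
            }
            printf("vsz:%lu pages\n", *t->mm);      /* safe: mm cannot vanish here */
            pthread_mutex_unlock(&t->lock);
    }

    int main(void)
    {
            unsigned long vm = 25600;
            struct task t = { PTHREAD_MUTEX_INITIALIZER, &vm };

            report(&t);
            return 0;
    }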
@@ -395,20 +441,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 {
         struct task_struct *c;
 
-        if (printk_ratelimit()) {
-                printk(KERN_WARNING "%s invoked oom-killer: "
-                        "gfp_mask=0x%x, order=%d, oom_adj=%d\n",
-                        current->comm, gfp_mask, order,
-                        current->signal->oom_adj);
-                task_lock(current);
-                cpuset_print_task_mems_allowed(current);
-                task_unlock(current);
-                dump_stack();
-                mem_cgroup_print_oom_info(mem, current);
-                show_mem();
-                if (sysctl_oom_dump_tasks)
-                        dump_tasks(mem);
-        }
+        if (printk_ratelimit())
+                dump_header(p, gfp_mask, order, mem);
 
         /*
          * If the task is already exiting, don't alarm the sysadmin or kill
@@ -426,6 +460,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
         list_for_each_entry(c, &p->children, sibling) {
                 if (c->mm == p->mm)
                         continue;
+                if (mem && !task_in_mem_cgroup(c, mem))
+                        continue;
                 if (!oom_kill_task(c))
                         return 0;
         }
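The added check keeps a memcg OOM from sacrificing a child task that lives in a different memory cgroup. A toy sketch of the filtering loop, with cgroup membership reduced to an integer ID (both the struct and the IDs are invented for this illustration):

    #include <stdio.h>

    struct child {
            int cgroup_id;
            const char *comm;
    };

    /* Toy analogue of task_in_mem_cgroup(): membership by ID. */
    static int in_mem_cgroup(const struct child *c, int mem)
    {
            return c->cgroup_id == mem;
    }

    int main(void)
    {
            struct child children[] = { { 1, "worker" }, { 2, "batch" } };
            int mem = 2;    /* the cgroup that hit its limit; 0 means global OOM */

            for (int i = 0; i < 2; i++) {
                    if (mem && !in_mem_cgroup(&children[i], mem))
                            continue;       /* mirrors the added check */
                    printf("candidate: %s\n", children[i].comm);
            }
            return 0;
    }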
@@ -438,6 +474,8 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
         unsigned long points = 0;
         struct task_struct *p;
 
+        if (sysctl_panic_on_oom == 2)
+                panic("out of memory(memcg). panic_on_oom is selected.\n");
         read_lock(&tasklist_lock);
 retry:
         p = select_bad_process(&points, mem);
@@ -544,6 +582,7 @@ retry:
         /* Found nothing?!?! Either we hang forever, or we panic. */
         if (!p) {
                 read_unlock(&tasklist_lock);
+                dump_header(NULL, gfp_mask, order, NULL);
                 panic("Out of memory and no killable processes...\n");
         }
 
@@ -565,13 +604,6 @@ void pagefault_out_of_memory(void)
                 /* Got some memory back in the last second. */
                 return;
 
-        /*
-         * If this is from memcg, oom-killer is already invoked.
-         * and not worth to go system-wide-oom.
-         */
-        if (mem_cgroup_oom_called(current))
-                goto rest_and_return;
-
         if (sysctl_panic_on_oom)
                 panic("out of memory from page fault. panic_on_oom is selected.\n");
 
@@ -583,7 +615,6 @@
          * Give "p" a good chance of killing itself before we
          * retry to allocate memory.
          */
-rest_and_return:
         if (!test_thread_flag(TIF_MEMDIE))
                 schedule_timeout_uninterruptible(1);
 }
@@ -599,7 +630,8 @@ rest_and_return:
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+                int order, nodemask_t *nodemask)
 {
         unsigned long freed = 0;
         enum oom_constraint constraint;
@@ -609,14 +641,16 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
                 /* Got some memory back in the last second. */
                 return;
 
-        if (sysctl_panic_on_oom == 2)
+        if (sysctl_panic_on_oom == 2) {
+                dump_header(NULL, gfp_mask, order, NULL);
                 panic("out of memory. Compulsory panic_on_oom is selected.\n");
+        }
 
         /*
          * Check if there were limitations on the allocation (only relevant for
          * NUMA) that may require different handling.
          */
-        constraint = constrained_alloc(zonelist, gfp_mask);
+        constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
         read_lock(&tasklist_lock);
 
         switch (constraint) {
@@ -626,8 +660,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
                 break;
 
         case CONSTRAINT_NONE:
-                if (sysctl_panic_on_oom)
+                if (sysctl_panic_on_oom) {
+                        dump_header(NULL, gfp_mask, order, NULL);
                         panic("out of memory. panic_on_oom is selected\n");
+                }
                 /* Fall-through */
         case CONSTRAINT_CPUSET:
                 __out_of_memory(gfp_mask, order);
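Taken together, out_of_memory() now dispatches on the constraint reported by constrained_alloc(). A simplified standalone model of that flow (panics are reduced to printouts; the CONSTRAINT_MEMORY_POLICY branch, which kills current in the surrounding code, is only partially visible in the hunk above, so its behaviour here is an assumption):

    #include <stdio.h>

    enum oom_constraint {
            CONSTRAINT_NONE,
            CONSTRAINT_CPUSET,
            CONSTRAINT_MEMORY_POLICY,
    };

    static int sysctl_panic_on_oom;         /* stand-in for the real sysctl */

    static void dispatch(enum oom_constraint c)
    {
            switch (c) {
            case CONSTRAINT_MEMORY_POLICY:
                    puts("kill current (allocation bound by mempolicy)");
                    break;
            case CONSTRAINT_NONE:
                    if (sysctl_panic_on_oom) {
                            puts("dump_header(); panic()");
                            return;
                    }
                    /* fall through, as in the patch */
            case CONSTRAINT_CPUSET:
                    puts("scan the task list and kill the worst candidate");
                    break;
            }
    }

    int main(void)
    {
            dispatch(CONSTRAINT_MEMORY_POLICY);
            dispatch(CONSTRAINT_CPUSET);
            return 0;
    }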