author     Nick Piggin <npiggin@suse.de>                     2009-01-06 17:38:59 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2009-01-06 18:58:58 -0500
commit     1c0fe6e3bda0464728c23c8d84aa47567e8b716c (patch)
tree       64f7903ee7757b14464e8a06bf91f5c4d5a8ba56
parent     5bd1455c239672081d0e7f086e899b8cbc7a9844 (diff)
mm: invoke oom-killer from page fault
Rather than have the pagefault handler kill a process directly if it gets
a VM_FAULT_OOM, have it call into the OOM killer.

With increasingly sophisticated oom behaviour (cpusets, memory cgroups,
oom killing throttling, oom priority adjustment or selective disabling,
panic on oom, etc), it's silly to unconditionally kill the faulting
process at page fault time.  Create a hook for pagefault oom path to
call into instead.

Only converted x86 and uml so far.

[akpm@linux-foundation.org: make __out_of_memory() static]
[akpm@linux-foundation.org: fix comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Dike <jdike@addtoit.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  arch/um/kernel/trap.c  | 24
-rw-r--r--  arch/x86/mm/fault.c    | 24
-rw-r--r--  include/linux/mm.h     |  5
-rw-r--r--  mm/oom_kill.c          | 94
4 files changed, 84 insertions(+), 63 deletions(-)
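For illustration only: a minimal sketch of how an architecture that has not yet been converted would use the new hook, following the pattern the x86 and uml hunks below apply. The function name arch_handle_fault and its argument list are placeholders invented here, not part of this patch; handle_mm_fault(), up_read() and pagefault_out_of_memory() are the interfaces the patch itself uses.

	/* Sketch only -- mirrors the x86/uml conversion below; names are illustrative. */
	static void arch_handle_fault(struct mm_struct *mm, struct vm_area_struct *vma,
				      unsigned long address, int is_write)
	{
		int fault;

		fault = handle_mm_fault(mm, vma, address, is_write);
		if (unlikely(fault & VM_FAULT_OOM)) {
			/* Drop mmap_sem before calling into the OOM killer. */
			up_read(&mm->mmap_sem);
			/*
			 * Either a victim gets killed (possibly us), or we return
			 * to userspace and retry the fault; the arch handler no
			 * longer kills the faulting task itself.
			 */
			pagefault_out_of_memory();
			return;
		}
		/* ... VM_FAULT_SIGBUS and the success path are unchanged ... */
	}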
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 44e490419495..7384d8accfe7 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -64,11 +64,10 @@ good_area:
 
 	do {
 		int fault;
-survive:
+
 		fault = handle_mm_fault(mm, vma, address, is_write);
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
-				err = -ENOMEM;
 				goto out_of_memory;
 			} else if (fault & VM_FAULT_SIGBUS) {
 				err = -EACCES;
@@ -104,18 +103,14 @@ out:
 out_nosemaphore:
 	return err;
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
-	if (is_global_init(current)) {
-		up_read(&mm->mmap_sem);
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	goto out;
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
+	up_read(&mm->mmap_sem);
+	pagefault_out_of_memory();
+	return 0;
 }
 
 static void bad_segv(struct faultinfo fi, unsigned long ip)
@@ -214,9 +209,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		si.si_addr = (void __user *)address;
 		current->thread.arch.faultinfo = fi;
 		force_sig_info(SIGBUS, &si, current);
-	} else if (err == -ENOMEM) {
-		printk(KERN_INFO "VM: killing process %s\n", current->comm);
-		do_exit(SIGKILL);
 	} else {
 		BUG_ON(err != -EFAULT);
 		si.si_signo = SIGSEGV;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 57ec8c86a877..9e268b6b204e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-again:
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
@@ -859,25 +858,14 @@ no_context:
 	oops_end(flags, regs, sig);
 #endif
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		/*
-		 * Re-lookup the vma - in theory the vma tree might
-		 * have changed:
-		 */
-		goto again;
-	}
-
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & PF_USER)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aaa8b843be28..4a3d28c86443 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page)
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS)
 
+/*
+ * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
+ */
+extern void pagefault_out_of_memory(void);
+
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 extern void show_free_areas(void);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 558f9afe6e4e..c592965dab2f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -509,6 +509,69 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 	spin_unlock(&zone_scan_mutex);
 }
 
+/*
+ * Must be called with tasklist_lock held for read.
+ */
+static void __out_of_memory(gfp_t gfp_mask, int order)
+{
+	if (sysctl_oom_kill_allocating_task) {
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
+				"Out of memory (oom_kill_allocating_task)");
+
+	} else {
+		unsigned long points;
+		struct task_struct *p;
+
+retry:
+		/*
+		 * Rambo mode: Shoot down a process and hope it solves whatever
+		 * issues we may have.
+		 */
+		p = select_bad_process(&points, NULL);
+
+		if (PTR_ERR(p) == -1UL)
+			return;
+
+		/* Found nothing?!?! Either we hang forever, or we panic. */
+		if (!p) {
+			read_unlock(&tasklist_lock);
+			panic("Out of memory and no killable processes...\n");
+		}
+
+		if (oom_kill_process(p, gfp_mask, order, points, NULL,
+				"Out of memory"))
+			goto retry;
+	}
+}
+
+/*
+ * pagefault handler calls into here because it is out of memory but
+ * doesn't know exactly how or why.
+ */
+void pagefault_out_of_memory(void)
+{
+	unsigned long freed = 0;
+
+	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+	if (freed > 0)
+		/* Got some memory back in the last second. */
+		return;
+
+	if (sysctl_panic_on_oom)
+		panic("out of memory from page fault. panic_on_oom is selected.\n");
+
+	read_lock(&tasklist_lock);
+	__out_of_memory(0, 0); /* unknown gfp_mask and order */
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Give "p" a good chance of killing itself before we
+	 * retry to allocate memory.
+	 */
+	if (!test_thread_flag(TIF_MEMDIE))
+		schedule_timeout_uninterruptible(1);
+}
+
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
@@ -522,8 +585,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct task_struct *p;
-	unsigned long points = 0;
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
 
@@ -544,7 +605,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 
 	switch (constraint) {
 	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, gfp_mask, order, points, NULL,
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
 				"No available memory (MPOL_BIND)");
 		break;
 
@@ -553,35 +614,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 		panic("out of memory. panic_on_oom is selected\n");
 		/* Fall-through */
 	case CONSTRAINT_CPUSET:
-		if (sysctl_oom_kill_allocating_task) {
-			oom_kill_process(current, gfp_mask, order, points, NULL,
-				"Out of memory (oom_kill_allocating_task)");
-			break;
-		}
-retry:
-		/*
-		 * Rambo mode: Shoot down a process and hope it solves whatever
-		 * issues we may have.
-		 */
-		p = select_bad_process(&points, NULL);
-
-		if (PTR_ERR(p) == -1UL)
-			goto out;
-
-		/* Found nothing?!?! Either we hang forever, or we panic. */
-		if (!p) {
-			read_unlock(&tasklist_lock);
-			panic("Out of memory and no killable processes...\n");
-		}
-
-		if (oom_kill_process(p, gfp_mask, order, points, NULL,
-				"Out of memory"))
-			goto retry;
-
+		__out_of_memory(gfp_mask, order);
 		break;
 	}
 
-out:
 	read_unlock(&tasklist_lock);
 
 	/*