-rw-r--r--   drivers/tty/sysrq.c     |   5
-rw-r--r--   include/linux/oom.h     |  14
-rw-r--r--   kernel/exit.c           |   3
-rw-r--r--   kernel/power/process.c  |  50
-rw-r--r--   mm/memcontrol.c         |   2
-rw-r--r--   mm/oom_kill.c           | 132
-rw-r--r--   mm/page_alloc.c         |  17
7 files changed, 132 insertions, 91 deletions
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 0071469ecbf1..259a4d5a4e8f 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,8 +355,9 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-	out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
-		      0, NULL, true);
+	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
+			   GFP_KERNEL, 0, NULL, true))
+		pr_info("OOM request ignored because killer is disabled\n");
 }
 
 static DECLARE_WORK(moom_work, moom_callback);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index b42b80f88c3a..d5771bed59c9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -72,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 		unsigned long totalpages, const nodemask_t *nodemask,
 		bool force_kill);
 
-extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
 extern bool oom_killer_disabled;
-
-static inline void oom_killer_disable(void)
-{
-	oom_killer_disabled = true;
-}
-
-static inline void oom_killer_enable(void)
-{
-	oom_killer_disabled = false;
-}
+extern bool oom_killer_disable(void);
+extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 02b3d1ab2ec0..feff10bbb307 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -435,7 +435,8 @@ static void exit_mm(struct task_struct *tsk)
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
-	unmark_oom_victim();
+	if (test_thread_flag(TIF_MEMDIE))
+		unmark_oom_victim();
 }
 
 static struct task_struct *find_alive_thread(struct task_struct *p)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 3ac45f192e9f..564f786df470 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -108,30 +108,6 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
-static bool __check_frozen_processes(void)
-{
-	struct task_struct *g, *p;
-
-	for_each_process_thread(g, p)
-		if (p != current && !freezer_should_skip(p) && !frozen(p))
-			return false;
-
-	return true;
-}
-
-/*
- * Returns true if all freezable tasks (except for current) are frozen already
- */
-static bool check_frozen_processes(void)
-{
-	bool ret;
-
-	read_lock(&tasklist_lock);
-	ret = __check_frozen_processes();
-	read_unlock(&tasklist_lock);
-	return ret;
-}
-
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  * The current thread will not be frozen. The same process that calls
@@ -142,7 +118,6 @@ static bool check_frozen_processes(void)
 int freeze_processes(void)
 {
 	int error;
-	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -157,29 +132,22 @@ int freeze_processes(void)
 	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
-	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
-		oom_killer_disable();
-
-		/*
-		 * There might have been an OOM kill while we were
-		 * freezing tasks and the killed task might be still
-		 * on the way out so we have to double check for race.
-		 */
-		if (oom_kills_count() != oom_kills_saved &&
-				!check_frozen_processes()) {
-			__usermodehelper_set_disable_depth(UMH_ENABLED);
-			pr_cont("OOM in progress.");
-			error = -EBUSY;
-		} else {
-			pr_cont("done.");
-		}
+		pr_cont("done.");
 	}
 	pr_cont("\n");
 	BUG_ON(in_atomic());
 
+	/*
+	 * Now that the whole userspace is frozen we need to disable
+	 * the OOM killer to disallow any further interference with
+	 * killable tasks.
+	 */
+	if (!error && !oom_killer_disable())
+		error = -EBUSY;
+
 	if (error)
 		thaw_processes();
 	return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe4d258ef32b..fbf64e6f64e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1930,7 +1930,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle)
+	if (!handle || oom_killer_disabled)
 		goto cleanup;
 
 	owait.memcg = memcg;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3cbd76b8c13b..b8df76ee2be3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 /*
- * Number of OOM killer invocations (including memcg OOM killer).
- * Primarily used by PM freezer to check for potential races with
- * OOM killed frozen task.
+ * Number of OOM victims in flight
  */
-static atomic_t oom_kills = ATOMIC_INIT(0);
+static atomic_t oom_victims = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-int oom_kills_count(void)
-{
-	return atomic_read(&oom_kills);
-}
-
-void note_oom_kill(void)
-{
-	atomic_inc(&oom_kills);
-}
+bool oom_killer_disabled __read_mostly;
+static DECLARE_RWSEM(oom_sem);
 
 /**
  * mark_tsk_oom_victim - marks the given taks as OOM victim.
  * @tsk: task to mark
+ *
+ * Has to be called with oom_sem taken for read and never after
+ * oom has been disabled already.
  */
 void mark_tsk_oom_victim(struct task_struct *tsk)
 {
-	set_tsk_thread_flag(tsk, TIF_MEMDIE);
-
+	WARN_ON(oom_killer_disabled);
+	/* OOM killer might race with memcg OOM */
+	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
 	 * if it is frozen because OOM killer wouldn't be able to free
@@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk)
 	 * that TIF_MEMDIE tasks should be ignored.
 	 */
 	__thaw_task(tsk);
+	atomic_inc(&oom_victims);
 }
 
 /**
  * unmark_oom_victim - unmarks the current task as OOM victim.
+ *
+ * Wakes up all waiters in oom_killer_disable()
  */
 void unmark_oom_victim(void)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_thread_flag(TIF_MEMDIE))
+		return;
+
+	down_read(&oom_sem);
+	/*
+	 * There is no need to signal the last oom_victim if there
+	 * is nobody who cares.
+	 */
+	if (!atomic_dec_return(&oom_victims) && oom_killer_disabled)
+		wake_up_all(&oom_victims_wait);
+	up_read(&oom_sem);
+}
+
+/**
+ * oom_killer_disable - disable OOM killer
+ *
+ * Forces all page allocations to fail rather than trigger OOM killer.
+ * Will block and wait until all OOM victims are killed.
+ *
+ * The function cannot be called when there are runnable user tasks because
+ * the userspace would see unexpected allocation failures as a result. Any
+ * new usage of this function should be discussed with MM people.
+ *
+ * Returns true if successful and false if the OOM killer cannot be
+ * disabled.
+ */
+bool oom_killer_disable(void)
+{
+	/*
+	 * Make sure to not race with an ongoing OOM killer
+	 * and that the current is not the victim.
+	 */
+	down_write(&oom_sem);
+	if (test_thread_flag(TIF_MEMDIE)) {
+		up_write(&oom_sem);
+		return false;
+	}
+
+	oom_killer_disabled = true;
+	up_write(&oom_sem);
+
+	wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+
+	return true;
+}
+
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+	down_write(&oom_sem);
+	oom_killer_disabled = false;
+	up_write(&oom_sem);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 }
 
 /**
- * out_of_memory - kill the "best" process when we run out of memory
+ * __out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
@@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask, bool force_kill)
 {
 	const nodemask_t *mpol_mask;
@@ -718,6 +771,32 @@ out:
 		schedule_timeout_killable(1);
 }
 
+/**
+ * out_of_memory - tries to invoke OOM killer.
+ * @zonelist: zonelist pointer
+ * @gfp_mask: memory allocation flags
+ * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
+ * @force_kill: true if a task must be killed, even if others are exiting
+ *
+ * Invokes __out_of_memory() and returns true unless the OOM killer has been
+ * disabled by oom_killer_disable(), in which case it returns false.
+ */
+bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask, bool force_kill)
+{
+	bool ret = false;
+
+	down_read(&oom_sem);
+	if (!oom_killer_disabled) {
+		__out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill);
+		ret = true;
+	}
+	up_read(&oom_sem);
+
+	return ret;
+}
+
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
  * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
@@ -727,12 +806,25 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
+	down_read(&oom_sem);
 	if (mem_cgroup_oom_synchronize(true))
-		return;
+		goto unlock;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
-		out_of_memory(NULL, 0, 0, NULL, false);
+		if (!oom_killer_disabled)
+			__out_of_memory(NULL, 0, 0, NULL, false);
+		else
+			/*
+			 * There shouldn't be any user tasks runnable while the
+			 * OOM killer is disabled, so the current task has to
+			 * be a racing OOM victim which oom_killer_disable()
+			 * is waiting for.
+			 */
+			WARN_ON(test_thread_flag(TIF_MEMDIE));
+
 		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
+unlock:
+	up_read(&oom_sem);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 641d5a9a8617..134e25525044 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -244,8 +244,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
 					PB_migrate, PB_migrate_end);
 }
 
-bool oom_killer_disabled __read_mostly;
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -2317,9 +2315,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 
 	*did_some_progress = 0;
 
-	if (oom_killer_disabled)
-		return NULL;
-
 	/*
 	 * Acquire the per-zone oom lock for each zone. If that
 	 * fails, somebody else is making progress for us.
@@ -2331,14 +2326,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * PM-freezer should be notified that there might be an OOM killer on
-	 * its way to kill and wake somebody up. This is too early and we might
-	 * end up not killing anything but false positives are acceptable.
-	 * See freeze_processes.
-	 */
-	note_oom_kill();
-
-	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
@@ -2372,8 +2359,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false);
-	*did_some_progress = 1;
+	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+		*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(ac->zonelist, gfp_mask);
 	return page;
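
The synchronization the patch introduces (oom_sem taken for read around every OOM invocation, an oom_victims counter bumped per TIF_MEMDIE task, and a waitqueue that oom_killer_disable() blocks on until the last victim exits) can be sketched in plain user-space C. The model below is only an illustration of that locking protocol under pthreads, not kernel code; the *_model names are invented for the sketch and the victim-selection and TIF_MEMDIE details are deliberately left out.

/* Build with: cc -pthread oom_model.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t oom_sem = PTHREAD_RWLOCK_INITIALIZER;   /* models oom_sem */
static pthread_mutex_t victims_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t victims_wait = PTHREAD_COND_INITIALIZER;  /* models oom_victims_wait */
static int oom_victims;                                         /* models atomic_t oom_victims */
static bool oom_killer_disabled;

/* Models out_of_memory(): only acts while the killer is enabled. */
static bool out_of_memory_model(void)
{
	bool ret = false;

	pthread_rwlock_rdlock(&oom_sem);
	if (!oom_killer_disabled) {
		/* __out_of_memory() would pick a victim here; mark it. */
		pthread_mutex_lock(&victims_lock);
		oom_victims++;                  /* mark_tsk_oom_victim() */
		pthread_mutex_unlock(&victims_lock);
		ret = true;
	}
	pthread_rwlock_unlock(&oom_sem);
	return ret;
}

/* Models unmark_oom_victim() on the victim's exit path. */
static void victim_exits_model(void)
{
	pthread_rwlock_rdlock(&oom_sem);
	pthread_mutex_lock(&victims_lock);
	if (--oom_victims == 0 && oom_killer_disabled)
		pthread_cond_broadcast(&victims_wait);  /* wake oom_killer_disable() */
	pthread_mutex_unlock(&victims_lock);
	pthread_rwlock_unlock(&oom_sem);
}

/* Models oom_killer_disable(): exclude in-flight invocations, then wait. */
static bool oom_killer_disable_model(void)
{
	pthread_rwlock_wrlock(&oom_sem);
	oom_killer_disabled = true;
	pthread_rwlock_unlock(&oom_sem);

	pthread_mutex_lock(&victims_lock);
	while (oom_victims)                     /* wait_event(oom_victims_wait, ...) */
		pthread_cond_wait(&victims_wait, &victims_lock);
	pthread_mutex_unlock(&victims_lock);
	return true;
}

int main(void)
{
	out_of_memory_model();          /* one victim in flight */
	victim_exits_model();           /* victim exits, counter drops to zero */
	printf("disabled: %d\n", oom_killer_disable_model());
	return 0;
}

In the patch itself the same ordering is what makes freeze_processes() safe: the write lock in oom_killer_disable() cannot be taken while any out_of_memory() call holds the read lock, and the wait on oom_victims guarantees that every already-marked victim has released its memory before suspend continues.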