Diffstat (limited to 'mm')
-rw-r--r--   mm/memcontrol.c |   2
-rw-r--r--   mm/oom_kill.c   | 132
-rw-r--r--   mm/page_alloc.c |  17
3 files changed, 115 insertions, 36 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe4d258ef32b..fbf64e6f64e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1930,7 +1930,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle)
+	if (!handle || oom_killer_disabled)
 		goto cleanup;
 
 	owait.memcg = memcg;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3cbd76b8c13b..b8df76ee2be3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 /*
- * Number of OOM killer invocations (including memcg OOM killer).
- * Primarily used by PM freezer to check for potential races with
- * OOM killed frozen task.
+ * Number of OOM victims in flight
  */
-static atomic_t oom_kills = ATOMIC_INIT(0);
+static atomic_t oom_victims = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-int oom_kills_count(void)
-{
-	return atomic_read(&oom_kills);
-}
-
-void note_oom_kill(void)
-{
-	atomic_inc(&oom_kills);
-}
+bool oom_killer_disabled __read_mostly;
+static DECLARE_RWSEM(oom_sem);
 
 /**
  * mark_tsk_oom_victim - marks the given task as OOM victim.
  * @tsk: task to mark
+ *
+ * Has to be called with oom_sem taken for read, and never after
+ * the OOM killer has been disabled.
  */
 void mark_tsk_oom_victim(struct task_struct *tsk)
 {
-	set_tsk_thread_flag(tsk, TIF_MEMDIE);
-
+	WARN_ON(oom_killer_disabled);
+	/* OOM killer might race with memcg OOM */
+	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
 	 * if it is frozen because OOM killer wouldn't be able to free
@@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk)
 	 * that TIF_MEMDIE tasks should be ignored.
 	 */
 	__thaw_task(tsk);
+	atomic_inc(&oom_victims);
 }
 
 /**
  * unmark_oom_victim - unmarks the current task as OOM victim.
+ *
+ * Wakes up all waiters in oom_killer_disable()
  */
 void unmark_oom_victim(void)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_thread_flag(TIF_MEMDIE))
+		return;
+
+	down_read(&oom_sem);
+	/*
+	 * There is no need to signal the last OOM victim if there
+	 * is nobody who cares.
+	 */
+	if (!atomic_dec_return(&oom_victims) && oom_killer_disabled)
+		wake_up_all(&oom_victims_wait);
+	up_read(&oom_sem);
+}
+
+/**
+ * oom_killer_disable - disable OOM killer
+ *
+ * Forces all page allocations to fail rather than trigger OOM killer.
+ * Will block and wait until all OOM victims are killed.
+ *
+ * The function cannot be called when there are runnable user tasks because
+ * userspace would see unexpected allocation failures as a result. Any
+ * new use of this function should be discussed with the MM people.
+ *
+ * Returns true if successful and false if the OOM killer cannot be
+ * disabled.
+ */
+bool oom_killer_disable(void)
+{
+	/*
+	 * Make sure to not race with an ongoing OOM killer
+	 * and that current is not the victim.
+	 */
+	down_write(&oom_sem);
+	if (test_thread_flag(TIF_MEMDIE)) {
+		up_write(&oom_sem);
+		return false;
+	}
+
+	oom_killer_disabled = true;
+	up_write(&oom_sem);
+
+	wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+
+	return true;
+}
+
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+	down_write(&oom_sem);
+	oom_killer_disabled = false;
+	up_write(&oom_sem);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
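
The consumer of the new oom_killer_disable()/oom_killer_enable() pair lives outside mm/ (the PM freezer), so it does not appear in this diffstat. A minimal sketch of how such a caller might look, where freeze_user_tasks() and thaw_user_tasks() are hypothetical stand-ins for the freezer's own helpers, not kernel APIs:

	/*
	 * Hypothetical caller modelled on the PM freezer. The helpers
	 * freeze_user_tasks()/thaw_user_tasks() are illustrative only.
	 */
	static int freeze_with_oom_killer_disabled(void)
	{
		int error = freeze_user_tasks();

		if (error)
			return error;

		/*
		 * All user tasks are frozen, so no new OOM victims can
		 * appear. oom_killer_disable() blocks until oom_victims
		 * drains to zero, and fails if current is itself a victim
		 * (TIF_MEMDIE set).
		 */
		if (!oom_killer_disable())
			return -EBUSY;

		return 0;
	}

	static void thaw_with_oom_killer_enabled(void)
	{
		oom_killer_enable();
		thaw_user_tasks();
	}
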
@@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 }
 
 /**
- * out_of_memory - kill the "best" process when we run out of memory
+ * __out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
@@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask, bool force_kill)
 {
 	const nodemask_t *mpol_mask;
@@ -718,6 +771,32 @@ out:
 		schedule_timeout_killable(1);
 }
 
+/**
+ * out_of_memory - tries to invoke the OOM killer.
+ * @zonelist: zonelist pointer
+ * @gfp_mask: memory allocation flags
+ * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
+ * @force_kill: true if a task must be killed, even if others are exiting
+ *
+ * Invokes __out_of_memory and returns true unless the OOM killer has
+ * been disabled by oom_killer_disable(), in which case it returns false.
+ */
+bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask, bool force_kill)
+{
+	bool ret = false;
+
+	down_read(&oom_sem);
+	if (!oom_killer_disabled) {
+		__out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill);
+		ret = true;
+	}
+	up_read(&oom_sem);
+
+	return ret;
+}
+
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
  * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
@@ -727,12 +806,25 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
+	down_read(&oom_sem);
 	if (mem_cgroup_oom_synchronize(true))
-		return;
+		goto unlock;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
-		out_of_memory(NULL, 0, 0, NULL, false);
+		if (!oom_killer_disabled)
+			__out_of_memory(NULL, 0, 0, NULL, false);
+		else
+			/*
+			 * There shouldn't be any user tasks runnable while the
+			 * OOM killer is disabled, so the current task has to
+			 * be a racing OOM victim that oom_killer_disable()
+			 * is waiting for.
+			 */
+			WARN_ON(test_thread_flag(TIF_MEMDIE));
+
 		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
+unlock:
+	up_read(&oom_sem);
 }
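
The heart of the patch is the handshake between the victim-marking paths, which take oom_sem for read, and oom_killer_disable(), which takes it for write and then waits for oom_victims to drain. A small userspace model of that protocol, illustrative only and not kernel code: a pthread rwlock stands in for oom_sem, a mutex plus condvar for oom_victims_wait, a plain counter for the atomic, and the TIF_MEMDIE self-check is omitted for brevity:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_rwlock_t oom_sem = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t oom_victims_wait = PTHREAD_COND_INITIALIZER;
	static int oom_victims;			/* stands in for the atomic_t */
	static bool oom_killer_disabled;

	/* mark_tsk_oom_victim(): called with oom_sem held for read */
	static void mark_victim(void)
	{
		pthread_mutex_lock(&wait_lock);
		oom_victims++;
		pthread_mutex_unlock(&wait_lock);
	}

	/* unmark_oom_victim(): wake the disabler once the last victim exits */
	static void unmark_victim(void)
	{
		pthread_rwlock_rdlock(&oom_sem);
		pthread_mutex_lock(&wait_lock);
		if (--oom_victims == 0 && oom_killer_disabled)
			pthread_cond_broadcast(&oom_victims_wait);
		pthread_mutex_unlock(&wait_lock);
		pthread_rwlock_unlock(&oom_sem);
	}

	/* oom_killer_disable(): flip the flag, then wait for victims to drain */
	static void disable_oom_killer(void)
	{
		pthread_rwlock_wrlock(&oom_sem);	/* excludes in-flight OOM kills */
		oom_killer_disabled = true;
		pthread_rwlock_unlock(&oom_sem);

		pthread_mutex_lock(&wait_lock);
		while (oom_victims > 0)
			pthread_cond_wait(&oom_victims_wait, &wait_lock);
		pthread_mutex_unlock(&wait_lock);
	}

	static void *victim_thread(void *arg)
	{
		pthread_rwlock_rdlock(&oom_sem);	/* out_of_memory() side */
		mark_victim();
		pthread_rwlock_unlock(&oom_sem);
		unmark_victim();			/* the "victim" exits */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, victim_thread, NULL);
		disable_oom_killer();			/* returns only once drained */
		pthread_join(t, NULL);
		printf("all victims drained, OOM killer disabled\n");
		return 0;
	}

Because the disabler takes oom_sem for write before setting the flag, it cannot overtake an OOM kill already in progress, and because victims are counted before the read lock is dropped, the wait cannot miss one.
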
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 641d5a9a8617..134e25525044 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -244,8 +244,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
 					PB_migrate, PB_migrate_end);
 }
 
-bool oom_killer_disabled __read_mostly;
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -2317,9 +2315,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 
 	*did_some_progress = 0;
 
-	if (oom_killer_disabled)
-		return NULL;
-
 	/*
 	 * Acquire the per-zone oom lock for each zone. If that
 	 * fails, somebody else is making progress for us.
@@ -2331,14 +2326,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * PM-freezer should be notified that there might be an OOM killer on
-	 * its way to kill and wake somebody up. This is too early and we might
-	 * end up not killing anything but false positives are acceptable.
-	 * See freeze_processes.
-	 */
-	note_oom_kill();
-
-	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
@@ -2372,8 +2359,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false);
-	*did_some_progress = 1;
+	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+		*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(ac->zonelist, gfp_mask);
 	return page;
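
One consequence worth spelling out: with out_of_memory() now returning bool, a disabled OOM killer leaves *did_some_progress at 0, and it is the allocator slow path (not shown in this diff) that turns that into a failed allocation rather than a retry. Roughly, as a paraphrase of the surrounding kernel code rather than the code itself:

	/* Shape of the __alloc_pages_slowpath() decision after this change. */
	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
	if (page)
		goto got_pg;
	if (!did_some_progress)
		goto nopage;	/* OOM killer disabled: fail the allocation */
	goto retry;		/* a task was killed, memory may come back */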