Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c |   2
-rw-r--r--  mm/oom_kill.c   | 132
-rw-r--r--  mm/page_alloc.c |  17
3 files changed, 115 insertions(+), 36 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe4d258ef32b..fbf64e6f64e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1930,7 +1930,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle)
+	if (!handle || oom_killer_disabled)
 		goto cleanup;
 
 	owait.memcg = memcg;
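Why this one-liner is enough: once the OOM killer is disabled, no memcg OOM kill will be delivered, so sleeping on owait would block forever. Bailing out to cleanup makes mem_cgroup_oom_synchronize() unwind the deferred OOM state and return false, and the page-fault path then falls through to the global handling in pagefault_out_of_memory() (changed later in this diff). A condensed sketch of that caller, for orientation only:

	/* Condensed caller sketch; see the full pagefault_out_of_memory()
	 * hunk in mm/oom_kill.c below. */
	if (mem_cgroup_oom_synchronize(true))
		return;		/* a memcg OOM kill was handled */
	/* otherwise fall back to the (possibly disabled) global OOM path */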
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3cbd76b8c13b..b8df76ee2be3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 /*
- * Number of OOM killer invocations (including memcg OOM killer).
- * Primarily used by PM freezer to check for potential races with
- * OOM killed frozen task.
+ * Number of OOM victims in flight
  */
-static atomic_t oom_kills = ATOMIC_INIT(0);
+static atomic_t oom_victims = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-int oom_kills_count(void)
-{
-	return atomic_read(&oom_kills);
-}
-
-void note_oom_kill(void)
-{
-	atomic_inc(&oom_kills);
-}
+bool oom_killer_disabled __read_mostly;
+static DECLARE_RWSEM(oom_sem);
 
 /**
  * mark_tsk_oom_victim - marks the given task as OOM victim.
  * @tsk: task to mark
+ *
+ * Has to be called with oom_sem taken for read and never after
+ * OOM has been disabled already.
  */
 void mark_tsk_oom_victim(struct task_struct *tsk)
 {
-	set_tsk_thread_flag(tsk, TIF_MEMDIE);
-
+	WARN_ON(oom_killer_disabled);
+	/* OOM killer might race with memcg OOM */
+	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
 	 * if it is frozen because OOM killer wouldn't be able to free
@@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk)
 	 * that TIF_MEMDIE tasks should be ignored.
 	 */
 	__thaw_task(tsk);
+	atomic_inc(&oom_victims);
 }
 
 /**
  * unmark_oom_victim - unmarks the current task as OOM victim.
+ *
+ * Wakes up all waiters in oom_killer_disable()
  */
 void unmark_oom_victim(void)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_thread_flag(TIF_MEMDIE))
+		return;
+
+	down_read(&oom_sem);
+	/*
+	 * There is no need to signal the last oom_victim if there
+	 * is nobody who cares.
+	 */
+	if (!atomic_dec_return(&oom_victims) && oom_killer_disabled)
+		wake_up_all(&oom_victims_wait);
+	up_read(&oom_sem);
+}
+
+/**
+ * oom_killer_disable - disable OOM killer
+ *
+ * Forces all page allocations to fail rather than trigger the OOM killer.
+ * Will block and wait until all OOM victims are killed.
+ *
+ * The function cannot be called when there are runnable user tasks because
+ * userspace would see unexpected allocation failures as a result. Any new
+ * usage of this function should be discussed with MM people.
+ *
+ * Returns true if successful and false if the OOM killer cannot be
+ * disabled.
+ */
+bool oom_killer_disable(void)
+{
+	/*
+	 * Make sure to not race with an ongoing OOM killer
+	 * and that the current task is not the victim.
+	 */
+	down_write(&oom_sem);
+	if (test_thread_flag(TIF_MEMDIE)) {
+		up_write(&oom_sem);
+		return false;
+	}
+
+	oom_killer_disabled = true;
+	up_write(&oom_sem);
+
+	wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+
+	return true;
+}
+
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+	down_write(&oom_sem);
+	oom_killer_disabled = false;
+	up_write(&oom_sem);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
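Taken together, oom_sem, oom_victims and oom_victims_wait implement a disable-and-drain protocol: mark_tsk_oom_victim() runs under the read side of oom_sem, so the write lock in oom_killer_disable() guarantees that once oom_killer_disabled is set no new victim can appear, and wait_event() only has to drain the victims already counted. A minimal runnable userspace analogue, assuming pthreads in place of the kernel's rwsem and waitqueue (illustrative only, not kernel code):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	static pthread_rwlock_t oom_sem = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t victims_wait = PTHREAD_COND_INITIALIZER;
	static int oom_victims;
	static bool oom_killer_disabled;

	/* Analogue of mark_tsk_oom_victim(); caller holds oom_sem for read. */
	static void mark_victim(void)
	{
		pthread_mutex_lock(&count_lock);
		oom_victims++;
		pthread_mutex_unlock(&count_lock);
	}

	/* Analogue of unmark_oom_victim(); wakes the waiter on last victim. */
	static void unmark_victim(void)
	{
		pthread_rwlock_rdlock(&oom_sem);
		pthread_mutex_lock(&count_lock);
		if (--oom_victims == 0 && oom_killer_disabled)
			pthread_cond_broadcast(&victims_wait);
		pthread_mutex_unlock(&count_lock);
		pthread_rwlock_unlock(&oom_sem);
	}

	/* Analogue of oom_killer_disable(): set the flag, then drain. */
	static void disable_oom_killer(void)
	{
		pthread_rwlock_wrlock(&oom_sem);	/* excludes mark_victim() */
		oom_killer_disabled = true;
		pthread_rwlock_unlock(&oom_sem);

		pthread_mutex_lock(&count_lock);
		while (oom_victims > 0)			/* wait_event() analogue */
			pthread_cond_wait(&victims_wait, &count_lock);
		pthread_mutex_unlock(&count_lock);
	}

	static void *victim_thread(void *arg)
	{
		(void)arg;
		usleep(100 * 1000);	/* the victim runs for a while, then exits */
		unmark_victim();
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_rwlock_rdlock(&oom_sem);	/* OOM killer picks a victim */
		mark_victim();
		pthread_rwlock_unlock(&oom_sem);

		pthread_create(&t, NULL, victim_thread, NULL);
		disable_oom_killer();	/* blocks until the victim is drained */
		puts("victims drained; freezing may proceed");
		return pthread_join(t, NULL);
	}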
@@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 }
 
 /**
- * out_of_memory - kill the "best" process when we run out of memory
+ * __out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
@@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask, bool force_kill)
 {
 	const nodemask_t *mpol_mask;
@@ -718,6 +771,32 @@ out:
 		schedule_timeout_killable(1);
 }
 
+/**
+ * out_of_memory - tries to invoke the OOM killer.
+ * @zonelist: zonelist pointer
+ * @gfp_mask: memory allocation flags
+ * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
+ * @force_kill: true if a task must be killed, even if others are exiting
+ *
+ * Invokes __out_of_memory() and returns true unless the OOM killer has
+ * been disabled by oom_killer_disable(), in which case it returns false.
+ */
+bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask, bool force_kill)
+{
+	bool ret = false;
+
+	down_read(&oom_sem);
+	if (!oom_killer_disabled) {
+		__out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill);
+		ret = true;
+	}
+	up_read(&oom_sem);
+
+	return ret;
+}
+
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
  * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
@@ -727,12 +806,25 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
+	down_read(&oom_sem);
 	if (mem_cgroup_oom_synchronize(true))
-		return;
+		goto unlock;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
-		out_of_memory(NULL, 0, 0, NULL, false);
+		if (!oom_killer_disabled)
+			__out_of_memory(NULL, 0, 0, NULL, false);
+		else
+			/*
+			 * There shouldn't be any user tasks runnable while the
+			 * OOM killer is disabled, so the current task has to
+			 * be a racing OOM victim which oom_killer_disable()
+			 * is waiting for.
+			 */
+			WARN_ON(test_thread_flag(TIF_MEMDIE));
+
 		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
+unlock:
+	up_read(&oom_sem);
 }
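The consumer of the new oom_killer_disable()/oom_killer_enable() pair is the PM freezer: once all user tasks are frozen, nothing should be allocating on userspace's behalf, so the freezer can disable the OOM killer and know that any TIF_MEMDIE task still in flight will be drained before hibernation snapshots memory. A hedged sketch of that call site, assuming it lands in the freezer path outside mm/ (the wrapper names and exact placement are illustrative, not part of this diff):

	/* Illustrative only; the real integration lives outside mm/. */
	static int freeze_user_tasks(void)
	{
		int error = try_to_freeze_tasks(true);	/* freeze userspace first */

		/*
		 * If current is itself an OOM victim, oom_killer_disable()
		 * refuses and the freeze attempt must be aborted.
		 */
		if (!error && !oom_killer_disable())
			error = -EBUSY;

		return error;
	}

	/* ...and the thaw path simply re-enables it: */
	static void thaw_user_tasks(void)
	{
		oom_killer_enable();
		/* followed by the usual thawing of frozen tasks */
	}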
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 641d5a9a8617..134e25525044 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -244,8 +244,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
 					PB_migrate, PB_migrate_end);
 }
 
-bool oom_killer_disabled __read_mostly;
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -2317,9 +2315,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 
 	*did_some_progress = 0;
 
-	if (oom_killer_disabled)
-		return NULL;
-
 	/*
 	 * Acquire the per-zone oom lock for each zone. If that
 	 * fails, somebody else is making progress for us.
@@ -2331,14 +2326,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * PM-freezer should be notified that there might be an OOM killer on
-	 * its way to kill and wake somebody up. This is too early and we might
-	 * end up not killing anything but false positives are acceptable.
-	 * See freeze_processes.
-	 */
-	note_oom_kill();
-
-	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
@@ -2372,8 +2359,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
-		*did_some_progress = 1;
+	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+		*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(ac->zonelist, gfp_mask);
 	return page;
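The net effect in the allocator: when out_of_memory() returns false because the killer is disabled, *did_some_progress stays 0 and the slow path gives up instead of retrying, so page allocations fail cleanly while the freezer holds the OOM killer off. A simplified sketch of the relevant branch in __alloc_pages_slowpath() (condensed; the real retry logic has more cases than shown here):

	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
	if (page)
		goto got_pg;
	if (!did_some_progress)
		goto nopage;	/* OOM killer disabled: fail the allocation */
	goto retry;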