-rw-r--r--  drivers/tty/sysrq.c       5
-rw-r--r--  include/linux/oom.h      14
-rw-r--r--  kernel/exit.c             3
-rw-r--r--  kernel/power/process.c   50
-rw-r--r--  mm/memcontrol.c           2
-rw-r--r--  mm/oom_kill.c           132
-rw-r--r--  mm/page_alloc.c          17
7 files changed, 132 insertions(+), 91 deletions(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 0071469ecbf1..259a4d5a4e8f 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,8 +355,9 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-	out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
-		      0, NULL, true);
+	if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL),
+			   GFP_KERNEL, 0, NULL, true))
+		pr_info("OOM request ignored because killer is disabled\n");
 }
 
 static DECLARE_WORK(moom_work, moom_callback);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index b42b80f88c3a..d5771bed59c9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -72,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 				     unsigned long totalpages, const nodemask_t *nodemask,
 				     bool force_kill);
 
-extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *mask, bool force_kill);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 
 extern bool oom_killer_disabled;
-
-static inline void oom_killer_disable(void)
-{
-	oom_killer_disabled = true;
-}
-
-static inline void oom_killer_enable(void)
-{
-	oom_killer_disabled = false;
-}
+extern bool oom_killer_disable(void);
+extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
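
The header change is the heart of the rework: the old inline helpers flipped oom_killer_disabled with no synchronization at all, so a task OOM-killed just before the flip could still be exiting while the caller believed the system was quiescent. The new out-of-line oom_killer_disable() can fail, and every caller has to handle that. Below is a minimal userspace sketch of the caller-side contract; the stubs are hypothetical stand-ins mirroring the kernel names, not kernel code:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the kernel API declared above. */
    static bool oom_killer_disable(void) { return true; }
    static void oom_killer_enable(void) { }

    static int enter_oom_free_section(void)
    {
            /* May fail: the caller itself might be a racing OOM victim. */
            if (!oom_killer_disable())
                    return -EBUSY;

            /* ... work that must not race with the OOM killer ... */

            oom_killer_enable();
            return 0;
    }

    int main(void)
    {
            printf("status: %d\n", enter_oom_free_section());
            return 0;
    }
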
diff --git a/kernel/exit.c b/kernel/exit.c
index 02b3d1ab2ec0..feff10bbb307 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -435,7 +435,8 @@ static void exit_mm(struct task_struct *tsk)
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
-	unmark_oom_victim();
+	if (test_thread_flag(TIF_MEMDIE))
+		unmark_oom_victim();
 }
 
 static struct task_struct *find_alive_thread(struct task_struct *p)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 3ac45f192e9f..564f786df470 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -108,30 +108,6 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
-static bool __check_frozen_processes(void)
-{
-	struct task_struct *g, *p;
-
-	for_each_process_thread(g, p)
-		if (p != current && !freezer_should_skip(p) && !frozen(p))
-			return false;
-
-	return true;
-}
-
-/*
- * Returns true if all freezable tasks (except for current) are frozen already
- */
-static bool check_frozen_processes(void)
-{
-	bool ret;
-
-	read_lock(&tasklist_lock);
-	ret = __check_frozen_processes();
-	read_unlock(&tasklist_lock);
-	return ret;
-}
-
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  * The current thread will not be frozen. The same process that calls
@@ -142,7 +118,6 @@ static bool check_frozen_processes(void)
 int freeze_processes(void)
 {
 	int error;
-	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -157,29 +132,22 @@ int freeze_processes(void)
 	pm_wakeup_clear();
 	pr_info("Freezing user space processes ... ");
 	pm_freezing = true;
-	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
-		oom_killer_disable();
-
-		/*
-		 * There might have been an OOM kill while we were
-		 * freezing tasks and the killed task might be still
-		 * on the way out so we have to double check for race.
-		 */
-		if (oom_kills_count() != oom_kills_saved &&
-		    !check_frozen_processes()) {
-			__usermodehelper_set_disable_depth(UMH_ENABLED);
-			pr_cont("OOM in progress.");
-			error = -EBUSY;
-		} else {
-			pr_cont("done.");
-		}
+		pr_cont("done.");
 	}
 	pr_cont("\n");
 	BUG_ON(in_atomic());
 
+	/*
+	 * Now that the whole userspace is frozen we need to disable
+	 * the OOM killer to disallow any further interference with
+	 * killable tasks.
+	 */
+	if (!error && !oom_killer_disable())
+		error = -EBUSY;
+
 	if (error)
 		thaw_processes();
 	return error;
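
With the oom_kills_count() heuristic gone, freeze_processes() no longer has to guess whether an OOM kill raced with the freeze: it disables the killer after userspace is frozen and treats a failed disable as -EBUSY. A compact control-flow model of the new ordering follows; the stubs are hypothetical and only the call sequence reflects the patch:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stubs modeling the kernel helpers used above. */
    static int try_to_freeze_tasks(bool user_only) { (void)user_only; return 0; }
    static bool oom_killer_disable(void) { return true; }
    static void thaw_processes(void) { puts("thawing everything back"); }

    static int freeze_processes(void)
    {
            int error = try_to_freeze_tasks(true);

            /*
             * Disable the OOM killer only once all freezable user tasks
             * are frozen; a false return means a racing OOM victim was
             * detected, so the whole freeze is rolled back.
             */
            if (!error && !oom_killer_disable())
                    error = -EBUSY;

            if (error)
                    thaw_processes();
            return error;
    }

    int main(void) { return freeze_processes() ? 1 : 0; }
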
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe4d258ef32b..fbf64e6f64e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1930,7 +1930,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (!memcg)
 		return false;
 
-	if (!handle)
+	if (!handle || oom_killer_disabled)
 		goto cleanup;
 
 	owait.memcg = memcg;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3cbd76b8c13b..b8df76ee2be3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 /*
- * Number of OOM killer invocations (including memcg OOM killer).
- * Primarily used by PM freezer to check for potential races with
- * OOM killed frozen task.
+ * Number of OOM victims in flight
  */
-static atomic_t oom_kills = ATOMIC_INIT(0);
+static atomic_t oom_victims = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-int oom_kills_count(void)
-{
-	return atomic_read(&oom_kills);
-}
-
-void note_oom_kill(void)
-{
-	atomic_inc(&oom_kills);
-}
+bool oom_killer_disabled __read_mostly;
+static DECLARE_RWSEM(oom_sem);
 
 /**
  * mark_tsk_oom_victim - marks the given taks as OOM victim.
  * @tsk: task to mark
+ *
+ * Has to be called with oom_sem taken for read and never after
+ * oom has been disabled already.
  */
 void mark_tsk_oom_victim(struct task_struct *tsk)
 {
-	set_tsk_thread_flag(tsk, TIF_MEMDIE);
-
+	WARN_ON(oom_killer_disabled);
+	/* OOM killer might race with memcg OOM */
+	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
 	 * if it is frozen because OOM killer wouldn't be able to free
@@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk)
 	 * that TIF_MEMDIE tasks should be ignored.
 	 */
 	__thaw_task(tsk);
+	atomic_inc(&oom_victims);
 }
 
 /**
  * unmark_oom_victim - unmarks the current task as OOM victim.
+ *
+ * Wakes up all waiters in oom_killer_disable()
  */
 void unmark_oom_victim(void)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_thread_flag(TIF_MEMDIE))
+		return;
+
+	down_read(&oom_sem);
+	/*
+	 * There is no need to signal the last oom_victim if there
+	 * is nobody who cares.
+	 */
+	if (!atomic_dec_return(&oom_victims) && oom_killer_disabled)
+		wake_up_all(&oom_victims_wait);
+	up_read(&oom_sem);
+}
+
+/**
+ * oom_killer_disable - disable OOM killer
+ *
+ * Forces all page allocations to fail rather than trigger OOM killer.
+ * Will block and wait until all OOM victims are killed.
+ *
+ * The function cannot be called when there are runnable user tasks because
+ * the userspace would see unexpected allocation failures as a result. Any
+ * new usage of this function should be consulted with MM people.
+ *
+ * Returns true if successful and false if the OOM killer cannot be
+ * disabled.
+ */
+bool oom_killer_disable(void)
+{
+	/*
+	 * Make sure to not race with an ongoing OOM killer
+	 * and that current is not the victim.
+	 */
+	down_write(&oom_sem);
+	if (test_thread_flag(TIF_MEMDIE)) {
+		up_write(&oom_sem);
+		return false;
+	}
+
+	oom_killer_disabled = true;
+	up_write(&oom_sem);
+
+	wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+
+	return true;
+}
+
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+	down_write(&oom_sem);
+	oom_killer_disabled = false;
+	up_write(&oom_sem);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 }
 
 /**
- * out_of_memory - kill the "best" process when we run out of memory
+ * __out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
@@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask, bool force_kill)
 {
 	const nodemask_t *mpol_mask;
@@ -718,6 +771,32 @@ out:
 	schedule_timeout_killable(1);
 }
 
+/**
+ * out_of_memory - tries to invoke OOM killer.
+ * @zonelist: zonelist pointer
+ * @gfp_mask: memory allocation flags
+ * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
+ * @force_kill: true if a task must be killed, even if others are exiting
+ *
+ * Invokes __out_of_memory() and returns true unless the OOM killer has
+ * been disabled by oom_killer_disable(), in which case it returns false.
+ */
+bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask, bool force_kill)
+{
+	bool ret = false;
+
+	down_read(&oom_sem);
+	if (!oom_killer_disabled) {
+		__out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill);
+		ret = true;
+	}
+	up_read(&oom_sem);
+
+	return ret;
+}
+
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
  * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
@@ -727,12 +806,25 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
+	down_read(&oom_sem);
 	if (mem_cgroup_oom_synchronize(true))
-		return;
+		goto unlock;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
-		out_of_memory(NULL, 0, 0, NULL, false);
+		if (!oom_killer_disabled)
+			__out_of_memory(NULL, 0, 0, NULL, false);
+		else
+			/*
+			 * There shouldn't be any user tasks runnable while the
+			 * OOM killer is disabled, so the current task has to
+			 * be a racing OOM victim which oom_killer_disable()
+			 * is waiting for.
+			 */
+			WARN_ON(test_thread_flag(TIF_MEMDIE));
+
 		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
+unlock:
+	up_read(&oom_sem);
 }
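
The synchronization scheme above has three parts: oom_sem, taken for read around every OOM kill and for write by oom_killer_disable(); oom_victims, the count of TIF_MEMDIE tasks still on the way out; and oom_victims_wait, on which the disabler blocks until that count drops to zero. The following userspace model of the same protocol uses a pthread rwlock in place of the rwsem and a condition variable in place of the waitqueue; it is an illustration only, not kernel code:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Userspace model: rwlock ~ oom_sem, condvar ~ oom_victims_wait. */
    static pthread_rwlock_t oom_sem = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t victims_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t victims_wait = PTHREAD_COND_INITIALIZER;
    static int oom_victims;
    static bool oom_killer_disabled;

    static void mark_victim(void)        /* ~ mark_tsk_oom_victim() */
    {
            pthread_mutex_lock(&victims_lock);
            oom_victims++;
            pthread_mutex_unlock(&victims_lock);
    }

    static void unmark_victim(void)      /* ~ unmark_oom_victim() */
    {
            /* The read lock orders victim exit against oom_killer_disable(). */
            pthread_rwlock_rdlock(&oom_sem);
            pthread_mutex_lock(&victims_lock);
            if (--oom_victims == 0 && oom_killer_disabled)
                    pthread_cond_broadcast(&victims_wait);
            pthread_mutex_unlock(&victims_lock);
            pthread_rwlock_unlock(&oom_sem);
    }

    static bool oom_killer_disable(void)
    {
            pthread_rwlock_wrlock(&oom_sem); /* excludes in-flight OOM kills */
            oom_killer_disabled = true;
            pthread_rwlock_unlock(&oom_sem);

            /* Wait until every already-marked victim has finished exiting. */
            pthread_mutex_lock(&victims_lock);
            while (oom_victims > 0)
                    pthread_cond_wait(&victims_wait, &victims_lock);
            pthread_mutex_unlock(&victims_lock);
            return true;
    }

    int main(void)
    {
            mark_victim();
            unmark_victim();             /* victim exits before the disable */
            printf("disabled: %d\n", oom_killer_disable());
            return 0;
    }

The kernel variant additionally fails the disable when current is already a victim; the model omits that check for brevity.
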
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 641d5a9a8617..134e25525044 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -244,8 +244,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
 					PB_migrate, PB_migrate_end);
 }
 
-bool oom_killer_disabled __read_mostly;
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -2317,9 +2315,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 
 	*did_some_progress = 0;
 
-	if (oom_killer_disabled)
-		return NULL;
-
 	/*
 	 * Acquire the per-zone oom lock for each zone. If that
 	 * fails, somebody else is making progress for us.
@@ -2331,14 +2326,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
-	/*
-	 * PM-freezer should be notified that there might be an OOM killer on
-	 * its way to kill and wake somebody up. This is too early and we might
-	 * end up not killing anything but false positives are acceptable.
-	 * See freeze_processes.
-	 */
-	note_oom_kill();
-
 	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
@@ -2372,8 +2359,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false);
-	*did_some_progress = 1;
+	if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+		*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(ac->zonelist, gfp_mask);
 	return page;
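
On the allocator side the explicit oom_killer_disabled check disappears entirely: out_of_memory() now reports whether a kill was attempted, and when it returns false *did_some_progress stays zero, so the slow path fails the allocation instead of retrying. A sketch of that decision, with hypothetical stubs in place of the kernel internals:

    #include <stdbool.h>
    #include <stdio.h>

    static bool oom_killer_disabled = true;  /* e.g. during hibernation */

    /* Stub modeling the reworked out_of_memory() return convention. */
    static bool out_of_memory(void)
    {
            return !oom_killer_disabled;     /* true only if a kill was tried */
    }

    static void *alloc_slowpath(void)
    {
            unsigned long did_some_progress = 0;

            if (out_of_memory())
                    did_some_progress = 1;

            if (!did_some_progress)
                    return NULL;             /* fail cleanly, no retry loop */

            /* The real allocator would retry get_page_from_freelist() here. */
            return NULL;
    }

    int main(void)
    {
            printf("page = %p\n", alloc_slowpath());
            return 0;
    }
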