author     Michal Hocko <mhocko@suse.cz>                      2014-10-20 12:12:32 -0400
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>     2014-10-21 17:44:21 -0400
commit     5695be142e203167e3cb515ef86a88424f3524eb (patch)
tree       2c2b21d91658f23b3290db56f726959863ee8248
parent     c05eb32f472fb9f7f474c20ff6fa5bfe0cbedc05 (diff)
OOM, PM: OOM killed task shouldn't escape PM suspend
PM freezer relies on having all tasks frozen by the time devices are getting
frozen so that no task will touch them while they are getting frozen. But OOM
killer is allowed to kill an already frozen task in order to handle OOM
situation. In order to protect from late wake ups OOM killer is disabled after
all tasks are frozen. This, however, still keeps a window open when a killed
task didn't manage to die by the time freeze_processes finishes.

Reduce the race window by checking all tasks after OOM killer has been
disabled. This is still not race free completely unfortunately because
oom_killer_disable cannot stop an already ongoing OOM killer so a task might
still wake up from the fridge and get killed without freeze_processes
noticing. Full synchronization of OOM and freezer is, however, too heavy
weight for this highly unlikely case.

Introduce and check oom_kills counter which gets incremented early when the
allocator enters __alloc_pages_may_oom path and only check all the tasks if
the counter changes during the freezing attempt. The counter is updated so
early to reduce the race window since allocator checked oom_killer_disabled
which is set by PM-freezing code. A false positive will push the PM-freezer
into a slow path but that is not a big deal.

Changes since v1
- push the re-check loop out of freeze_processes into check_frozen_processes
  and invert the condition to make the code more readable as per Rafael

Fixes: f660daac474c6f ("oom: thaw threads if oom killed thread is frozen before deferring")
Cc: 3.2+ <stable@vger.kernel.org> # 3.2+
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--   include/linux/oom.h      3
-rw-r--r--   kernel/power/process.c  40
-rw-r--r--   mm/oom_kill.c           17
-rw-r--r--   mm/page_alloc.c          8
4 files changed, 67 insertions(+), 1 deletions(-)
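The patch reduces the race by sampling an OOM-kill counter before the freezing
attempt and re-scanning the task list only if that counter moved while tasks
were being frozen. Below is a minimal, stand-alone C sketch of that optimistic
"sample, work, re-check" pattern; it is a user-space analogy rather than kernel
code, and the names event_count, note_event, slow_path_check and do_fast_path
are invented for illustration and are not part of the patch.

/*
 * User-space analogy of the pattern used by the patch: the counter is
 * sampled before the critical work, and a change in it merely triggers an
 * expensive re-check instead of full synchronization with the racing actor.
 * Hypothetical names; compile with any C11 compiler.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int event_count;          /* bumped by the racing actor, like oom_kills */

static void note_event(void)            /* analogous to note_oom_kill() */
{
	atomic_fetch_add(&event_count, 1);
}

static bool slow_path_check(void)       /* analogous to check_frozen_processes() */
{
	/* expensive verification that the racing actor did no harm */
	return true;
}

static int do_fast_path(void)
{
	int saved = atomic_load(&event_count);   /* like oom_kills_saved */

	/* ... work that must not race with the actor goes here ... */

	/*
	 * A counter change is only a hint: fall back to the expensive
	 * check and report failure (the patch returns -EBUSY) if it fails.
	 */
	if (atomic_load(&event_count) != saved && !slow_path_check())
		return -1;

	return 0;
}

int main(void)
{
	note_event();
	printf("fast path result: %d\n", do_fast_path());
	return 0;
}

As in the patch, a counter bump that turns out to be harmless only costs the
slow re-check, never correctness.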
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 647395a1a550..e8d6e1058723 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
 extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     unsigned int points, unsigned long totalpages,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7b323221b9ee..5cc588c1abab 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -108,6 +108,28 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+	struct task_struct *g, *p;
+	bool ret = true;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(g, p) {
+		if (p != current && !freezer_should_skip(p) &&
+		    !frozen(p)) {
+			ret = false;
+			goto done;
+		}
+	}
+done:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
+
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  * The current thread will not be frozen.  The same process that calls
@@ -118,6 +140,7 @@ static int try_to_freeze_tasks(bool user_only)
 int freeze_processes(void)
 {
 	int error;
+	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -132,12 +155,27 @@ int freeze_processes(void)
 	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
+	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
-		printk("done.");
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
 		oom_killer_disable();
+
+		/*
+		 * There might have been an OOM kill while we were
+		 * freezing tasks and the killed task might be still
+		 * on the way out so we have to double check for race.
+		 */
+		if (oom_kills_count() != oom_kills_saved &&
+		    !check_frozen_processes()) {
+			__usermodehelper_set_disable_depth(UMH_ENABLED);
+			printk("OOM in progress.");
+			error = -EBUSY;
+			goto done;
+		}
+		printk("done.");
 	}
+done:
 	printk("\n");
 	BUG_ON(in_atomic());
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a3a18f..5340f6b91312 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+	return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+	atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1b6381..9cd36b822444 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2252,6 +2252,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
+	 * PM-freezer should be notified that there might be an OOM killer on
+	 * its way to kill and wake somebody up. This is too early and we might
+	 * end up not killing anything but false positives are acceptable.
+	 * See freeze_processes.
+	 */
+	note_oom_kill();
+
+	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.