about summary refs log tree commit diff stats
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--  mm/oom_kill.c  97
1 file changed, 79 insertions(+), 18 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -21,6 +21,8 @@
21#include <linux/timex.h> 21#include <linux/timex.h>
22#include <linux/jiffies.h> 22#include <linux/jiffies.h>
23#include <linux/cpuset.h> 23#include <linux/cpuset.h>
24#include <linux/module.h>
25#include <linux/notifier.h>
24 26
25int sysctl_panic_on_oom; 27int sysctl_panic_on_oom;
26/* #define DEBUG */ 28/* #define DEBUG */
@@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
58 } 60 }
59 61
60 /* 62 /*
63 * swapoff can easily use up all memory, so kill those first.
64 */
65 if (p->flags & PF_SWAPOFF)
66 return ULONG_MAX;
67
68 /*
61 * The memory size of the process is the basis for the badness. 69 * The memory size of the process is the basis for the badness.
62 */ 70 */
63 points = mm->total_vm; 71 points = mm->total_vm;
@@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
127 points /= 4; 135 points /= 4;
128 136
129 /* 137 /*
138 * If p's nodes don't overlap ours, it may still help to kill p
139 * because p may have allocated or otherwise mapped memory on
140 * this node before. However it will be less likely.
141 */
142 if (!cpuset_excl_nodes_overlap(p))
143 points /= 8;
144
145 /*
130 * Adjust the score by oomkilladj. 146 * Adjust the score by oomkilladj.
131 */ 147 */
132 if (p->oomkilladj) { 148 if (p->oomkilladj) {
@@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
161 177
162 for (z = zonelist->zones; *z; z++) 178 for (z = zonelist->zones; *z; z++)
163 if (cpuset_zone_allowed(*z, gfp_mask)) 179 if (cpuset_zone_allowed(*z, gfp_mask))
164 node_clear((*z)->zone_pgdat->node_id, 180 node_clear(zone_to_nid(*z), nodes);
165 nodes);
166 else 181 else
167 return CONSTRAINT_CPUSET; 182 return CONSTRAINT_CPUSET;
168 183
@@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
191 unsigned long points; 206 unsigned long points;
192 int releasing; 207 int releasing;
193 208
209 /* skip kernel threads */
210 if (!p->mm)
211 continue;
194 /* skip the init task with pid == 1 */ 212 /* skip the init task with pid == 1 */
195 if (p->pid == 1) 213 if (p->pid == 1)
196 continue; 214 continue;
197 if (p->oomkilladj == OOM_DISABLE)
198 continue;
199 /* If p's nodes don't overlap ours, it won't help to kill p. */
200 if (!cpuset_excl_nodes_overlap(p))
201 continue;
202 215
203 /* 216 /*
204 * This is in the process of releasing memory so wait for it 217 * This is in the process of releasing memory so wait for it
205 * to finish before killing some other task by mistake. 218 * to finish before killing some other task by mistake.
219 *
220 * However, if p is the current task, we allow the 'kill' to
221 * go ahead if it is exiting: this will simply set TIF_MEMDIE,
222 * which will allow it to gain access to memory reserves in
223 * the process of exiting and releasing its resources.
224 * Otherwise we could get an OOM deadlock.
206 */ 225 */
207 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || 226 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
208 p->flags & PF_EXITING; 227 p->flags & PF_EXITING;
209 if (releasing && !(p->flags & PF_DEAD)) 228 if (releasing) {
229 /* PF_DEAD tasks have already released their mm */
230 if (p->flags & PF_DEAD)
231 continue;
232 if (p->flags & PF_EXITING && p == current) {
233 chosen = p;
234 *ppoints = ULONG_MAX;
235 break;
236 }
210 return ERR_PTR(-1UL); 237 return ERR_PTR(-1UL);
211 if (p->flags & PF_SWAPOFF) 238 }
212 return p; 239 if (p->oomkilladj == OOM_DISABLE)
240 continue;
213 241
214 points = badness(p, uptime.tv_sec); 242 points = badness(p, uptime.tv_sec);
215 if (points > *ppoints || !chosen) { 243 if (points > *ppoints || !chosen) {
@@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
221} 249}
222 250
223/** 251/**
224 * We must be careful though to never send SIGKILL a process with 252 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
225 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 253 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
226 * we select a process with CAP_SYS_RAW_IO set). 254 * set.
227 */ 255 */
228static void __oom_kill_task(struct task_struct *p, const char *message) 256static void __oom_kill_task(struct task_struct *p, const char *message)
229{ 257{
@@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message)
241 return; 269 return;
242 } 270 }
243 task_unlock(p); 271 task_unlock(p);
244 printk(KERN_ERR "%s: Killed process %d (%s).\n", 272
273 if (message) {
274 printk(KERN_ERR "%s: Killed process %d (%s).\n",
245 message, p->pid, p->comm); 275 message, p->pid, p->comm);
276 }
246 277
247 /* 278 /*
248 * We give our sacrificial lamb high priority and access to 279 * We give our sacrificial lamb high priority and access to
@@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
293 struct task_struct *c; 324 struct task_struct *c;
294 struct list_head *tsk; 325 struct list_head *tsk;
295 326
296 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " 327 /*
297 "children.\n", p->pid, p->comm, points); 328 * If the task is already exiting, don't alarm the sysadmin or kill
329 * its children or threads, just set TIF_MEMDIE so it can die quickly
330 */
331 if (p->flags & PF_EXITING) {
332 __oom_kill_task(p, NULL);
333 return 0;
334 }
335
336 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li"
337 " and children.\n", p->pid, p->comm, points);
298 /* Try to kill a child first */ 338 /* Try to kill a child first */
299 list_for_each(tsk, &p->children) { 339 list_for_each(tsk, &p->children) {
300 c = list_entry(tsk, struct task_struct, sibling); 340 c = list_entry(tsk, struct task_struct, sibling);
@@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
306 return oom_kill_task(p, message); 346 return oom_kill_task(p, message);
307} 347}
308 348
349static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
350
351int register_oom_notifier(struct notifier_block *nb)
352{
353 return blocking_notifier_chain_register(&oom_notify_list, nb);
354}
355EXPORT_SYMBOL_GPL(register_oom_notifier);
356
357int unregister_oom_notifier(struct notifier_block *nb)
358{
359 return blocking_notifier_chain_unregister(&oom_notify_list, nb);
360}
361EXPORT_SYMBOL_GPL(unregister_oom_notifier);
362
309/** 363/**
310 * out_of_memory - kill the "best" process when we run out of memory 364 * out_of_memory - kill the "best" process when we run out of memory
311 * 365 *
@@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
318{ 372{
319 struct task_struct *p; 373 struct task_struct *p;
320 unsigned long points = 0; 374 unsigned long points = 0;
375 unsigned long freed = 0;
376
377 blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
378 if (freed > 0)
379 /* Got some memory back in the last second. */
380 return;
321 381
322 if (printk_ratelimit()) { 382 if (printk_ratelimit()) {
323 printk("oom-killer: gfp_mask=0x%x, order=%d\n", 383 printk(KERN_WARNING "%s invoked oom-killer: "
324 gfp_mask, order); 384 "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
385 current->comm, gfp_mask, order, current->oomkilladj);
325 dump_stack(); 386 dump_stack();
326 show_mem(); 387 show_mem();
327 } 388 }