diff options
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 97 |
1 files changed, 79 insertions, 18 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/timex.h> | 21 | #include <linux/timex.h> |
22 | #include <linux/jiffies.h> | 22 | #include <linux/jiffies.h> |
23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
24 | #include <linux/module.h> | ||
25 | #include <linux/notifier.h> | ||
24 | 26 | ||
25 | int sysctl_panic_on_oom; | 27 | int sysctl_panic_on_oom; |
26 | /* #define DEBUG */ | 28 | /* #define DEBUG */ |
@@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
58 | } | 60 | } |
59 | 61 | ||
60 | /* | 62 | /* |
63 | * swapoff can easily use up all memory, so kill those first. | ||
64 | */ | ||
65 | if (p->flags & PF_SWAPOFF) | ||
66 | return ULONG_MAX; | ||
67 | |||
68 | /* | ||
61 | * The memory size of the process is the basis for the badness. | 69 | * The memory size of the process is the basis for the badness. |
62 | */ | 70 | */ |
63 | points = mm->total_vm; | 71 | points = mm->total_vm; |
@@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
127 | points /= 4; | 135 | points /= 4; |
128 | 136 | ||
129 | /* | 137 | /* |
138 | * If p's nodes don't overlap ours, it may still help to kill p | ||
139 | * because p may have allocated or otherwise mapped memory on | ||
140 | * this node before. However it will be less likely. | ||
141 | */ | ||
142 | if (!cpuset_excl_nodes_overlap(p)) | ||
143 | points /= 8; | ||
144 | |||
145 | /* | ||
130 | * Adjust the score by oomkilladj. | 146 | * Adjust the score by oomkilladj. |
131 | */ | 147 | */ |
132 | if (p->oomkilladj) { | 148 | if (p->oomkilladj) { |
@@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) | |||
161 | 177 | ||
162 | for (z = zonelist->zones; *z; z++) | 178 | for (z = zonelist->zones; *z; z++) |
163 | if (cpuset_zone_allowed(*z, gfp_mask)) | 179 | if (cpuset_zone_allowed(*z, gfp_mask)) |
164 | node_clear((*z)->zone_pgdat->node_id, | 180 | node_clear(zone_to_nid(*z), nodes); |
165 | nodes); | ||
166 | else | 181 | else |
167 | return CONSTRAINT_CPUSET; | 182 | return CONSTRAINT_CPUSET; |
168 | 183 | ||
@@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) | |||
191 | unsigned long points; | 206 | unsigned long points; |
192 | int releasing; | 207 | int releasing; |
193 | 208 | ||
209 | /* skip kernel threads */ | ||
210 | if (!p->mm) | ||
211 | continue; | ||
194 | /* skip the init task with pid == 1 */ | 212 | /* skip the init task with pid == 1 */ |
195 | if (p->pid == 1) | 213 | if (p->pid == 1) |
196 | continue; | 214 | continue; |
197 | if (p->oomkilladj == OOM_DISABLE) | ||
198 | continue; | ||
199 | /* If p's nodes don't overlap ours, it won't help to kill p. */ | ||
200 | if (!cpuset_excl_nodes_overlap(p)) | ||
201 | continue; | ||
202 | 215 | ||
203 | /* | 216 | /* |
204 | * This is in the process of releasing memory so wait for it | 217 | * This is in the process of releasing memory so wait for it |
205 | * to finish before killing some other task by mistake. | 218 | * to finish before killing some other task by mistake. |
219 | * | ||
220 | * However, if p is the current task, we allow the 'kill' to | ||
221 | * go ahead if it is exiting: this will simply set TIF_MEMDIE, | ||
222 | * which will allow it to gain access to memory reserves in | ||
223 | * the process of exiting and releasing its resources. | ||
224 | * Otherwise we could get an OOM deadlock. | ||
206 | */ | 225 | */ |
207 | releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || | 226 | releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || |
208 | p->flags & PF_EXITING; | 227 | p->flags & PF_EXITING; |
209 | if (releasing && !(p->flags & PF_DEAD)) | 228 | if (releasing) { |
229 | /* PF_DEAD tasks have already released their mm */ | ||
230 | if (p->flags & PF_DEAD) | ||
231 | continue; | ||
232 | if (p->flags & PF_EXITING && p == current) { | ||
233 | chosen = p; | ||
234 | *ppoints = ULONG_MAX; | ||
235 | break; | ||
236 | } | ||
210 | return ERR_PTR(-1UL); | 237 | return ERR_PTR(-1UL); |
211 | if (p->flags & PF_SWAPOFF) | 238 | } |
212 | return p; | 239 | if (p->oomkilladj == OOM_DISABLE) |
240 | continue; | ||
213 | 241 | ||
214 | points = badness(p, uptime.tv_sec); | 242 | points = badness(p, uptime.tv_sec); |
215 | if (points > *ppoints || !chosen) { | 243 | if (points > *ppoints || !chosen) { |
@@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) | |||
221 | } | 249 | } |
222 | 250 | ||
223 | /** | 251 | /** |
224 | * We must be careful though to never send SIGKILL a process with | 252 | * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO |
225 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that | 253 | * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO |
226 | * we select a process with CAP_SYS_RAW_IO set). | 254 | * set. |
227 | */ | 255 | */ |
228 | static void __oom_kill_task(struct task_struct *p, const char *message) | 256 | static void __oom_kill_task(struct task_struct *p, const char *message) |
229 | { | 257 | { |
@@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message) | |||
241 | return; | 269 | return; |
242 | } | 270 | } |
243 | task_unlock(p); | 271 | task_unlock(p); |
244 | printk(KERN_ERR "%s: Killed process %d (%s).\n", | 272 | |
273 | if (message) { | ||
274 | printk(KERN_ERR "%s: Killed process %d (%s).\n", | ||
245 | message, p->pid, p->comm); | 275 | message, p->pid, p->comm); |
276 | } | ||
246 | 277 | ||
247 | /* | 278 | /* |
248 | * We give our sacrificial lamb high priority and access to | 279 | * We give our sacrificial lamb high priority and access to |
@@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, | |||
293 | struct task_struct *c; | 324 | struct task_struct *c; |
294 | struct list_head *tsk; | 325 | struct list_head *tsk; |
295 | 326 | ||
296 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " | 327 | /* |
297 | "children.\n", p->pid, p->comm, points); | 328 | * If the task is already exiting, don't alarm the sysadmin or kill |
329 | * its children or threads, just set TIF_MEMDIE so it can die quickly | ||
330 | */ | ||
331 | if (p->flags & PF_EXITING) { | ||
332 | __oom_kill_task(p, NULL); | ||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" | ||
337 | " and children.\n", p->pid, p->comm, points); | ||
298 | /* Try to kill a child first */ | 338 | /* Try to kill a child first */ |
299 | list_for_each(tsk, &p->children) { | 339 | list_for_each(tsk, &p->children) { |
300 | c = list_entry(tsk, struct task_struct, sibling); | 340 | c = list_entry(tsk, struct task_struct, sibling); |
@@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, | |||
306 | return oom_kill_task(p, message); | 346 | return oom_kill_task(p, message); |
307 | } | 347 | } |
308 | 348 | ||
349 | static BLOCKING_NOTIFIER_HEAD(oom_notify_list); | ||
350 | |||
351 | int register_oom_notifier(struct notifier_block *nb) | ||
352 | { | ||
353 | return blocking_notifier_chain_register(&oom_notify_list, nb); | ||
354 | } | ||
355 | EXPORT_SYMBOL_GPL(register_oom_notifier); | ||
356 | |||
357 | int unregister_oom_notifier(struct notifier_block *nb) | ||
358 | { | ||
359 | return blocking_notifier_chain_unregister(&oom_notify_list, nb); | ||
360 | } | ||
361 | EXPORT_SYMBOL_GPL(unregister_oom_notifier); | ||
362 | |||
309 | /** | 363 | /** |
310 | * out_of_memory - kill the "best" process when we run out of memory | 364 | * out_of_memory - kill the "best" process when we run out of memory |
311 | * | 365 | * |
@@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
318 | { | 372 | { |
319 | struct task_struct *p; | 373 | struct task_struct *p; |
320 | unsigned long points = 0; | 374 | unsigned long points = 0; |
375 | unsigned long freed = 0; | ||
376 | |||
377 | blocking_notifier_call_chain(&oom_notify_list, 0, &freed); | ||
378 | if (freed > 0) | ||
379 | /* Got some memory back in the last second. */ | ||
380 | return; | ||
321 | 381 | ||
322 | if (printk_ratelimit()) { | 382 | if (printk_ratelimit()) { |
323 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 383 | printk(KERN_WARNING "%s invoked oom-killer: " |
324 | gfp_mask, order); | 384 | "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", |
385 | current->comm, gfp_mask, order, current->oomkilladj); | ||
325 | dump_stack(); | 386 | dump_stack(); |
326 | show_mem(); | 387 | show_mem(); |
327 | } | 388 | } |