diff options
Diffstat (limited to 'mm/oom_kill.c')
| -rw-r--r-- | mm/oom_kill.c | 97 |
1 files changed, 79 insertions, 18 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b9af136e5cfa..bada3d03119f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
| @@ -21,6 +21,8 @@ | |||
| 21 | #include <linux/timex.h> | 21 | #include <linux/timex.h> |
| 22 | #include <linux/jiffies.h> | 22 | #include <linux/jiffies.h> |
| 23 | #include <linux/cpuset.h> | 23 | #include <linux/cpuset.h> |
| 24 | #include <linux/module.h> | ||
| 25 | #include <linux/notifier.h> | ||
| 24 | 26 | ||
| 25 | int sysctl_panic_on_oom; | 27 | int sysctl_panic_on_oom; |
| 26 | /* #define DEBUG */ | 28 | /* #define DEBUG */ |
| @@ -58,6 +60,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 58 | } | 60 | } |
| 59 | 61 | ||
| 60 | /* | 62 | /* |
| 63 | * swapoff can easily use up all memory, so kill those first. | ||
| 64 | */ | ||
| 65 | if (p->flags & PF_SWAPOFF) | ||
| 66 | return ULONG_MAX; | ||
| 67 | |||
| 68 | /* | ||
| 61 | * The memory size of the process is the basis for the badness. | 69 | * The memory size of the process is the basis for the badness. |
| 62 | */ | 70 | */ |
| 63 | points = mm->total_vm; | 71 | points = mm->total_vm; |
| @@ -127,6 +135,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 127 | points /= 4; | 135 | points /= 4; |
| 128 | 136 | ||
| 129 | /* | 137 | /* |
| 138 | * If p's nodes don't overlap ours, it may still help to kill p | ||
| 139 | * because p may have allocated or otherwise mapped memory on | ||
| 140 | * this node before. However it will be less likely. | ||
| 141 | */ | ||
| 142 | if (!cpuset_excl_nodes_overlap(p)) | ||
| 143 | points /= 8; | ||
| 144 | |||
| 145 | /* | ||
| 130 | * Adjust the score by oomkilladj. | 146 | * Adjust the score by oomkilladj. |
| 131 | */ | 147 | */ |
| 132 | if (p->oomkilladj) { | 148 | if (p->oomkilladj) { |
| @@ -161,8 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) | |||
| 161 | 177 | ||
| 162 | for (z = zonelist->zones; *z; z++) | 178 | for (z = zonelist->zones; *z; z++) |
| 163 | if (cpuset_zone_allowed(*z, gfp_mask)) | 179 | if (cpuset_zone_allowed(*z, gfp_mask)) |
| 164 | node_clear((*z)->zone_pgdat->node_id, | 180 | node_clear(zone_to_nid(*z), nodes); |
| 165 | nodes); | ||
| 166 | else | 181 | else |
| 167 | return CONSTRAINT_CPUSET; | 182 | return CONSTRAINT_CPUSET; |
| 168 | 183 | ||
| @@ -191,25 +206,38 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) | |||
| 191 | unsigned long points; | 206 | unsigned long points; |
| 192 | int releasing; | 207 | int releasing; |
| 193 | 208 | ||
| 209 | /* skip kernel threads */ | ||
| 210 | if (!p->mm) | ||
| 211 | continue; | ||
| 194 | /* skip the init task with pid == 1 */ | 212 | /* skip the init task with pid == 1 */ |
| 195 | if (p->pid == 1) | 213 | if (p->pid == 1) |
| 196 | continue; | 214 | continue; |
| 197 | if (p->oomkilladj == OOM_DISABLE) | ||
| 198 | continue; | ||
| 199 | /* If p's nodes don't overlap ours, it won't help to kill p. */ | ||
| 200 | if (!cpuset_excl_nodes_overlap(p)) | ||
| 201 | continue; | ||
| 202 | 215 | ||
| 203 | /* | 216 | /* |
| 204 | * This is in the process of releasing memory so wait for it | 217 | * This is in the process of releasing memory so wait for it |
| 205 | * to finish before killing some other task by mistake. | 218 | * to finish before killing some other task by mistake. |
| 219 | * | ||
| 220 | * However, if p is the current task, we allow the 'kill' to | ||
| 221 | * go ahead if it is exiting: this will simply set TIF_MEMDIE, | ||
| 222 | * which will allow it to gain access to memory reserves in | ||
| 223 | * the process of exiting and releasing its resources. | ||
| 224 | * Otherwise we could get an OOM deadlock. | ||
| 206 | */ | 225 | */ |
| 207 | releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || | 226 | releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || |
| 208 | p->flags & PF_EXITING; | 227 | p->flags & PF_EXITING; |
| 209 | if (releasing && !(p->flags & PF_DEAD)) | 228 | if (releasing) { |
| 229 | /* PF_DEAD tasks have already released their mm */ | ||
| 230 | if (p->flags & PF_DEAD) | ||
| 231 | continue; | ||
| 232 | if (p->flags & PF_EXITING && p == current) { | ||
| 233 | chosen = p; | ||
| 234 | *ppoints = ULONG_MAX; | ||
| 235 | break; | ||
| 236 | } | ||
| 210 | return ERR_PTR(-1UL); | 237 | return ERR_PTR(-1UL); |
| 211 | if (p->flags & PF_SWAPOFF) | 238 | } |
| 212 | return p; | 239 | if (p->oomkilladj == OOM_DISABLE) |
| 240 | continue; | ||
| 213 | 241 | ||
| 214 | points = badness(p, uptime.tv_sec); | 242 | points = badness(p, uptime.tv_sec); |
| 215 | if (points > *ppoints || !chosen) { | 243 | if (points > *ppoints || !chosen) { |
| @@ -221,9 +249,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) | |||
| 221 | } | 249 | } |
| 222 | 250 | ||
| 223 | /** | 251 | /** |
| 224 | * We must be careful though to never send SIGKILL a process with | 252 | * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO |
| 225 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that | 253 | * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO |
| 226 | * we select a process with CAP_SYS_RAW_IO set). | 254 | * set. |
| 227 | */ | 255 | */ |
| 228 | static void __oom_kill_task(struct task_struct *p, const char *message) | 256 | static void __oom_kill_task(struct task_struct *p, const char *message) |
| 229 | { | 257 | { |
| @@ -241,8 +269,11 @@ static void __oom_kill_task(struct task_struct *p, const char *message) | |||
| 241 | return; | 269 | return; |
| 242 | } | 270 | } |
| 243 | task_unlock(p); | 271 | task_unlock(p); |
| 244 | printk(KERN_ERR "%s: Killed process %d (%s).\n", | 272 | |
| 273 | if (message) { | ||
| 274 | printk(KERN_ERR "%s: Killed process %d (%s).\n", | ||
| 245 | message, p->pid, p->comm); | 275 | message, p->pid, p->comm); |
| 276 | } | ||
| 246 | 277 | ||
| 247 | /* | 278 | /* |
| 248 | * We give our sacrificial lamb high priority and access to | 279 | * We give our sacrificial lamb high priority and access to |
| @@ -293,8 +324,17 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, | |||
| 293 | struct task_struct *c; | 324 | struct task_struct *c; |
| 294 | struct list_head *tsk; | 325 | struct list_head *tsk; |
| 295 | 326 | ||
| 296 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " | 327 | /* |
| 297 | "children.\n", p->pid, p->comm, points); | 328 | * If the task is already exiting, don't alarm the sysadmin or kill |
| 329 | * its children or threads, just set TIF_MEMDIE so it can die quickly | ||
| 330 | */ | ||
| 331 | if (p->flags & PF_EXITING) { | ||
| 332 | __oom_kill_task(p, NULL); | ||
| 333 | return 0; | ||
| 334 | } | ||
| 335 | |||
| 336 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li" | ||
| 337 | " and children.\n", p->pid, p->comm, points); | ||
| 298 | /* Try to kill a child first */ | 338 | /* Try to kill a child first */ |
| 299 | list_for_each(tsk, &p->children) { | 339 | list_for_each(tsk, &p->children) { |
| 300 | c = list_entry(tsk, struct task_struct, sibling); | 340 | c = list_entry(tsk, struct task_struct, sibling); |
| @@ -306,6 +346,20 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, | |||
| 306 | return oom_kill_task(p, message); | 346 | return oom_kill_task(p, message); |
| 307 | } | 347 | } |
| 308 | 348 | ||
| 349 | static BLOCKING_NOTIFIER_HEAD(oom_notify_list); | ||
| 350 | |||
| 351 | int register_oom_notifier(struct notifier_block *nb) | ||
| 352 | { | ||
| 353 | return blocking_notifier_chain_register(&oom_notify_list, nb); | ||
| 354 | } | ||
| 355 | EXPORT_SYMBOL_GPL(register_oom_notifier); | ||
| 356 | |||
| 357 | int unregister_oom_notifier(struct notifier_block *nb) | ||
| 358 | { | ||
| 359 | return blocking_notifier_chain_unregister(&oom_notify_list, nb); | ||
| 360 | } | ||
| 361 | EXPORT_SYMBOL_GPL(unregister_oom_notifier); | ||
| 362 | |||
| 309 | /** | 363 | /** |
| 310 | * out_of_memory - kill the "best" process when we run out of memory | 364 | * out_of_memory - kill the "best" process when we run out of memory |
| 311 | * | 365 | * |
| @@ -318,10 +372,17 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
| 318 | { | 372 | { |
| 319 | struct task_struct *p; | 373 | struct task_struct *p; |
| 320 | unsigned long points = 0; | 374 | unsigned long points = 0; |
| 375 | unsigned long freed = 0; | ||
| 376 | |||
| 377 | blocking_notifier_call_chain(&oom_notify_list, 0, &freed); | ||
| 378 | if (freed > 0) | ||
| 379 | /* Got some memory back in the last second. */ | ||
| 380 | return; | ||
| 321 | 381 | ||
| 322 | if (printk_ratelimit()) { | 382 | if (printk_ratelimit()) { |
| 323 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 383 | printk(KERN_WARNING "%s invoked oom-killer: " |
| 324 | gfp_mask, order); | 384 | "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", |
| 385 | current->comm, gfp_mask, order, current->oomkilladj); | ||
| 325 | dump_stack(); | 386 | dump_stack(); |
| 326 | show_mem(); | 387 | show_mem(); |
| 327 | } | 388 | } |
