diff options
Diffstat (limited to 'mm/oom_kill.c')
| -rw-r--r-- | mm/oom_kill.c | 123 |
1 file changed, 91 insertions, 32 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b05ab8f2a562..8123fad5a485 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
| @@ -58,15 +58,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 58 | 58 | ||
| 59 | /* | 59 | /* |
| 60 | * Processes which fork a lot of child processes are likely | 60 | * Processes which fork a lot of child processes are likely |
| 61 | * a good choice. We add the vmsize of the children if they | 61 | * a good choice. We add half the vmsize of the children if they |
| 62 | * have an own mm. This prevents forking servers to flood the | 62 | * have an own mm. This prevents forking servers to flood the |
| 63 | * machine with an endless amount of children | 63 | * machine with an endless amount of children. In case a single |
| 64 | * child is eating the vast majority of memory, adding only half | ||
| 65 | * to the parents will make the child our kill candidate of choice. | ||
| 64 | */ | 66 | */ |
| 65 | list_for_each(tsk, &p->children) { | 67 | list_for_each(tsk, &p->children) { |
| 66 | struct task_struct *chld; | 68 | struct task_struct *chld; |
| 67 | chld = list_entry(tsk, struct task_struct, sibling); | 69 | chld = list_entry(tsk, struct task_struct, sibling); |
| 68 | if (chld->mm != p->mm && chld->mm) | 70 | if (chld->mm != p->mm && chld->mm) |
| 69 | points += chld->mm->total_vm; | 71 | points += chld->mm->total_vm/2 + 1; |
| 70 | } | 72 | } |
| 71 | 73 | ||
| 72 | /* | 74 | /* |
| @@ -131,17 +133,47 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
| 131 | } | 133 | } |
| 132 | 134 | ||
| 133 | /* | 135 | /* |
| 136 | * Types of limitations to the nodes from which allocations may occur | ||
| 137 | */ | ||
| 138 | #define CONSTRAINT_NONE 1 | ||
| 139 | #define CONSTRAINT_MEMORY_POLICY 2 | ||
| 140 | #define CONSTRAINT_CPUSET 3 | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Determine the type of allocation constraint. | ||
| 144 | */ | ||
| 145 | static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) | ||
| 146 | { | ||
| 147 | #ifdef CONFIG_NUMA | ||
| 148 | struct zone **z; | ||
| 149 | nodemask_t nodes = node_online_map; | ||
| 150 | |||
| 151 | for (z = zonelist->zones; *z; z++) | ||
| 152 | if (cpuset_zone_allowed(*z, gfp_mask)) | ||
| 153 | node_clear((*z)->zone_pgdat->node_id, | ||
| 154 | nodes); | ||
| 155 | else | ||
| 156 | return CONSTRAINT_CPUSET; | ||
| 157 | |||
| 158 | if (!nodes_empty(nodes)) | ||
| 159 | return CONSTRAINT_MEMORY_POLICY; | ||
| 160 | #endif | ||
| 161 | |||
| 162 | return CONSTRAINT_NONE; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 134 | * Simple selection loop. We chose the process with the highest | 166 | * Simple selection loop. We chose the process with the highest |
| 135 | * number of 'points'. We expect the caller will lock the tasklist. | 167 | * number of 'points'. We expect the caller will lock the tasklist. |
| 136 | * | 168 | * |
| 137 | * (not docbooked, we don't want this one cluttering up the manual) | 169 | * (not docbooked, we don't want this one cluttering up the manual) |
| 138 | */ | 170 | */ |
| 139 | static struct task_struct * select_bad_process(void) | 171 | static struct task_struct *select_bad_process(unsigned long *ppoints) |
| 140 | { | 172 | { |
| 141 | unsigned long maxpoints = 0; | ||
| 142 | struct task_struct *g, *p; | 173 | struct task_struct *g, *p; |
| 143 | struct task_struct *chosen = NULL; | 174 | struct task_struct *chosen = NULL; |
| 144 | struct timespec uptime; | 175 | struct timespec uptime; |
| 176 | *ppoints = 0; | ||
| 145 | 177 | ||
| 146 | do_posix_clock_monotonic_gettime(&uptime); | 178 | do_posix_clock_monotonic_gettime(&uptime); |
| 147 | do_each_thread(g, p) { | 179 | do_each_thread(g, p) { |
| @@ -169,9 +201,9 @@ static struct task_struct * select_bad_process(void) | |||
| 169 | return p; | 201 | return p; |
| 170 | 202 | ||
| 171 | points = badness(p, uptime.tv_sec); | 203 | points = badness(p, uptime.tv_sec); |
| 172 | if (points > maxpoints || !chosen) { | 204 | if (points > *ppoints || !chosen) { |
| 173 | chosen = p; | 205 | chosen = p; |
| 174 | maxpoints = points; | 206 | *ppoints = points; |
| 175 | } | 207 | } |
| 176 | } while_each_thread(g, p); | 208 | } while_each_thread(g, p); |
| 177 | return chosen; | 209 | return chosen; |
| @@ -182,7 +214,7 @@ static struct task_struct * select_bad_process(void) | |||
| 182 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that | 214 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that |
| 183 | * we select a process with CAP_SYS_RAW_IO set). | 215 | * we select a process with CAP_SYS_RAW_IO set). |
| 184 | */ | 216 | */ |
| 185 | static void __oom_kill_task(task_t *p) | 217 | static void __oom_kill_task(task_t *p, const char *message) |
| 186 | { | 218 | { |
| 187 | if (p->pid == 1) { | 219 | if (p->pid == 1) { |
| 188 | WARN_ON(1); | 220 | WARN_ON(1); |
| @@ -198,8 +230,8 @@ static void __oom_kill_task(task_t *p) | |||
| 198 | return; | 230 | return; |
| 199 | } | 231 | } |
| 200 | task_unlock(p); | 232 | task_unlock(p); |
| 201 | printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", | 233 | printk(KERN_ERR "%s: Killed process %d (%s).\n", |
| 202 | p->pid, p->comm); | 234 | message, p->pid, p->comm); |
| 203 | 235 | ||
| 204 | /* | 236 | /* |
| 205 | * We give our sacrificial lamb high priority and access to | 237 | * We give our sacrificial lamb high priority and access to |
| @@ -212,7 +244,7 @@ static void __oom_kill_task(task_t *p) | |||
| 212 | force_sig(SIGKILL, p); | 244 | force_sig(SIGKILL, p); |
| 213 | } | 245 | } |
| 214 | 246 | ||
| 215 | static struct mm_struct *oom_kill_task(task_t *p) | 247 | static struct mm_struct *oom_kill_task(task_t *p, const char *message) |
| 216 | { | 248 | { |
| 217 | struct mm_struct *mm = get_task_mm(p); | 249 | struct mm_struct *mm = get_task_mm(p); |
| 218 | task_t * g, * q; | 250 | task_t * g, * q; |
| @@ -224,35 +256,38 @@ static struct mm_struct *oom_kill_task(task_t *p) | |||
| 224 | return NULL; | 256 | return NULL; |
| 225 | } | 257 | } |
| 226 | 258 | ||
| 227 | __oom_kill_task(p); | 259 | __oom_kill_task(p, message); |
| 228 | /* | 260 | /* |
| 229 | * kill all processes that share the ->mm (i.e. all threads), | 261 | * kill all processes that share the ->mm (i.e. all threads), |
| 230 | * but are in a different thread group | 262 | * but are in a different thread group |
| 231 | */ | 263 | */ |
| 232 | do_each_thread(g, q) | 264 | do_each_thread(g, q) |
| 233 | if (q->mm == mm && q->tgid != p->tgid) | 265 | if (q->mm == mm && q->tgid != p->tgid) |
| 234 | __oom_kill_task(q); | 266 | __oom_kill_task(q, message); |
| 235 | while_each_thread(g, q); | 267 | while_each_thread(g, q); |
| 236 | 268 | ||
| 237 | return mm; | 269 | return mm; |
| 238 | } | 270 | } |
| 239 | 271 | ||
| 240 | static struct mm_struct *oom_kill_process(struct task_struct *p) | 272 | static struct mm_struct *oom_kill_process(struct task_struct *p, |
| 273 | unsigned long points, const char *message) | ||
| 241 | { | 274 | { |
| 242 | struct mm_struct *mm; | 275 | struct mm_struct *mm; |
| 243 | struct task_struct *c; | 276 | struct task_struct *c; |
| 244 | struct list_head *tsk; | 277 | struct list_head *tsk; |
| 245 | 278 | ||
| 279 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " | ||
| 280 | "children.\n", p->pid, p->comm, points); | ||
| 246 | /* Try to kill a child first */ | 281 | /* Try to kill a child first */ |
| 247 | list_for_each(tsk, &p->children) { | 282 | list_for_each(tsk, &p->children) { |
| 248 | c = list_entry(tsk, struct task_struct, sibling); | 283 | c = list_entry(tsk, struct task_struct, sibling); |
| 249 | if (c->mm == p->mm) | 284 | if (c->mm == p->mm) |
| 250 | continue; | 285 | continue; |
| 251 | mm = oom_kill_task(c); | 286 | mm = oom_kill_task(c, message); |
| 252 | if (mm) | 287 | if (mm) |
| 253 | return mm; | 288 | return mm; |
| 254 | } | 289 | } |
| 255 | return oom_kill_task(p); | 290 | return oom_kill_task(p, message); |
| 256 | } | 291 | } |
| 257 | 292 | ||
| 258 | /** | 293 | /** |
| @@ -263,10 +298,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) | |||
| 263 | * OR try to be smart about which process to kill. Note that we | 298 | * OR try to be smart about which process to kill. Note that we |
| 264 | * don't have to be perfect here, we just have to be good. | 299 | * don't have to be perfect here, we just have to be good. |
| 265 | */ | 300 | */ |
| 266 | void out_of_memory(gfp_t gfp_mask, int order) | 301 | void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) |
| 267 | { | 302 | { |
| 268 | struct mm_struct *mm = NULL; | 303 | struct mm_struct *mm = NULL; |
| 269 | task_t * p; | 304 | task_t *p; |
| 305 | unsigned long points; | ||
| 270 | 306 | ||
| 271 | if (printk_ratelimit()) { | 307 | if (printk_ratelimit()) { |
| 272 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 308 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", |
| @@ -277,25 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order) | |||
| 277 | 313 | ||
| 278 | cpuset_lock(); | 314 | cpuset_lock(); |
| 279 | read_lock(&tasklist_lock); | 315 | read_lock(&tasklist_lock); |
| 316 | |||
| 317 | /* | ||
| 318 | * Check if there were limitations on the allocation (only relevant for | ||
| 319 | * NUMA) that may require different handling. | ||
| 320 | */ | ||
| 321 | switch (constrained_alloc(zonelist, gfp_mask)) { | ||
| 322 | case CONSTRAINT_MEMORY_POLICY: | ||
| 323 | mm = oom_kill_process(current, points, | ||
| 324 | "No available memory (MPOL_BIND)"); | ||
| 325 | break; | ||
| 326 | |||
| 327 | case CONSTRAINT_CPUSET: | ||
| 328 | mm = oom_kill_process(current, points, | ||
| 329 | "No available memory in cpuset"); | ||
| 330 | break; | ||
| 331 | |||
| 332 | case CONSTRAINT_NONE: | ||
| 280 | retry: | 333 | retry: |
| 281 | p = select_bad_process(); | 334 | /* |
| 335 | * Rambo mode: Shoot down a process and hope it solves whatever | ||
| 336 | * issues we may have. | ||
| 337 | */ | ||
| 338 | p = select_bad_process(&points); | ||
| 282 | 339 | ||
| 283 | if (PTR_ERR(p) == -1UL) | 340 | if (PTR_ERR(p) == -1UL) |
| 284 | goto out; | 341 | goto out; |
| 285 | 342 | ||
| 286 | /* Found nothing?!?! Either we hang forever, or we panic. */ | 343 | /* Found nothing?!?! Either we hang forever, or we panic. */ |
| 287 | if (!p) { | 344 | if (!p) { |
| 288 | read_unlock(&tasklist_lock); | 345 | read_unlock(&tasklist_lock); |
| 289 | cpuset_unlock(); | 346 | cpuset_unlock(); |
| 290 | panic("Out of memory and no killable processes...\n"); | 347 | panic("Out of memory and no killable processes...\n"); |
| 291 | } | 348 | } |
| 292 | 349 | ||
| 293 | mm = oom_kill_process(p); | 350 | mm = oom_kill_process(p, points, "Out of memory"); |
| 294 | if (!mm) | 351 | if (!mm) |
| 295 | goto retry; | 352 | goto retry; |
| 353 | |||
| 354 | break; | ||
| 355 | } | ||
| 296 | 356 | ||
| 297 | out: | 357 | out: |
| 298 | read_unlock(&tasklist_lock); | ||
| 299 | cpuset_unlock(); | 358 | cpuset_unlock(); |
| 300 | if (mm) | 359 | if (mm) |
| 301 | mmput(mm); | 360 | mmput(mm); |
