diff options
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 124 |
1 files changed, 92 insertions, 32 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b05ab8f2a562..78747afad6b0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,15 +58,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Processes which fork a lot of child processes are likely | 60 | * Processes which fork a lot of child processes are likely |
61 | * a good choice. We add the vmsize of the children if they | 61 | * a good choice. We add half the vmsize of the children if they |
62 | * have an own mm. This prevents forking servers to flood the | 62 | * have an own mm. This prevents forking servers to flood the |
63 | * machine with an endless amount of children | 63 | * machine with an endless amount of children. In case a single |
64 | * child is eating the vast majority of memory, adding only half | ||
65 | * to the parents will make the child our kill candidate of choice. | ||
64 | */ | 66 | */ |
65 | list_for_each(tsk, &p->children) { | 67 | list_for_each(tsk, &p->children) { |
66 | struct task_struct *chld; | 68 | struct task_struct *chld; |
67 | chld = list_entry(tsk, struct task_struct, sibling); | 69 | chld = list_entry(tsk, struct task_struct, sibling); |
68 | if (chld->mm != p->mm && chld->mm) | 70 | if (chld->mm != p->mm && chld->mm) |
69 | points += chld->mm->total_vm; | 71 | points += chld->mm->total_vm/2 + 1; |
70 | } | 72 | } |
71 | 73 | ||
72 | /* | 74 | /* |
@@ -131,17 +133,47 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
131 | } | 133 | } |
132 | 134 | ||
133 | /* | 135 | /* |
136 | * Types of limitations to the nodes from which allocations may occur | ||
137 | */ | ||
138 | #define CONSTRAINT_NONE 1 | ||
139 | #define CONSTRAINT_MEMORY_POLICY 2 | ||
140 | #define CONSTRAINT_CPUSET 3 | ||
141 | |||
142 | /* | ||
143 | * Determine the type of allocation constraint. | ||
144 | */ | ||
145 | static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) | ||
146 | { | ||
147 | #ifdef CONFIG_NUMA | ||
148 | struct zone **z; | ||
149 | nodemask_t nodes = node_online_map; | ||
150 | |||
151 | for (z = zonelist->zones; *z; z++) | ||
152 | if (cpuset_zone_allowed(*z, gfp_mask)) | ||
153 | node_clear((*z)->zone_pgdat->node_id, | ||
154 | nodes); | ||
155 | else | ||
156 | return CONSTRAINT_CPUSET; | ||
157 | |||
158 | if (!nodes_empty(nodes)) | ||
159 | return CONSTRAINT_MEMORY_POLICY; | ||
160 | #endif | ||
161 | |||
162 | return CONSTRAINT_NONE; | ||
163 | } | ||
164 | |||
165 | /* | ||
134 | * Simple selection loop. We chose the process with the highest | 166 | * Simple selection loop. We chose the process with the highest |
135 | * number of 'points'. We expect the caller will lock the tasklist. | 167 | * number of 'points'. We expect the caller will lock the tasklist. |
136 | * | 168 | * |
137 | * (not docbooked, we don't want this one cluttering up the manual) | 169 | * (not docbooked, we don't want this one cluttering up the manual) |
138 | */ | 170 | */ |
139 | static struct task_struct * select_bad_process(void) | 171 | static struct task_struct *select_bad_process(unsigned long *ppoints) |
140 | { | 172 | { |
141 | unsigned long maxpoints = 0; | ||
142 | struct task_struct *g, *p; | 173 | struct task_struct *g, *p; |
143 | struct task_struct *chosen = NULL; | 174 | struct task_struct *chosen = NULL; |
144 | struct timespec uptime; | 175 | struct timespec uptime; |
176 | *ppoints = 0; | ||
145 | 177 | ||
146 | do_posix_clock_monotonic_gettime(&uptime); | 178 | do_posix_clock_monotonic_gettime(&uptime); |
147 | do_each_thread(g, p) { | 179 | do_each_thread(g, p) { |
@@ -169,9 +201,9 @@ static struct task_struct * select_bad_process(void) | |||
169 | return p; | 201 | return p; |
170 | 202 | ||
171 | points = badness(p, uptime.tv_sec); | 203 | points = badness(p, uptime.tv_sec); |
172 | if (points > maxpoints || !chosen) { | 204 | if (points > *ppoints || !chosen) { |
173 | chosen = p; | 205 | chosen = p; |
174 | maxpoints = points; | 206 | *ppoints = points; |
175 | } | 207 | } |
176 | } while_each_thread(g, p); | 208 | } while_each_thread(g, p); |
177 | return chosen; | 209 | return chosen; |
@@ -182,7 +214,7 @@ static struct task_struct * select_bad_process(void) | |||
182 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that | 214 | * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that |
183 | * we select a process with CAP_SYS_RAW_IO set). | 215 | * we select a process with CAP_SYS_RAW_IO set). |
184 | */ | 216 | */ |
185 | static void __oom_kill_task(task_t *p) | 217 | static void __oom_kill_task(task_t *p, const char *message) |
186 | { | 218 | { |
187 | if (p->pid == 1) { | 219 | if (p->pid == 1) { |
188 | WARN_ON(1); | 220 | WARN_ON(1); |
@@ -198,8 +230,8 @@ static void __oom_kill_task(task_t *p) | |||
198 | return; | 230 | return; |
199 | } | 231 | } |
200 | task_unlock(p); | 232 | task_unlock(p); |
201 | printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", | 233 | printk(KERN_ERR "%s: Killed process %d (%s).\n", |
202 | p->pid, p->comm); | 234 | message, p->pid, p->comm); |
203 | 235 | ||
204 | /* | 236 | /* |
205 | * We give our sacrificial lamb high priority and access to | 237 | * We give our sacrificial lamb high priority and access to |
@@ -212,7 +244,7 @@ static void __oom_kill_task(task_t *p) | |||
212 | force_sig(SIGKILL, p); | 244 | force_sig(SIGKILL, p); |
213 | } | 245 | } |
214 | 246 | ||
215 | static struct mm_struct *oom_kill_task(task_t *p) | 247 | static struct mm_struct *oom_kill_task(task_t *p, const char *message) |
216 | { | 248 | { |
217 | struct mm_struct *mm = get_task_mm(p); | 249 | struct mm_struct *mm = get_task_mm(p); |
218 | task_t * g, * q; | 250 | task_t * g, * q; |
@@ -224,35 +256,38 @@ static struct mm_struct *oom_kill_task(task_t *p) | |||
224 | return NULL; | 256 | return NULL; |
225 | } | 257 | } |
226 | 258 | ||
227 | __oom_kill_task(p); | 259 | __oom_kill_task(p, message); |
228 | /* | 260 | /* |
229 | * kill all processes that share the ->mm (i.e. all threads), | 261 | * kill all processes that share the ->mm (i.e. all threads), |
230 | * but are in a different thread group | 262 | * but are in a different thread group |
231 | */ | 263 | */ |
232 | do_each_thread(g, q) | 264 | do_each_thread(g, q) |
233 | if (q->mm == mm && q->tgid != p->tgid) | 265 | if (q->mm == mm && q->tgid != p->tgid) |
234 | __oom_kill_task(q); | 266 | __oom_kill_task(q, message); |
235 | while_each_thread(g, q); | 267 | while_each_thread(g, q); |
236 | 268 | ||
237 | return mm; | 269 | return mm; |
238 | } | 270 | } |
239 | 271 | ||
240 | static struct mm_struct *oom_kill_process(struct task_struct *p) | 272 | static struct mm_struct *oom_kill_process(struct task_struct *p, |
273 | unsigned long points, const char *message) | ||
241 | { | 274 | { |
242 | struct mm_struct *mm; | 275 | struct mm_struct *mm; |
243 | struct task_struct *c; | 276 | struct task_struct *c; |
244 | struct list_head *tsk; | 277 | struct list_head *tsk; |
245 | 278 | ||
279 | printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and " | ||
280 | "children.\n", p->pid, p->comm, points); | ||
246 | /* Try to kill a child first */ | 281 | /* Try to kill a child first */ |
247 | list_for_each(tsk, &p->children) { | 282 | list_for_each(tsk, &p->children) { |
248 | c = list_entry(tsk, struct task_struct, sibling); | 283 | c = list_entry(tsk, struct task_struct, sibling); |
249 | if (c->mm == p->mm) | 284 | if (c->mm == p->mm) |
250 | continue; | 285 | continue; |
251 | mm = oom_kill_task(c); | 286 | mm = oom_kill_task(c, message); |
252 | if (mm) | 287 | if (mm) |
253 | return mm; | 288 | return mm; |
254 | } | 289 | } |
255 | return oom_kill_task(p); | 290 | return oom_kill_task(p, message); |
256 | } | 291 | } |
257 | 292 | ||
258 | /** | 293 | /** |
@@ -263,10 +298,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) | |||
263 | * OR try to be smart about which process to kill. Note that we | 298 | * OR try to be smart about which process to kill. Note that we |
264 | * don't have to be perfect here, we just have to be good. | 299 | * don't have to be perfect here, we just have to be good. |
265 | */ | 300 | */ |
266 | void out_of_memory(gfp_t gfp_mask, int order) | 301 | void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) |
267 | { | 302 | { |
268 | struct mm_struct *mm = NULL; | 303 | struct mm_struct *mm = NULL; |
269 | task_t * p; | 304 | task_t *p; |
305 | unsigned long points = 0; | ||
270 | 306 | ||
271 | if (printk_ratelimit()) { | 307 | if (printk_ratelimit()) { |
272 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", | 308 | printk("oom-killer: gfp_mask=0x%x, order=%d\n", |
@@ -277,24 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order) | |||
277 | 313 | ||
278 | cpuset_lock(); | 314 | cpuset_lock(); |
279 | read_lock(&tasklist_lock); | 315 | read_lock(&tasklist_lock); |
316 | |||
317 | /* | ||
318 | * Check if there were limitations on the allocation (only relevant for | ||
319 | * NUMA) that may require different handling. | ||
320 | */ | ||
321 | switch (constrained_alloc(zonelist, gfp_mask)) { | ||
322 | case CONSTRAINT_MEMORY_POLICY: | ||
323 | mm = oom_kill_process(current, points, | ||
324 | "No available memory (MPOL_BIND)"); | ||
325 | break; | ||
326 | |||
327 | case CONSTRAINT_CPUSET: | ||
328 | mm = oom_kill_process(current, points, | ||
329 | "No available memory in cpuset"); | ||
330 | break; | ||
331 | |||
332 | case CONSTRAINT_NONE: | ||
280 | retry: | 333 | retry: |
281 | p = select_bad_process(); | 334 | /* |
335 | * Rambo mode: Shoot down a process and hope it solves whatever | ||
336 | * issues we may have. | ||
337 | */ | ||
338 | p = select_bad_process(&points); | ||
282 | 339 | ||
283 | if (PTR_ERR(p) == -1UL) | 340 | if (PTR_ERR(p) == -1UL) |
284 | goto out; | 341 | goto out; |
285 | 342 | ||
286 | /* Found nothing?!?! Either we hang forever, or we panic. */ | 343 | /* Found nothing?!?! Either we hang forever, or we panic. */ |
287 | if (!p) { | 344 | if (!p) { |
288 | read_unlock(&tasklist_lock); | 345 | read_unlock(&tasklist_lock); |
289 | cpuset_unlock(); | 346 | cpuset_unlock(); |
290 | panic("Out of memory and no killable processes...\n"); | 347 | panic("Out of memory and no killable processes...\n"); |
291 | } | 348 | } |
292 | 349 | ||
293 | mm = oom_kill_process(p); | 350 | mm = oom_kill_process(p, points, "Out of memory"); |
294 | if (!mm) | 351 | if (!mm) |
295 | goto retry; | 352 | goto retry; |
353 | |||
354 | break; | ||
355 | } | ||
296 | 356 | ||
297 | out: | 357 | out: |
298 | read_unlock(&tasklist_lock); | 358 | read_unlock(&tasklist_lock); |
299 | cpuset_unlock(); | 359 | cpuset_unlock(); |
300 | if (mm) | 360 | if (mm) |
@@ -305,5 +365,5 @@ retry: | |||
305 | * retry to allocate memory unless "p" is current | 365 | * retry to allocate memory unless "p" is current |
306 | */ | 366 | */ |
307 | if (!test_thread_flag(TIF_MEMDIE)) | 367 | if (!test_thread_flag(TIF_MEMDIE)) |
308 | schedule_timeout_interruptible(1); | 368 | schedule_timeout_uninterruptible(1); |
309 | } | 369 | } |