about | summary | refs | log | tree | commit | diff | stats
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- mm/oom_kill.c 124
1 files changed, 92 insertions, 32 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b05ab8f2a562..78747afad6b0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,15 +58,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
58 58
59 /* 59 /*
60 * Processes which fork a lot of child processes are likely 60 * Processes which fork a lot of child processes are likely
61 * a good choice. We add the vmsize of the children if they 61 * a good choice. We add half the vmsize of the children if they
62 * have an own mm. This prevents forking servers to flood the 62 * have an own mm. This prevents forking servers to flood the
63 * machine with an endless amount of children 63 * machine with an endless amount of children. In case a single
64 * child is eating the vast majority of memory, adding only half
65 * to the parents will make the child our kill candidate of choice.
64 */ 66 */
65 list_for_each(tsk, &p->children) { 67 list_for_each(tsk, &p->children) {
66 struct task_struct *chld; 68 struct task_struct *chld;
67 chld = list_entry(tsk, struct task_struct, sibling); 69 chld = list_entry(tsk, struct task_struct, sibling);
68 if (chld->mm != p->mm && chld->mm) 70 if (chld->mm != p->mm && chld->mm)
69 points += chld->mm->total_vm; 71 points += chld->mm->total_vm/2 + 1;
70 } 72 }
71 73
72 /* 74 /*
@@ -131,17 +133,47 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
131} 133}
132 134
133/* 135/*
136 * Types of limitations to the nodes from which allocations may occur
137 */
138#define CONSTRAINT_NONE 1
139#define CONSTRAINT_MEMORY_POLICY 2
140#define CONSTRAINT_CPUSET 3
141
142/*
143 * Determine the type of allocation constraint.
144 */
145static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
146{ /* Classifies the allocation as CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY or CONSTRAINT_NONE; without CONFIG_NUMA the result is always CONSTRAINT_NONE. */
147#ifdef CONFIG_NUMA
148 struct zone **z;
149 nodemask_t nodes = node_online_map; /* local copy of the online-node mask; nodes covered by the zonelist are cleared from it below */
150
151 for (z = zonelist->zones; *z; z++) /* walk the NULL-terminated zone array */
152 if (cpuset_zone_allowed(*z, gfp_mask))
153 node_clear((*z)->zone_pgdat->node_id,
154 nodes); /* this zone's node is reachable through the zonelist */
155 else
156 return CONSTRAINT_CPUSET; /* first zone rejected by cpuset_zone_allowed() ends the scan */
157
158 if (!nodes_empty(nodes)) /* some online node has no zone in this zonelist */
159 return CONSTRAINT_MEMORY_POLICY;
160#endif
161
162 return CONSTRAINT_NONE; /* every online node was covered, or the kernel is built without CONFIG_NUMA */
163}
164
165/*
134 * Simple selection loop. We chose the process with the highest 166 * Simple selection loop. We chose the process with the highest
135 * number of 'points'. We expect the caller will lock the tasklist. 167 * number of 'points'. We expect the caller will lock the tasklist.
136 * 168 *
137 * (not docbooked, we don't want this one cluttering up the manual) 169 * (not docbooked, we don't want this one cluttering up the manual)
138 */ 170 */
139static struct task_struct * select_bad_process(void) 171static struct task_struct *select_bad_process(unsigned long *ppoints)
140{ 172{
141 unsigned long maxpoints = 0;
142 struct task_struct *g, *p; 173 struct task_struct *g, *p;
143 struct task_struct *chosen = NULL; 174 struct task_struct *chosen = NULL;
144 struct timespec uptime; 175 struct timespec uptime;
176 *ppoints = 0;
145 177
146 do_posix_clock_monotonic_gettime(&uptime); 178 do_posix_clock_monotonic_gettime(&uptime);
147 do_each_thread(g, p) { 179 do_each_thread(g, p) {
@@ -169,9 +201,9 @@ static struct task_struct * select_bad_process(void)
169 return p; 201 return p;
170 202
171 points = badness(p, uptime.tv_sec); 203 points = badness(p, uptime.tv_sec);
172 if (points > maxpoints || !chosen) { 204 if (points > *ppoints || !chosen) {
173 chosen = p; 205 chosen = p;
174 maxpoints = points; 206 *ppoints = points;
175 } 207 }
176 } while_each_thread(g, p); 208 } while_each_thread(g, p);
177 return chosen; 209 return chosen;
@@ -182,7 +214,7 @@ static struct task_struct * select_bad_process(void)
182 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that 214 * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
183 * we select a process with CAP_SYS_RAW_IO set). 215 * we select a process with CAP_SYS_RAW_IO set).
184 */ 216 */
185static void __oom_kill_task(task_t *p) 217static void __oom_kill_task(task_t *p, const char *message)
186{ 218{
187 if (p->pid == 1) { 219 if (p->pid == 1) {
188 WARN_ON(1); 220 WARN_ON(1);
@@ -198,8 +230,8 @@ static void __oom_kill_task(task_t *p)
198 return; 230 return;
199 } 231 }
200 task_unlock(p); 232 task_unlock(p);
201 printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", 233 printk(KERN_ERR "%s: Killed process %d (%s).\n",
202 p->pid, p->comm); 234 message, p->pid, p->comm);
203 235
204 /* 236 /*
205 * We give our sacrificial lamb high priority and access to 237 * We give our sacrificial lamb high priority and access to
@@ -212,7 +244,7 @@ static void __oom_kill_task(task_t *p)
212 force_sig(SIGKILL, p); 244 force_sig(SIGKILL, p);
213} 245}
214 246
215static struct mm_struct *oom_kill_task(task_t *p) 247static struct mm_struct *oom_kill_task(task_t *p, const char *message)
216{ 248{
217 struct mm_struct *mm = get_task_mm(p); 249 struct mm_struct *mm = get_task_mm(p);
218 task_t * g, * q; 250 task_t * g, * q;
@@ -224,35 +256,38 @@ static struct mm_struct *oom_kill_task(task_t *p)
224 return NULL; 256 return NULL;
225 } 257 }
226 258
227 __oom_kill_task(p); 259 __oom_kill_task(p, message);
228 /* 260 /*
229 * kill all processes that share the ->mm (i.e. all threads), 261 * kill all processes that share the ->mm (i.e. all threads),
230 * but are in a different thread group 262 * but are in a different thread group
231 */ 263 */
232 do_each_thread(g, q) 264 do_each_thread(g, q)
233 if (q->mm == mm && q->tgid != p->tgid) 265 if (q->mm == mm && q->tgid != p->tgid)
234 __oom_kill_task(q); 266 __oom_kill_task(q, message);
235 while_each_thread(g, q); 267 while_each_thread(g, q);
236 268
237 return mm; 269 return mm;
238} 270}
239 271
240static struct mm_struct *oom_kill_process(struct task_struct *p) 272static struct mm_struct *oom_kill_process(struct task_struct *p,
273 unsigned long points, const char *message)
241{ 274{
242 struct mm_struct *mm; 275 struct mm_struct *mm;
243 struct task_struct *c; 276 struct task_struct *c;
244 struct list_head *tsk; 277 struct list_head *tsk;
245 278
279 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and "
280 "children.\n", p->pid, p->comm, points);
246 /* Try to kill a child first */ 281 /* Try to kill a child first */
247 list_for_each(tsk, &p->children) { 282 list_for_each(tsk, &p->children) {
248 c = list_entry(tsk, struct task_struct, sibling); 283 c = list_entry(tsk, struct task_struct, sibling);
249 if (c->mm == p->mm) 284 if (c->mm == p->mm)
250 continue; 285 continue;
251 mm = oom_kill_task(c); 286 mm = oom_kill_task(c, message);
252 if (mm) 287 if (mm)
253 return mm; 288 return mm;
254 } 289 }
255 return oom_kill_task(p); 290 return oom_kill_task(p, message);
256} 291}
257 292
258/** 293/**
@@ -263,10 +298,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p)
263 * OR try to be smart about which process to kill. Note that we 298 * OR try to be smart about which process to kill. Note that we
264 * don't have to be perfect here, we just have to be good. 299 * don't have to be perfect here, we just have to be good.
265 */ 300 */
266void out_of_memory(gfp_t gfp_mask, int order) 301void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
267{ 302{
268 struct mm_struct *mm = NULL; 303 struct mm_struct *mm = NULL;
269 task_t * p; 304 task_t *p;
305 unsigned long points = 0;
270 306
271 if (printk_ratelimit()) { 307 if (printk_ratelimit()) {
272 printk("oom-killer: gfp_mask=0x%x, order=%d\n", 308 printk("oom-killer: gfp_mask=0x%x, order=%d\n",
@@ -277,24 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order)
277 313
278 cpuset_lock(); 314 cpuset_lock();
279 read_lock(&tasklist_lock); 315 read_lock(&tasklist_lock);
316
317 /*
318 * Check if there were limitations on the allocation (only relevant for
319 * NUMA) that may require different handling.
320 */
321 switch (constrained_alloc(zonelist, gfp_mask)) {
322 case CONSTRAINT_MEMORY_POLICY:
323 mm = oom_kill_process(current, points,
324 "No available memory (MPOL_BIND)");
325 break;
326
327 case CONSTRAINT_CPUSET:
328 mm = oom_kill_process(current, points,
329 "No available memory in cpuset");
330 break;
331
332 case CONSTRAINT_NONE:
280retry: 333retry:
281 p = select_bad_process(); 334 /*
335 * Rambo mode: Shoot down a process and hope it solves whatever
336 * issues we may have.
337 */
338 p = select_bad_process(&points);
282 339
283 if (PTR_ERR(p) == -1UL) 340 if (PTR_ERR(p) == -1UL)
284 goto out; 341 goto out;
285 342
286 /* Found nothing?!?! Either we hang forever, or we panic. */ 343 /* Found nothing?!?! Either we hang forever, or we panic. */
287 if (!p) { 344 if (!p) {
288 read_unlock(&tasklist_lock); 345 read_unlock(&tasklist_lock);
289 cpuset_unlock(); 346 cpuset_unlock();
290 panic("Out of memory and no killable processes...\n"); 347 panic("Out of memory and no killable processes...\n");
291 } 348 }
292 349
293 mm = oom_kill_process(p); 350 mm = oom_kill_process(p, points, "Out of memory");
294 if (!mm) 351 if (!mm)
295 goto retry; 352 goto retry;
353
354 break;
355 }
296 356
297 out: 357out:
298 read_unlock(&tasklist_lock); 358 read_unlock(&tasklist_lock);
299 cpuset_unlock(); 359 cpuset_unlock();
300 if (mm) 360 if (mm)
@@ -305,5 +365,5 @@ retry:
305 * retry to allocate memory unless "p" is current 365 * retry to allocate memory unless "p" is current
306 */ 366 */
307 if (!test_thread_flag(TIF_MEMDIE)) 367 if (!test_thread_flag(TIF_MEMDIE))
308 schedule_timeout_interruptible(1); 368 schedule_timeout_uninterruptible(1);
309} 369}