aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKurt Garloff <garloff@suse.de>2006-02-20 21:27:51 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-02-20 23:00:09 -0500
commit9827b781f20828e5ceb911b879f268f78fe90815 (patch)
tree9ba223facf6071a1fd21bf8471801ab794738c05
parentbd71c2b17468a2531fb4c81ec1d73520845e97e1 (diff)
[PATCH] OOM kill: children accounting
In the badness() calculation, there's currently this piece of code: /* * Processes which fork a lot of child processes are likely * a good choice. We add the vmsize of the children if they * have an own mm. This prevents forking servers to flood the * machine with an endless amount of children */ list_for_each(tsk, &p->children) { struct task_struct *chld; chld = list_entry(tsk, struct task_struct, sibling); if (chld->mm != p->mm && chld->mm) points += chld->mm->total_vm; } The intention is clear: If some server (apache) keeps spawning new children and we run OOM, we want to kill the father rather than picking a child. This -- to some degree -- also helps a bit with getting fork bombs under control, though I'd consider this a desirable side-effect rather than a feature. There's one problem with this: No matter how many or few children there are, if just one of them misbehaves, and all others (including the father) do everything right, we still always kill the whole family. This hits in real life; whether it's javascript in konqueror resulting in kdeinit (and thus the whole KDE session) being hit or just a classical server that spawns children. Sidenote: The killer does kill all direct children as well, not only the selected father, see oom_kill_process(). The idea in the attached patch is that we do want to account the memory consumption of the (direct) children to the father -- however not fully. This maintains the property that fathers with too many children will still very likely be picked, whereas a single misbehaving child has the chance to be picked by the OOM killer. In the patch I account only half (rounded up) of the children's vm_size to the parent. This means that if one child eats more mem than the rest of the family, it will be picked, otherwise it's still the father and thus the whole family that gets selected. This is a heuristic -- we could debate whether accounting for a fourth would be better than for half of it. 
Or -- if people would consider it worth the trouble -- make it a sysctl. For now I stuck to accounting for half, which should IMHO be a significant improvement. The patch does one more thing: As users tend to be irritated by the choice of killed processes (mainly because the children are killed first, despite some of them having a very low OOM score), I added some more output: The selected (father) process will be reported first and its oom_score printed to syslog. Description: Only account for half of children's vm size in oom score calculation This should still give the parent enough points in case of fork bombs. If any child however has more than 50% of the vm size of all children together, it'll get a higher score and be elected. This patch also makes the kernel display the oom_score. Signed-off-by: Kurt Garloff <garloff@suse.de> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--mm/oom_kill.c26
1 files changed, 16 insertions, 10 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b05ab8f2a562..949eba1d5ba3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,15 +58,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
58 58
59 /* 59 /*
60 * Processes which fork a lot of child processes are likely 60 * Processes which fork a lot of child processes are likely
61 * a good choice. We add the vmsize of the children if they 61 * a good choice. We add half the vmsize of the children if they
62 * have an own mm. This prevents forking servers to flood the 62 * have an own mm. This prevents forking servers to flood the
63 * machine with an endless amount of children 63 * machine with an endless amount of children. In case a single
64 * child is eating the vast majority of memory, adding only half
65 * to the parents will make the child our kill candidate of choice.
64 */ 66 */
65 list_for_each(tsk, &p->children) { 67 list_for_each(tsk, &p->children) {
66 struct task_struct *chld; 68 struct task_struct *chld;
67 chld = list_entry(tsk, struct task_struct, sibling); 69 chld = list_entry(tsk, struct task_struct, sibling);
68 if (chld->mm != p->mm && chld->mm) 70 if (chld->mm != p->mm && chld->mm)
69 points += chld->mm->total_vm; 71 points += chld->mm->total_vm/2 + 1;
70 } 72 }
71 73
72 /* 74 /*
@@ -136,12 +138,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
136 * 138 *
137 * (not docbooked, we don't want this one cluttering up the manual) 139 * (not docbooked, we don't want this one cluttering up the manual)
138 */ 140 */
139static struct task_struct * select_bad_process(void) 141static struct task_struct *select_bad_process(unsigned long *ppoints)
140{ 142{
141 unsigned long maxpoints = 0;
142 struct task_struct *g, *p; 143 struct task_struct *g, *p;
143 struct task_struct *chosen = NULL; 144 struct task_struct *chosen = NULL;
144 struct timespec uptime; 145 struct timespec uptime;
146 *ppoints = 0;
145 147
146 do_posix_clock_monotonic_gettime(&uptime); 148 do_posix_clock_monotonic_gettime(&uptime);
147 do_each_thread(g, p) { 149 do_each_thread(g, p) {
@@ -169,9 +171,9 @@ static struct task_struct * select_bad_process(void)
169 return p; 171 return p;
170 172
171 points = badness(p, uptime.tv_sec); 173 points = badness(p, uptime.tv_sec);
172 if (points > maxpoints || !chosen) { 174 if (points > *ppoints || !chosen) {
173 chosen = p; 175 chosen = p;
174 maxpoints = points; 176 *ppoints = points;
175 } 177 }
176 } while_each_thread(g, p); 178 } while_each_thread(g, p);
177 return chosen; 179 return chosen;
@@ -237,12 +239,15 @@ static struct mm_struct *oom_kill_task(task_t *p)
237 return mm; 239 return mm;
238} 240}
239 241
240static struct mm_struct *oom_kill_process(struct task_struct *p) 242static struct mm_struct *oom_kill_process(struct task_struct *p,
243 unsigned long points)
241{ 244{
242 struct mm_struct *mm; 245 struct mm_struct *mm;
243 struct task_struct *c; 246 struct task_struct *c;
244 struct list_head *tsk; 247 struct list_head *tsk;
245 248
249 printk(KERN_ERR "Out of Memory: Kill process %d (%s) score %li and "
250 "children.\n", p->pid, p->comm, points);
246 /* Try to kill a child first */ 251 /* Try to kill a child first */
247 list_for_each(tsk, &p->children) { 252 list_for_each(tsk, &p->children) {
248 c = list_entry(tsk, struct task_struct, sibling); 253 c = list_entry(tsk, struct task_struct, sibling);
@@ -267,6 +272,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
267{ 272{
268 struct mm_struct *mm = NULL; 273 struct mm_struct *mm = NULL;
269 task_t * p; 274 task_t * p;
275 unsigned long points;
270 276
271 if (printk_ratelimit()) { 277 if (printk_ratelimit()) {
272 printk("oom-killer: gfp_mask=0x%x, order=%d\n", 278 printk("oom-killer: gfp_mask=0x%x, order=%d\n",
@@ -278,7 +284,7 @@ void out_of_memory(gfp_t gfp_mask, int order)
278 cpuset_lock(); 284 cpuset_lock();
279 read_lock(&tasklist_lock); 285 read_lock(&tasklist_lock);
280retry: 286retry:
281 p = select_bad_process(); 287 p = select_bad_process(&points);
282 288
283 if (PTR_ERR(p) == -1UL) 289 if (PTR_ERR(p) == -1UL)
284 goto out; 290 goto out;
@@ -290,7 +296,7 @@ retry:
290 panic("Out of memory and no killable processes...\n"); 296 panic("Out of memory and no killable processes...\n");
291 } 297 }
292 298
293 mm = oom_kill_process(p); 299 mm = oom_kill_process(p, points);
294 if (!mm) 300 if (!mm)
295 goto retry; 301 goto retry;
296 302