author     Christoph Lameter <clameter@engr.sgi.com>  2006-02-20 21:27:52 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>      2006-02-20 23:00:09 -0500
commit     9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc (patch)
tree       33c69908705c88b86f14d9e835b6dee6a9c3a31b
parent     9827b781f20828e5ceb911b879f268f78fe90815 (diff)
[PATCH] Terminate process that fails on a constrained allocation
Some allocations are restricted to a limited set of nodes (due to memory
policies or cpuset constraints). If the page allocator is not able to find
enough memory then that does not mean that overall system memory is low.

In particular, going postal and more or less randomly shooting at processes
is not likely to help the situation but may just lead to suicide (the whole
system coming down).

It is better to signal to the process that no memory exists given the
constraints that the process (or the configuration of the process) has
placed on the allocation behavior. The process may be killed, but then the
sysadmin or developer can investigate the situation. The solution is
similar to what we do when running out of hugepages.

This patch adds a check before we kill processes. At that point performance
considerations do not matter much, so we just scan the zonelist and
reconstruct a list of nodes. If the list of nodes does not contain all
online nodes then this is a constrained allocation and we should kill the
current process.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
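The classification step described above is small enough to model outside the
kernel. The following standalone C program is a simplified sketch of the same
idea: it uses a plain bitmask and a hypothetical struct zone stand-in in place
of the kernel's nodemask_t, node_online_map, and cpuset_zone_allowed(), so it
illustrates the algorithm in the patch below rather than the kernel API.

#include <stdio.h>

/* Constraint types, mirroring the patch below. */
#define CONSTRAINT_NONE          1
#define CONSTRAINT_MEMORY_POLICY 2
#define CONSTRAINT_CPUSET        3

/* Hypothetical stand-in for the kernel's struct zone. */
struct zone {
	int node_id;        /* node the zone lives on */
	int cpuset_allowed; /* what cpuset_zone_allowed() would say */
};

/*
 * Walk a NULL-terminated zonelist and clear each reachable node
 * from a bitmask of online nodes (bit n set = node n online).
 */
static int constrained_alloc(struct zone **zones, unsigned long online_nodes)
{
	unsigned long nodes = online_nodes;

	for (; *zones; zones++) {
		if (!(*zones)->cpuset_allowed)
			return CONSTRAINT_CPUSET;     /* a zone the cpuset forbids */
		nodes &= ~(1UL << (*zones)->node_id); /* node is reachable */
	}

	/* Bits left over: the zonelist skipped some online node. */
	if (nodes)
		return CONSTRAINT_MEMORY_POLICY;

	return CONSTRAINT_NONE;
}

int main(void)
{
	/* Nodes 0 and 1 online, but an MPOL_BIND-style zonelist only reaches node 0. */
	struct zone z0 = { 0, 1 };
	struct zone *bound_list[] = { &z0, NULL };

	printf("constraint = %d\n", constrained_alloc(bound_list, 0x3UL));
	/* prints "constraint = 2" (CONSTRAINT_MEMORY_POLICY) */
	return 0;
}

Walking the zonelist clears every reachable node from the online mask, so
leftover bits mean a memory policy excluded some online node, while a zone the
cpuset forbids is detected immediately.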
-rw-r--r--  drivers/char/sysrq.c    2
-rw-r--r--  include/linux/swap.h    2
-rw-r--r--  mm/oom_kill.c         103
-rw-r--r--  mm/page_alloc.c         2
4 files changed, 81 insertions, 28 deletions
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 5765f672e853..d58f82318853 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -243,7 +243,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(void *ignored)
 {
-	out_of_memory(GFP_KERNEL, 0);
+	out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL], GFP_KERNEL, 0);
 }
 
 static DECLARE_WORK(moom_work, moom_callback, NULL);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f3e17d5963c3..d572b19afb7d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -147,7 +147,7 @@ struct swap_list_t {
 #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
 
 /* linux/mm/oom_kill.c */
-extern void out_of_memory(gfp_t gfp_mask, int order);
+extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 949eba1d5ba3..8123fad5a485 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -133,6 +133,36 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 }
 
 /*
+ * Types of limitations to the nodes from which allocations may occur
+ */
+#define CONSTRAINT_NONE 1
+#define CONSTRAINT_MEMORY_POLICY 2
+#define CONSTRAINT_CPUSET 3
+
+/*
+ * Determine the type of allocation constraint.
+ */
+static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NUMA
+	struct zone **z;
+	nodemask_t nodes = node_online_map;
+
+	for (z = zonelist->zones; *z; z++)
+		if (cpuset_zone_allowed(*z, gfp_mask))
+			node_clear((*z)->zone_pgdat->node_id,
+					nodes);
+		else
+			return CONSTRAINT_CPUSET;
+
+	if (!nodes_empty(nodes))
+		return CONSTRAINT_MEMORY_POLICY;
+#endif
+
+	return CONSTRAINT_NONE;
+}
+
+/*
  * Simple selection loop. We chose the process with the highest
  * number of 'points'. We expect the caller will lock the tasklist.
  *
@@ -184,7 +214,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
  * we select a process with CAP_SYS_RAW_IO set).
  */
-static void __oom_kill_task(task_t *p)
+static void __oom_kill_task(task_t *p, const char *message)
 {
 	if (p->pid == 1) {
 		WARN_ON(1);
@@ -200,8 +230,8 @@ static void __oom_kill_task(task_t *p)
 		return;
 	}
 	task_unlock(p);
-	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n",
-							p->pid, p->comm);
+	printk(KERN_ERR "%s: Killed process %d (%s).\n",
+				message, p->pid, p->comm);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
@@ -214,7 +244,7 @@ static void __oom_kill_task(task_t *p)
 	force_sig(SIGKILL, p);
 }
 
-static struct mm_struct *oom_kill_task(task_t *p)
+static struct mm_struct *oom_kill_task(task_t *p, const char *message)
 {
 	struct mm_struct *mm = get_task_mm(p);
 	task_t * g, * q;
@@ -226,21 +256,21 @@ static struct mm_struct *oom_kill_task(task_t *p)
 		return NULL;
 	}
 
-	__oom_kill_task(p);
+	__oom_kill_task(p, message);
 	/*
 	 * kill all processes that share the ->mm (i.e. all threads),
 	 * but are in a different thread group
 	 */
 	do_each_thread(g, q)
 		if (q->mm == mm && q->tgid != p->tgid)
-			__oom_kill_task(q);
+			__oom_kill_task(q, message);
 	while_each_thread(g, q);
 
 	return mm;
 }
 
 static struct mm_struct *oom_kill_process(struct task_struct *p,
-						unsigned long points)
+				unsigned long points, const char *message)
 {
 	struct mm_struct *mm;
 	struct task_struct *c;
@@ -253,11 +283,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
 		c = list_entry(tsk, struct task_struct, sibling);
 		if (c->mm == p->mm)
 			continue;
-		mm = oom_kill_task(c);
+		mm = oom_kill_task(c, message);
 		if (mm)
 			return mm;
 	}
-	return oom_kill_task(p);
+	return oom_kill_task(p, message);
 }
 
 /**
@@ -268,10 +298,10 @@ static struct mm_struct *oom_kill_process(struct task_struct *p,
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
 	struct mm_struct *mm = NULL;
-	task_t * p;
+	task_t *p;
 	unsigned long points;
 
 	if (printk_ratelimit()) {
@@ -283,25 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order)
 
 	cpuset_lock();
 	read_lock(&tasklist_lock);
+
+	/*
+	 * Check if there were limitations on the allocation (only relevant for
+	 * NUMA) that may require different handling.
+	 */
+	switch (constrained_alloc(zonelist, gfp_mask)) {
+	case CONSTRAINT_MEMORY_POLICY:
+		mm = oom_kill_process(current, points,
+				"No available memory (MPOL_BIND)");
+		break;
+
+	case CONSTRAINT_CPUSET:
+		mm = oom_kill_process(current, points,
+				"No available memory in cpuset");
+		break;
+
+	case CONSTRAINT_NONE:
 retry:
-	p = select_bad_process(&points);
+		/*
+		 * Rambo mode: Shoot down a process and hope it solves whatever
+		 * issues we may have.
+		 */
+		p = select_bad_process(&points);
 
-	if (PTR_ERR(p) == -1UL)
-		goto out;
+		if (PTR_ERR(p) == -1UL)
+			goto out;
 
-	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!p) {
-		read_unlock(&tasklist_lock);
-		cpuset_unlock();
-		panic("Out of memory and no killable processes...\n");
-	}
+		/* Found nothing?!?! Either we hang forever, or we panic. */
+		if (!p) {
+			read_unlock(&tasklist_lock);
+			cpuset_unlock();
+			panic("Out of memory and no killable processes...\n");
+		}
 
-	mm = oom_kill_process(p, points);
-	if (!mm)
-		goto retry;
+		mm = oom_kill_process(p, points, "Out of memory");
+		if (!mm)
+			goto retry;
+
+		break;
+	}
 
- out:
-	read_unlock(&tasklist_lock);
+out:
 	cpuset_unlock();
 	if (mm)
 		mmput(mm);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 208812b25597..791690d7d3fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1015,7 +1015,7 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	out_of_memory(gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order);
 	goto restart;
 }
 