aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2008-02-07 03:14:07 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-07 11:42:19 -0500
commitfef1bdd68c81b71882ccb6f47c70980a03182063 (patch)
tree4cfa5c2c10787a3d54de05ca511db31730a239fd
parent4c4a22148909e4c003562ea7ffe0a06e26919e3c (diff)
oom: add sysctl to enable task memory dump
Adds a new sysctl, 'oom_dump_tasks', that enables the kernel to produce a dump of all system tasks (excluding kernel threads) when performing an OOM-killing. Information includes pid, uid, tgid, vm size, rss, cpu, oom_adj score, and name. This is helpful for determining why there was an OOM condition and which rogue task caused it. It is configurable so that large systems, such as those with several thousand tasks, do not incur a performance penalty associated with dumping data they may not desire. If an OOM was triggered as a result of a memory controller, the tasklist shall be filtered to exclude tasks that are not a member of the same cgroup. Cc: Andrea Arcangeli <andrea@suse.de> Cc: Christoph Lameter <clameter@sgi.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/sysctl/vm.txt22
-rw-r--r--kernel/sysctl.c9
-rw-r--r--mm/oom_kill.c49
3 files changed, 75 insertions, 5 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 24eac1bc735d..8a4863c4edd4 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm:
32- min_unmapped_ratio 32- min_unmapped_ratio
33- min_slab_ratio 33- min_slab_ratio
34- panic_on_oom 34- panic_on_oom
35- oom_dump_tasks
35- oom_kill_allocating_task 36- oom_kill_allocating_task
36- mmap_min_address 37- mmap_min_address
37- numa_zonelist_order 38- numa_zonelist_order
@@ -232,6 +233,27 @@ according to your policy of failover.
232 233
233============================================================= 234=============================================================
234 235
236oom_dump_tasks
237
238Enables a system-wide task dump (excluding kernel threads) to be
239produced when the kernel performs an OOM-killing and includes such
240information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and
241name. This is helpful to determine why the OOM killer was invoked
242and to identify the rogue task that caused it.
243
244If this is set to zero, this information is suppressed. On very
245large systems with thousands of tasks it may not be feasible to dump
246the memory state information for each one. Such systems should not
247be forced to incur a performance penalty in OOM conditions when the
248information may not be desired.
249
250If this is set to non-zero, this information is shown whenever the
251OOM killer actually kills a memory-hogging task.
252
253The default value is 0.
254
255=============================================================
256
235oom_kill_allocating_task 257oom_kill_allocating_task
236 258
237This enables or disables killing the OOM-triggering task in 259This enables or disables killing the OOM-triggering task in
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 86daaa26d120..8c98d8147d88 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,6 +67,7 @@ extern int sysctl_overcommit_memory;
67extern int sysctl_overcommit_ratio; 67extern int sysctl_overcommit_ratio;
68extern int sysctl_panic_on_oom; 68extern int sysctl_panic_on_oom;
69extern int sysctl_oom_kill_allocating_task; 69extern int sysctl_oom_kill_allocating_task;
70extern int sysctl_oom_dump_tasks;
70extern int max_threads; 71extern int max_threads;
71extern int core_uses_pid; 72extern int core_uses_pid;
72extern int suid_dumpable; 73extern int suid_dumpable;
@@ -871,6 +872,14 @@ static struct ctl_table vm_table[] = {
871 .proc_handler = &proc_dointvec, 872 .proc_handler = &proc_dointvec,
872 }, 873 },
873 { 874 {
875 .ctl_name = CTL_UNNUMBERED,
876 .procname = "oom_dump_tasks",
877 .data = &sysctl_oom_dump_tasks,
878 .maxlen = sizeof(sysctl_oom_dump_tasks),
879 .mode = 0644,
880 .proc_handler = &proc_dointvec,
881 },
882 {
874 .ctl_name = VM_OVERCOMMIT_RATIO, 883 .ctl_name = VM_OVERCOMMIT_RATIO,
875 .procname = "overcommit_ratio", 884 .procname = "overcommit_ratio",
876 .data = &sysctl_overcommit_ratio, 885 .data = &sysctl_overcommit_ratio,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ef5084dbc793..4194b9db0104 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -29,6 +29,7 @@
29 29
30int sysctl_panic_on_oom; 30int sysctl_panic_on_oom;
31int sysctl_oom_kill_allocating_task; 31int sysctl_oom_kill_allocating_task;
32int sysctl_oom_dump_tasks;
32static DEFINE_SPINLOCK(zone_scan_mutex); 33static DEFINE_SPINLOCK(zone_scan_mutex);
33/* #define DEBUG */ 34/* #define DEBUG */
34 35
@@ -263,6 +264,41 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
263} 264}
264 265
265/** 266/**
267 * Dumps the current memory state of all system tasks, excluding kernel threads.
268 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj
269 * score, and name.
270 *
271 * If mem is non-NULL, only tasks that are members of that mem_cgroup are
272 * shown.
273 *
274 * Call with tasklist_lock read-locked.
275 */
276static void dump_tasks(const struct mem_cgroup *mem)
277{
278 struct task_struct *g, *p;
279
280 printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj "
281 "name\n");
282 do_each_thread(g, p) {
283 /*
284 * total_vm and rss sizes do not exist for tasks with a
285 * detached mm so there's no need to report them.
286 */
287 if (!p->mm)
288 continue;
289 if (mem && !task_in_mem_cgroup(p, mem))
290 continue;
291
292 task_lock(p);
293 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
294 p->pid, p->uid, p->tgid, p->mm->total_vm,
295 get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
296 p->comm);
297 task_unlock(p);
298 } while_each_thread(g, p);
299}
300
301/**
266 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO 302 * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO
267 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO 303 * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO
268 * set. 304 * set.
@@ -339,7 +375,8 @@ static int oom_kill_task(struct task_struct *p)
339} 375}
340 376
341static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, 377static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
342 unsigned long points, const char *message) 378 unsigned long points, struct mem_cgroup *mem,
379 const char *message)
343{ 380{
344 struct task_struct *c; 381 struct task_struct *c;
345 382
@@ -349,6 +386,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
349 current->comm, gfp_mask, order, current->oomkilladj); 386 current->comm, gfp_mask, order, current->oomkilladj);
350 dump_stack(); 387 dump_stack();
351 show_mem(); 388 show_mem();
389 if (sysctl_oom_dump_tasks)
390 dump_tasks(mem);
352 } 391 }
353 392
354 /* 393 /*
@@ -389,7 +428,7 @@ retry:
389 if (!p) 428 if (!p)
390 p = current; 429 p = current;
391 430
392 if (oom_kill_process(p, gfp_mask, 0, points, 431 if (oom_kill_process(p, gfp_mask, 0, points, mem,
393 "Memory cgroup out of memory")) 432 "Memory cgroup out of memory"))
394 goto retry; 433 goto retry;
395out: 434out:
@@ -495,7 +534,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
495 534
496 switch (constraint) { 535 switch (constraint) {
497 case CONSTRAINT_MEMORY_POLICY: 536 case CONSTRAINT_MEMORY_POLICY:
498 oom_kill_process(current, gfp_mask, order, points, 537 oom_kill_process(current, gfp_mask, order, points, NULL,
499 "No available memory (MPOL_BIND)"); 538 "No available memory (MPOL_BIND)");
500 break; 539 break;
501 540
@@ -505,7 +544,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
505 /* Fall-through */ 544 /* Fall-through */
506 case CONSTRAINT_CPUSET: 545 case CONSTRAINT_CPUSET:
507 if (sysctl_oom_kill_allocating_task) { 546 if (sysctl_oom_kill_allocating_task) {
508 oom_kill_process(current, gfp_mask, order, points, 547 oom_kill_process(current, gfp_mask, order, points, NULL,
509 "Out of memory (oom_kill_allocating_task)"); 548 "Out of memory (oom_kill_allocating_task)");
510 break; 549 break;
511 } 550 }
@@ -525,7 +564,7 @@ retry:
525 panic("Out of memory and no killable processes...\n"); 564 panic("Out of memory and no killable processes...\n");
526 } 565 }
527 566
528 if (oom_kill_process(p, gfp_mask, order, points, 567 if (oom_kill_process(p, gfp_mask, order, points, NULL,
529 "Out of memory")) 568 "Out of memory"))
530 goto retry; 569 goto retry;
531 570