diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 22 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 | ||||
-rw-r--r-- | mm/oom_kill.c | 49 |
3 files changed, 75 insertions, 5 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 24eac1bc735d..8a4863c4edd4 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm: | |||
32 | - min_unmapped_ratio | 32 | - min_unmapped_ratio |
33 | - min_slab_ratio | 33 | - min_slab_ratio |
34 | - panic_on_oom | 34 | - panic_on_oom |
35 | - oom_dump_tasks | ||
35 | - oom_kill_allocating_task | 36 | - oom_kill_allocating_task |
36 | - mmap_min_address | 37 | - mmap_min_address |
37 | - numa_zonelist_order | 38 | - numa_zonelist_order |
@@ -232,6 +233,27 @@ according to your policy of failover. | |||
232 | 233 | ||
233 | ============================================================= | 234 | ============================================================= |
234 | 235 | ||
236 | oom_dump_tasks | ||
237 | |||
238 | Enables a system-wide task dump (excluding kernel threads) to be | ||
239 | produced when the kernel performs an OOM kill. The dump includes such | ||
240 | information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and | ||
241 | name. This is helpful to determine why the OOM killer was invoked | ||
242 | and to identify the rogue task that caused it. | ||
243 | |||
244 | If this is set to zero, this information is suppressed. On very | ||
245 | large systems with thousands of tasks it may not be feasible to dump | ||
246 | the memory state information for each one. Such systems should not | ||
247 | be forced to incur a performance penalty in OOM conditions when the | ||
248 | information may not be desired. | ||
249 | |||
250 | If this is set to non-zero, this information is shown whenever the | ||
251 | OOM killer actually kills a memory-hogging task. | ||
252 | |||
253 | The default value is 0. | ||
254 | |||
255 | ============================================================= | ||
256 | |||
235 | oom_kill_allocating_task | 257 | oom_kill_allocating_task |
236 | 258 | ||
237 | This enables or disables killing the OOM-triggering task in | 259 | This enables or disables killing the OOM-triggering task in |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 86daaa26d120..8c98d8147d88 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -67,6 +67,7 @@ extern int sysctl_overcommit_memory; | |||
67 | extern int sysctl_overcommit_ratio; | 67 | extern int sysctl_overcommit_ratio; |
68 | extern int sysctl_panic_on_oom; | 68 | extern int sysctl_panic_on_oom; |
69 | extern int sysctl_oom_kill_allocating_task; | 69 | extern int sysctl_oom_kill_allocating_task; |
70 | extern int sysctl_oom_dump_tasks; | ||
70 | extern int max_threads; | 71 | extern int max_threads; |
71 | extern int core_uses_pid; | 72 | extern int core_uses_pid; |
72 | extern int suid_dumpable; | 73 | extern int suid_dumpable; |
@@ -871,6 +872,14 @@ static struct ctl_table vm_table[] = { | |||
871 | .proc_handler = &proc_dointvec, | 872 | .proc_handler = &proc_dointvec, |
872 | }, | 873 | }, |
873 | { | 874 | { |
875 | .ctl_name = CTL_UNNUMBERED, | ||
876 | .procname = "oom_dump_tasks", | ||
877 | .data = &sysctl_oom_dump_tasks, | ||
878 | .maxlen = sizeof(sysctl_oom_dump_tasks), | ||
879 | .mode = 0644, | ||
880 | .proc_handler = &proc_dointvec, | ||
881 | }, | ||
882 | { | ||
874 | .ctl_name = VM_OVERCOMMIT_RATIO, | 883 | .ctl_name = VM_OVERCOMMIT_RATIO, |
875 | .procname = "overcommit_ratio", | 884 | .procname = "overcommit_ratio", |
876 | .data = &sysctl_overcommit_ratio, | 885 | .data = &sysctl_overcommit_ratio, |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ef5084dbc793..4194b9db0104 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -29,6 +29,7 @@ | |||
29 | 29 | ||
30 | int sysctl_panic_on_oom; | 30 | int sysctl_panic_on_oom; |
31 | int sysctl_oom_kill_allocating_task; | 31 | int sysctl_oom_kill_allocating_task; |
32 | int sysctl_oom_dump_tasks; | ||
32 | static DEFINE_SPINLOCK(zone_scan_mutex); | 33 | static DEFINE_SPINLOCK(zone_scan_mutex); |
33 | /* #define DEBUG */ | 34 | /* #define DEBUG */ |
34 | 35 | ||
@@ -263,6 +264,41 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, | |||
263 | } | 264 | } |
264 | 265 | ||
265 | /** | 266 | /** |
267 | * Dumps the current memory state of all system tasks, excluding kernel threads. | ||
268 | * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj | ||
269 | * score, and name. | ||
270 | * | ||
271 | * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are | ||
272 | * shown. | ||
273 | * | ||
274 | * Call with tasklist_lock read-locked. | ||
275 | */ | ||
276 | static void dump_tasks(const struct mem_cgroup *mem) | ||
277 | { | ||
278 | struct task_struct *g, *p; | ||
279 | |||
280 | printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj " | ||
281 | "name\n"); | ||
282 | do_each_thread(g, p) { | ||
283 | /* | ||
284 | * total_vm and rss sizes do not exist for tasks with a | ||
285 | * detached mm so there's no need to report them. | ||
286 | */ | ||
287 | if (!p->mm) | ||
288 | continue; | ||
289 | if (mem && !task_in_mem_cgroup(p, mem)) | ||
290 | continue; | ||
291 | |||
292 | task_lock(p); | ||
293 | printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", | ||
294 | p->pid, p->uid, p->tgid, p->mm->total_vm, | ||
295 | get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, | ||
296 | p->comm); | ||
297 | task_unlock(p); | ||
298 | } while_each_thread(g, p); | ||
299 | } | ||
300 | |||
301 | /** | ||
266 | * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO | 302 | * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO |
267 | * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO | 303 | * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO |
268 | * set. | 304 | * set. |
@@ -339,7 +375,8 @@ static int oom_kill_task(struct task_struct *p) | |||
339 | } | 375 | } |
340 | 376 | ||
341 | static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | 377 | static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, |
342 | unsigned long points, const char *message) | 378 | unsigned long points, struct mem_cgroup *mem, |
379 | const char *message) | ||
343 | { | 380 | { |
344 | struct task_struct *c; | 381 | struct task_struct *c; |
345 | 382 | ||
@@ -349,6 +386,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
349 | current->comm, gfp_mask, order, current->oomkilladj); | 386 | current->comm, gfp_mask, order, current->oomkilladj); |
350 | dump_stack(); | 387 | dump_stack(); |
351 | show_mem(); | 388 | show_mem(); |
389 | if (sysctl_oom_dump_tasks) | ||
390 | dump_tasks(mem); | ||
352 | } | 391 | } |
353 | 392 | ||
354 | /* | 393 | /* |
@@ -389,7 +428,7 @@ retry: | |||
389 | if (!p) | 428 | if (!p) |
390 | p = current; | 429 | p = current; |
391 | 430 | ||
392 | if (oom_kill_process(p, gfp_mask, 0, points, | 431 | if (oom_kill_process(p, gfp_mask, 0, points, mem, |
393 | "Memory cgroup out of memory")) | 432 | "Memory cgroup out of memory")) |
394 | goto retry; | 433 | goto retry; |
395 | out: | 434 | out: |
@@ -495,7 +534,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
495 | 534 | ||
496 | switch (constraint) { | 535 | switch (constraint) { |
497 | case CONSTRAINT_MEMORY_POLICY: | 536 | case CONSTRAINT_MEMORY_POLICY: |
498 | oom_kill_process(current, gfp_mask, order, points, | 537 | oom_kill_process(current, gfp_mask, order, points, NULL, |
499 | "No available memory (MPOL_BIND)"); | 538 | "No available memory (MPOL_BIND)"); |
500 | break; | 539 | break; |
501 | 540 | ||
@@ -505,7 +544,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
505 | /* Fall-through */ | 544 | /* Fall-through */ |
506 | case CONSTRAINT_CPUSET: | 545 | case CONSTRAINT_CPUSET: |
507 | if (sysctl_oom_kill_allocating_task) { | 546 | if (sysctl_oom_kill_allocating_task) { |
508 | oom_kill_process(current, gfp_mask, order, points, | 547 | oom_kill_process(current, gfp_mask, order, points, NULL, |
509 | "Out of memory (oom_kill_allocating_task)"); | 548 | "Out of memory (oom_kill_allocating_task)"); |
510 | break; | 549 | break; |
511 | } | 550 | } |
@@ -525,7 +564,7 @@ retry: | |||
525 | panic("Out of memory and no killable processes...\n"); | 564 | panic("Out of memory and no killable processes...\n"); |
526 | } | 565 | } |
527 | 566 | ||
528 | if (oom_kill_process(p, gfp_mask, order, points, | 567 | if (oom_kill_process(p, gfp_mask, order, points, NULL, |
529 | "Out of memory")) | 568 | "Out of memory")) |
530 | goto retry; | 569 | goto retry; |
531 | 570 | ||