author		Shakeel Butt <shakeelb@google.com>	2019-07-12 00:00:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-12 14:05:47 -0400
commit		5eee7e1cdb97123bb55ac14ccd3af8b6edc31537
tree		599719a7a9b06d61532fe62b8c4dab76165ba624
parent		f168a9a54ec39b3f832c353733898b713b6b5c1f
mm, oom: refactor dump_tasks for memcg OOMs
dump_tasks() traverses all existing processes even in the memcg OOM
context, which is not only unnecessary but also wasteful. It imposes a
long RCU critical section even for a contained context, which can be
quite disruptive.

Change dump_tasks() to mirror select_bad_process(): during a memcg OOM,
use mem_cgroup_scan_tasks() to traverse only the processes of the target
memcg hierarchy.
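
For reference, select_bad_process() already dispatches between the memcg and the system-wide case in this way, and the rework below gives dump_tasks() the same shape with dump_task() as the per-task callback. A simplified sketch of that existing dispatch (condensed from mm/oom_kill.c in this series; not the verbatim upstream code):

```c
/*
 * Simplified sketch: a memcg OOM walks only the target hierarchy via
 * mem_cgroup_scan_tasks(), while a global OOM walks every process
 * under a single RCU read-side critical section.
 */
static void select_bad_process(struct oom_control *oc)
{
	if (is_memcg_oom(oc))
		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
	else {
		struct task_struct *p;

		rcu_read_lock();
		for_each_process(p)
			if (oom_evaluate_task(p, oc))
				break;
		rcu_read_unlock();
	}

	oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
}
```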
Link: http://lkml.kernel.org/r/20190617231207.160865-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/oom_kill.c	68
1 file changed, 40 insertions(+), 28 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 606e5e4c6a3e..59326614508a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -382,10 +382,38 @@ static void select_bad_process(struct oom_control *oc)
 		oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
 }
 
+static int dump_task(struct task_struct *p, void *arg)
+{
+	struct oom_control *oc = arg;
+	struct task_struct *task;
+
+	if (oom_unkillable_task(p, NULL, oc->nodemask))
+		return 0;
+
+	task = find_lock_task_mm(p);
+	if (!task) {
+		/*
+		 * This is a kthread or all of p's threads have already
+		 * detached their mm's.  There's no need to report
+		 * them; they can't be oom killed anyway.
+		 */
+		return 0;
+	}
+
+	pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+		task->pid, from_kuid(&init_user_ns, task_uid(task)),
+		task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
+		mm_pgtables_bytes(task->mm),
+		get_mm_counter(task->mm, MM_SWAPENTS),
+		task->signal->oom_score_adj, task->comm);
+	task_unlock(task);
+
+	return 0;
+}
+
 /**
  * dump_tasks - dump current memory state of all system tasks
- * @memcg: current's memory controller, if constrained
- * @nodemask: nodemask passed to page allocator for mempolicy ooms
+ * @oc: pointer to struct oom_control
  *
  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
  * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
@@ -393,37 +421,21 @@ static void select_bad_process(struct oom_control *oc)
  * State information includes task's pid, uid, tgid, vm size, rss,
  * pgtables_bytes, swapents, oom_score_adj value, and name.
  */
-static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
+static void dump_tasks(struct oom_control *oc)
 {
-	struct task_struct *p;
-	struct task_struct *task;
-
 	pr_info("Tasks state (memory values in pages):\n");
 	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
-	rcu_read_lock();
-	for_each_process(p) {
-		if (oom_unkillable_task(p, memcg, nodemask))
-			continue;
 
-		task = find_lock_task_mm(p);
-		if (!task) {
-			/*
-			 * This is a kthread or all of p's threads have already
-			 * detached their mm's.  There's no need to report
-			 * them; they can't be oom killed anyway.
-			 */
-			continue;
-		}
+	if (is_memcg_oom(oc))
+		mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
+	else {
+		struct task_struct *p;
 
-		pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
-			task->pid, from_kuid(&init_user_ns, task_uid(task)),
-			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
-			mm_pgtables_bytes(task->mm),
-			get_mm_counter(task->mm, MM_SWAPENTS),
-			task->signal->oom_score_adj, task->comm);
-		task_unlock(task);
+		rcu_read_lock();
+		for_each_process(p)
+			dump_task(p, oc);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
 }
 
 static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
@@ -455,7 +467,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
 		dump_unreclaimable_slab();
 	}
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(oc->memcg, oc->nodemask);
+		dump_tasks(oc);
 	if (p)
 		dump_oom_summary(oc, p);
 }
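
For context on the callback contract dump_task() plugs into: mem_cgroup_scan_tasks() invokes the callback for each process in the hierarchy rooted at the given memcg, and a nonzero return from the callback aborts the walk, which is why dump_task() always returns 0. A minimal sketch of the declaration (the prototype matches include/linux/memcontrol.h; the comment wording is editorial):

```c
/*
 * Call @fn(task, @arg) for each process in the memcg hierarchy rooted
 * at @memcg.  A nonzero return from @fn stops the walk and is
 * propagated to the caller; dump_task() returns 0 unconditionally, so
 * the entire hierarchy is dumped.
 */
int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
			  int (*fn)(struct task_struct *, void *), void *arg);
```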
