diff options
author | David Rientjes <rientjes@google.com> | 2012-07-31 19:43:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 21:42:44 -0400 |
commit | 6b0c81b3be114a93f79bd4c5639ade5107d77c21 (patch) | |
tree | 4298dd463fcd7bbb629f37c309daf382c57017ce /mm | |
parent | 9cbb78bb314360a860a8b23723971cb6fcb54176 (diff) |
mm, oom: reduce dependency on tasklist_lock
Since exiting tasks require write_lock_irq(&tasklist_lock) several times,
try to reduce the amount of time the read side is held for oom kills. This
makes the interface with the memcg oom handler more consistent since it
now never needs to take tasklist_lock unnecessarily.
The only time the oom killer now takes tasklist_lock is when iterating the
children of the selected task, everything else is protected by
rcu_read_lock().
This requires that a reference to the selected process, p, is grabbed
before calling oom_kill_process(). It may release it and grab a reference
on another one of p's threads if !p->mm, but it also guarantees that it
will release the reference before returning.
[hughd@google.com: fix duplicate put_task_struct()]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 3 | ||||
-rw-r--r-- | mm/oom_kill.c | 41 |
2 files changed, 30 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b78972e2f43f..77a29cea5d76 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1521,11 +1521,8 @@ void __mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1521 | if (!chosen) | 1521 | if (!chosen) |
1522 | return; | 1522 | return; |
1523 | points = chosen_points * 1000 / totalpages; | 1523 | points = chosen_points * 1000 / totalpages; |
1524 | read_lock(&tasklist_lock); | ||
1525 | oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg, | 1524 | oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg, |
1526 | NULL, "Memory cgroup out of memory"); | 1525 | NULL, "Memory cgroup out of memory"); |
1527 | read_unlock(&tasklist_lock); | ||
1528 | put_task_struct(chosen); | ||
1529 | } | 1526 | } |
1530 | 1527 | ||
1531 | static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, | 1528 | static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c0c97aea837f..a3a32ae02e9d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -336,7 +336,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task, | |||
336 | 336 | ||
337 | /* | 337 | /* |
338 | * Simple selection loop. We chose the process with the highest | 338 | * Simple selection loop. We chose the process with the highest |
339 | * number of 'points'. We expect the caller will lock the tasklist. | 339 | * number of 'points'. |
340 | * | 340 | * |
341 | * (not docbooked, we don't want this one cluttering up the manual) | 341 | * (not docbooked, we don't want this one cluttering up the manual) |
342 | */ | 342 | */ |
@@ -348,6 +348,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
348 | struct task_struct *chosen = NULL; | 348 | struct task_struct *chosen = NULL; |
349 | unsigned long chosen_points = 0; | 349 | unsigned long chosen_points = 0; |
350 | 350 | ||
351 | rcu_read_lock(); | ||
351 | do_each_thread(g, p) { | 352 | do_each_thread(g, p) { |
352 | unsigned int points; | 353 | unsigned int points; |
353 | 354 | ||
@@ -360,6 +361,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
360 | case OOM_SCAN_CONTINUE: | 361 | case OOM_SCAN_CONTINUE: |
361 | continue; | 362 | continue; |
362 | case OOM_SCAN_ABORT: | 363 | case OOM_SCAN_ABORT: |
364 | rcu_read_unlock(); | ||
363 | return ERR_PTR(-1UL); | 365 | return ERR_PTR(-1UL); |
364 | case OOM_SCAN_OK: | 366 | case OOM_SCAN_OK: |
365 | break; | 367 | break; |
@@ -370,6 +372,9 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
370 | chosen_points = points; | 372 | chosen_points = points; |
371 | } | 373 | } |
372 | } while_each_thread(g, p); | 374 | } while_each_thread(g, p); |
375 | if (chosen) | ||
376 | get_task_struct(chosen); | ||
377 | rcu_read_unlock(); | ||
373 | 378 | ||
374 | *ppoints = chosen_points * 1000 / totalpages; | 379 | *ppoints = chosen_points * 1000 / totalpages; |
375 | return chosen; | 380 | return chosen; |
@@ -385,8 +390,6 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, | |||
385 | * are not shown. | 390 | * are not shown. |
386 | * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, | 391 | * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, |
387 | * swapents, oom_score_adj value, and name. | 392 | * swapents, oom_score_adj value, and name. |
388 | * | ||
389 | * Call with tasklist_lock read-locked. | ||
390 | */ | 393 | */ |
391 | static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) | 394 | static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) |
392 | { | 395 | { |
@@ -394,6 +397,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas | |||
394 | struct task_struct *task; | 397 | struct task_struct *task; |
395 | 398 | ||
396 | pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n"); | 399 | pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n"); |
400 | rcu_read_lock(); | ||
397 | for_each_process(p) { | 401 | for_each_process(p) { |
398 | if (oom_unkillable_task(p, memcg, nodemask)) | 402 | if (oom_unkillable_task(p, memcg, nodemask)) |
399 | continue; | 403 | continue; |
@@ -416,6 +420,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas | |||
416 | task->signal->oom_score_adj, task->comm); | 420 | task->signal->oom_score_adj, task->comm); |
417 | task_unlock(task); | 421 | task_unlock(task); |
418 | } | 422 | } |
423 | rcu_read_unlock(); | ||
419 | } | 424 | } |
420 | 425 | ||
421 | static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, | 426 | static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, |
@@ -436,6 +441,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, | |||
436 | } | 441 | } |
437 | 442 | ||
438 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 443 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
444 | /* | ||
445 | * Must be called while holding a reference to p, which will be released upon | ||
446 | * returning. | ||
447 | */ | ||
439 | void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | 448 | void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, |
440 | unsigned int points, unsigned long totalpages, | 449 | unsigned int points, unsigned long totalpages, |
441 | struct mem_cgroup *memcg, nodemask_t *nodemask, | 450 | struct mem_cgroup *memcg, nodemask_t *nodemask, |
@@ -455,6 +464,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
455 | */ | 464 | */ |
456 | if (p->flags & PF_EXITING) { | 465 | if (p->flags & PF_EXITING) { |
457 | set_tsk_thread_flag(p, TIF_MEMDIE); | 466 | set_tsk_thread_flag(p, TIF_MEMDIE); |
467 | put_task_struct(p); | ||
458 | return; | 468 | return; |
459 | } | 469 | } |
460 | 470 | ||
@@ -472,6 +482,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
472 | * parent. This attempts to lose the minimal amount of work done while | 482 | * parent. This attempts to lose the minimal amount of work done while |
473 | * still freeing memory. | 483 | * still freeing memory. |
474 | */ | 484 | */ |
485 | read_lock(&tasklist_lock); | ||
475 | do { | 486 | do { |
476 | list_for_each_entry(child, &t->children, sibling) { | 487 | list_for_each_entry(child, &t->children, sibling) { |
477 | unsigned int child_points; | 488 | unsigned int child_points; |
@@ -484,15 +495,26 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
484 | child_points = oom_badness(child, memcg, nodemask, | 495 | child_points = oom_badness(child, memcg, nodemask, |
485 | totalpages); | 496 | totalpages); |
486 | if (child_points > victim_points) { | 497 | if (child_points > victim_points) { |
498 | put_task_struct(victim); | ||
487 | victim = child; | 499 | victim = child; |
488 | victim_points = child_points; | 500 | victim_points = child_points; |
501 | get_task_struct(victim); | ||
489 | } | 502 | } |
490 | } | 503 | } |
491 | } while_each_thread(p, t); | 504 | } while_each_thread(p, t); |
505 | read_unlock(&tasklist_lock); | ||
492 | 506 | ||
493 | victim = find_lock_task_mm(victim); | 507 | rcu_read_lock(); |
494 | if (!victim) | 508 | p = find_lock_task_mm(victim); |
509 | if (!p) { | ||
510 | rcu_read_unlock(); | ||
511 | put_task_struct(victim); | ||
495 | return; | 512 | return; |
513 | } else if (victim != p) { | ||
514 | get_task_struct(p); | ||
515 | put_task_struct(victim); | ||
516 | victim = p; | ||
517 | } | ||
496 | 518 | ||
497 | /* mm cannot safely be dereferenced after task_unlock(victim) */ | 519 | /* mm cannot safely be dereferenced after task_unlock(victim) */ |
498 | mm = victim->mm; | 520 | mm = victim->mm; |
@@ -523,9 +545,11 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
523 | task_unlock(p); | 545 | task_unlock(p); |
524 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); | 546 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); |
525 | } | 547 | } |
548 | rcu_read_unlock(); | ||
526 | 549 | ||
527 | set_tsk_thread_flag(victim, TIF_MEMDIE); | 550 | set_tsk_thread_flag(victim, TIF_MEMDIE); |
528 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); | 551 | do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); |
552 | put_task_struct(victim); | ||
529 | } | 553 | } |
530 | #undef K | 554 | #undef K |
531 | 555 | ||
@@ -546,9 +570,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, | |||
546 | if (constraint != CONSTRAINT_NONE) | 570 | if (constraint != CONSTRAINT_NONE) |
547 | return; | 571 | return; |
548 | } | 572 | } |
549 | read_lock(&tasklist_lock); | ||
550 | dump_header(NULL, gfp_mask, order, NULL, nodemask); | 573 | dump_header(NULL, gfp_mask, order, NULL, nodemask); |
551 | read_unlock(&tasklist_lock); | ||
552 | panic("Out of memory: %s panic_on_oom is enabled\n", | 574 | panic("Out of memory: %s panic_on_oom is enabled\n", |
553 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); | 575 | sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); |
554 | } | 576 | } |
@@ -721,10 +743,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
721 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; | 743 | mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; |
722 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); | 744 | check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); |
723 | 745 | ||
724 | read_lock(&tasklist_lock); | ||
725 | if (sysctl_oom_kill_allocating_task && current->mm && | 746 | if (sysctl_oom_kill_allocating_task && current->mm && |
726 | !oom_unkillable_task(current, NULL, nodemask) && | 747 | !oom_unkillable_task(current, NULL, nodemask) && |
727 | current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { | 748 | current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { |
749 | get_task_struct(current); | ||
728 | oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, | 750 | oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, |
729 | nodemask, | 751 | nodemask, |
730 | "Out of memory (oom_kill_allocating_task)"); | 752 | "Out of memory (oom_kill_allocating_task)"); |
@@ -735,7 +757,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
735 | /* Found nothing?!?! Either we hang forever, or we panic. */ | 757 | /* Found nothing?!?! Either we hang forever, or we panic. */ |
736 | if (!p) { | 758 | if (!p) { |
737 | dump_header(NULL, gfp_mask, order, NULL, mpol_mask); | 759 | dump_header(NULL, gfp_mask, order, NULL, mpol_mask); |
738 | read_unlock(&tasklist_lock); | ||
739 | panic("Out of memory and no killable processes...\n"); | 760 | panic("Out of memory and no killable processes...\n"); |
740 | } | 761 | } |
741 | if (PTR_ERR(p) != -1UL) { | 762 | if (PTR_ERR(p) != -1UL) { |
@@ -744,8 +765,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, | |||
744 | killed = 1; | 765 | killed = 1; |
745 | } | 766 | } |
746 | out: | 767 | out: |
747 | read_unlock(&tasklist_lock); | ||
748 | |||
749 | /* | 768 | /* |
750 | * Give the killed threads a good chance of exiting before trying to | 769 | * Give the killed threads a good chance of exiting before trying to |
751 | * allocate memory again. | 770 | * allocate memory again. |