author		David Rientjes <rientjes@google.com>	2012-07-31 19:43:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 21:42:44 -0400
commit		6b0c81b3be114a93f79bd4c5639ade5107d77c21
tree		4298dd463fcd7bbb629f37c309daf382c57017ce /mm
parent		9cbb78bb314360a860a8b23723971cb6fcb54176
mm, oom: reduce dependency on tasklist_lock
Since exiting tasks require write_lock_irq(&tasklist_lock) several times,
try to reduce the amount of time the read side is held for oom kills.  This
makes the interface with the memcg oom handler more consistent since it now
never needs to take tasklist_lock unnecessarily.

The only time the oom killer now takes tasklist_lock is when iterating the
children of the selected task; everything else is protected by
rcu_read_lock().

This requires that a reference to the selected process, p, is grabbed
before calling oom_kill_process().  It may release it and grab a reference
on another one of p's threads if !p->mm, but it also guarantees that it
will release the reference before returning.

[hughd@google.com: fix duplicate put_task_struct()]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
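The heart of the patch is a reference hand-off: the selector pins the chosen
task with get_task_struct() while the task cannot yet disappear (under
rcu_read_lock()), and oom_kill_process() then owns that reference and must
drop it with put_task_struct() on every return path. Below is a minimal
userspace sketch of that hand-off, not kernel code: the names are
hypothetical, and a plain C11 atomic counter stands in for task_struct's
usage count and the real get/put helpers.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for task_struct: just a refcount and a name. */
struct task {
	atomic_int usage;
	const char *comm;
};

static struct task *get_task(struct task *t)
{
	atomic_fetch_add(&t->usage, 1);		/* models get_task_struct() */
	return t;
}

static void put_task(struct task *t)
{
	/* models put_task_struct(): free on the last reference */
	if (atomic_fetch_sub(&t->usage, 1) == 1)
		free(t);
}

/*
 * Callee side of the contract: entered with one reference held and
 * obliged to release it on every return path, as the comment the patch
 * adds above oom_kill_process() requires.
 */
static void kill_task(struct task *victim)
{
	printf("killing %s\n", victim->comm);
	put_task(victim);			/* reference consumed here */
}

int main(void)
{
	struct task *chosen = malloc(sizeof(*chosen));

	atomic_init(&chosen->usage, 1);		/* the allocation's own reference */
	chosen->comm = "bloat";

	/*
	 * Caller side: pin the selection while it is still guaranteed to
	 * exist (under rcu_read_lock() in the kernel), then hand the
	 * pinned reference to the killer with no lock held across the call.
	 */
	get_task(chosen);
	kill_task(chosen);

	put_task(chosen);			/* drop the allocation's reference */
	return 0;
}

This is why, in the memcontrol.c hunk below, the caller can drop both its
read_lock(&tasklist_lock) bracket and its trailing put_task_struct(chosen).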
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	3
-rw-r--r--	mm/oom_kill.c	41
2 files changed, 30 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b78972e2f43f..77a29cea5d76 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1521,11 +1521,8 @@ void __mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!chosen)
 		return;
 	points = chosen_points * 1000 / totalpages;
-	read_lock(&tasklist_lock);
 	oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
 			 NULL, "Memory cgroup out of memory");
-	read_unlock(&tasklist_lock);
-	put_task_struct(chosen);
 }
 
 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c0c97aea837f..a3a32ae02e9d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -336,7 +336,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 
 /*
  * Simple selection loop. We chose the process with the highest
- * number of 'points'. We expect the caller will lock the tasklist.
+ * number of 'points'.
  *
  * (not docbooked, we don't want this one cluttering up the manual)
  */
@@ -348,6 +348,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	struct task_struct *chosen = NULL;
 	unsigned long chosen_points = 0;
 
+	rcu_read_lock();
 	do_each_thread(g, p) {
 		unsigned int points;
 
@@ -360,6 +361,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		case OOM_SCAN_CONTINUE:
 			continue;
 		case OOM_SCAN_ABORT:
+			rcu_read_unlock();
 			return ERR_PTR(-1UL);
 		case OOM_SCAN_OK:
 			break;
@@ -370,6 +372,9 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			chosen_points = points;
 		}
 	} while_each_thread(g, p);
+	if (chosen)
+		get_task_struct(chosen);
+	rcu_read_unlock();
 
 	*ppoints = chosen_points * 1000 / totalpages;
 	return chosen;
@@ -385,8 +390,6 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
  * are not shown.
  * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
  * swapents, oom_score_adj value, and name.
- *
- * Call with tasklist_lock read-locked.
  */
 static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
@@ -394,6 +397,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	struct task_struct *task;
 
 	pr_info("[ pid ]   uid  tgid total_vm      rss nr_ptes swapents oom_score_adj name\n");
+	rcu_read_lock();
 	for_each_process(p) {
 		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
@@ -416,6 +420,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 			task->signal->oom_score_adj, task->comm);
 		task_unlock(task);
 	}
+	rcu_read_unlock();
 }
 
 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
@@ -436,6 +441,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
+/*
+ * Must be called while holding a reference to p, which will be released upon
+ * returning.
+ */
 void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 		      unsigned int points, unsigned long totalpages,
 		      struct mem_cgroup *memcg, nodemask_t *nodemask,
@@ -455,6 +464,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 */
 	if (p->flags & PF_EXITING) {
 		set_tsk_thread_flag(p, TIF_MEMDIE);
+		put_task_struct(p);
 		return;
 	}
 
@@ -472,6 +482,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	 * parent. This attempts to lose the minimal amount of work done while
 	 * still freeing memory.
 	 */
+	read_lock(&tasklist_lock);
 	do {
 		list_for_each_entry(child, &t->children, sibling) {
 			unsigned int child_points;
@@ -484,15 +495,26 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			child_points = oom_badness(child, memcg, nodemask,
 								totalpages);
 			if (child_points > victim_points) {
+				put_task_struct(victim);
 				victim = child;
 				victim_points = child_points;
+				get_task_struct(victim);
 			}
 		}
 	} while_each_thread(p, t);
+	read_unlock(&tasklist_lock);
 
-	victim = find_lock_task_mm(victim);
-	if (!victim)
+	rcu_read_lock();
+	p = find_lock_task_mm(victim);
+	if (!p) {
+		rcu_read_unlock();
+		put_task_struct(victim);
 		return;
+	} else if (victim != p) {
+		get_task_struct(p);
+		put_task_struct(victim);
+		victim = p;
+	}
 
 	/* mm cannot safely be dereferenced after task_unlock(victim) */
 	mm = victim->mm;
@@ -523,9 +545,11 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 		task_unlock(p);
 		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
 	}
+	rcu_read_unlock();
 
 	set_tsk_thread_flag(victim, TIF_MEMDIE);
 	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+	put_task_struct(victim);
 }
 #undef K
 
@@ -546,9 +570,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 		if (constraint != CONSTRAINT_NONE)
 			return;
 	}
-	read_lock(&tasklist_lock);
 	dump_header(NULL, gfp_mask, order, NULL, nodemask);
-	read_unlock(&tasklist_lock);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -721,10 +743,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
 	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);
 
-	read_lock(&tasklist_lock);
 	if (sysctl_oom_kill_allocating_task && current->mm &&
 	    !oom_unkillable_task(current, NULL, nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+		get_task_struct(current);
 		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
 				 nodemask,
 				 "Out of memory (oom_kill_allocating_task)");
@@ -735,7 +757,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!p) {
 		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
-		read_unlock(&tasklist_lock);
 		panic("Out of memory and no killable processes...\n");
 	}
 	if (PTR_ERR(p) != -1UL) {
@@ -744,8 +765,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		killed = 1;
 	}
 out:
-	read_unlock(&tasklist_lock);
-
 	/*
 	 * Give the killed threads a good chance of exiting before trying to
 	 * allocate memory again.
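One subtle step in the oom_kill_process() hunk above is rebinding the
victim: after the child-preference scan, find_lock_task_mm() may return a
different thread of the pinned task, so the code takes a reference on the
replacement before dropping the one it already holds, keeping the count
balanced on every path. Below is a standalone userspace sketch of that
transfer under the same assumptions as the earlier one: hypothetical names,
an atomic counter for the refcount, and a simple array scan modelling
find_lock_task_mm().

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a refcounted task_struct. */
struct task {
	atomic_int usage;
	const char *comm;
	int has_mm;				/* models p->mm != NULL */
};

static struct task *get_task(struct task *t)
{
	atomic_fetch_add(&t->usage, 1);		/* models get_task_struct() */
	return t;
}

static void put_task(struct task *t)
{
	if (atomic_fetch_sub(&t->usage, 1) == 1)	/* models put_task_struct() */
		free(t);
}

/* Models find_lock_task_mm(): find any thread that still has an mm. */
static struct task *find_task_with_mm(struct task **group, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (group[i]->has_mm)
			return group[i];
	return NULL;
}

/*
 * The rebinding step: enter holding one reference to victim, leave
 * holding one reference to whichever thread will actually be used,
 * with the original reference dropped exactly once on every path.
 */
static struct task *rebind_victim(struct task *victim,
				  struct task **group, size_t n)
{
	struct task *p = find_task_with_mm(group, n);

	if (!p) {
		put_task(victim);	/* no usable thread: drop and bail */
		return NULL;
	}
	if (p != victim) {
		get_task(p);		/* pin the replacement first... */
		put_task(victim);	/* ...then release the original */
	}
	return p;
}

int main(void)
{
	struct task *leader = calloc(1, sizeof(*leader));
	struct task *worker = calloc(1, sizeof(*worker));
	struct task *group[] = { leader, worker };
	struct task *victim;

	atomic_init(&leader->usage, 1);
	leader->comm = "leader";		/* has already released its mm */
	atomic_init(&worker->usage, 1);
	worker->comm = "worker";
	worker->has_mm = 1;

	get_task(leader);			/* selection pinned the group leader */
	victim = rebind_victim(leader, group, 2);
	if (victim) {
		printf("victim rebound to %s\n", victim->comm);
		put_task(victim);		/* the final put in oom_kill_process() */
	}

	put_task(leader);			/* drop the allocations' own refs */
	put_task(worker);
	return 0;
}

The same get-before-put ordering appears in the child-preference loop in the
diff, which swaps the pinned reference over to a preferred child, and the
PF_EXITING shortcut likewise has to put the reference before returning early.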