about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2015-09-16 11:51:12 -0400
committer: Tejun Heo <tj@kernel.org> 2015-09-16 11:51:12 -0400
commit: f9f9e7b776142fb1c0782cade004cc8e0147a199 (patch)
tree: 53719cfc0bf81bc7e6fb522944553d9b4fa36cbf
parent: 6ff33f3902c3b1c5d0db6b1e2c70b6d76fba357f (diff)
Revert "cgroup: simplify threadgroup locking"
This reverts commit b5ba75b5fc0e8404e2c50cb68f39bb6a53fc916f.

d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive.

Improvements for percpu_rwsem are scheduled to be merged in the coming v4.4-rc1 merge window which alleviate this issue. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
-rw-r--r-- kernel/cgroup.c 45
1 files changed, 33 insertions, 12 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..115091efa889 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
2460 if (!cgrp) 2460 if (!cgrp)
2461 return -ENODEV; 2461 return -ENODEV;
2462 2462
2463 percpu_down_write(&cgroup_threadgroup_rwsem); 2463retry_find_task:
2464 rcu_read_lock(); 2464 rcu_read_lock();
2465 if (pid) { 2465 if (pid) {
2466 tsk = find_task_by_vpid(pid); 2466 tsk = find_task_by_vpid(pid);
2467 if (!tsk) { 2467 if (!tsk) {
2468 rcu_read_unlock();
2468 ret = -ESRCH; 2469 ret = -ESRCH;
2469 goto out_unlock_rcu; 2470 goto out_unlock_cgroup;
2470 } 2471 }
2471 } else { 2472 } else {
2472 tsk = current; 2473 tsk = current;
@@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
2482 */ 2483 */
2483 if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { 2484 if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
2484 ret = -EINVAL; 2485 ret = -EINVAL;
2485 goto out_unlock_rcu; 2486 rcu_read_unlock();
2487 goto out_unlock_cgroup;
2486 } 2488 }
2487 2489
2488 get_task_struct(tsk); 2490 get_task_struct(tsk);
2489 rcu_read_unlock(); 2491 rcu_read_unlock();
2490 2492
2493 percpu_down_write(&cgroup_threadgroup_rwsem);
2494 if (threadgroup) {
2495 if (!thread_group_leader(tsk)) {
2496 /*
2497 * a race with de_thread from another thread's exec()
2498 * may strip us of our leadership, if this happens,
2499 * there is no choice but to throw this task away and
2500 * try again; this is
2501 * "double-double-toil-and-trouble-check locking".
2502 */
2503 percpu_up_write(&cgroup_threadgroup_rwsem);
2504 put_task_struct(tsk);
2505 goto retry_find_task;
2506 }
2507 }
2508
2491 ret = cgroup_procs_write_permission(tsk, cgrp, of); 2509 ret = cgroup_procs_write_permission(tsk, cgrp, of);
2492 if (!ret) 2510 if (!ret)
2493 ret = cgroup_attach_task(cgrp, tsk, threadgroup); 2511 ret = cgroup_attach_task(cgrp, tsk, threadgroup);
2494 2512
2495 put_task_struct(tsk);
2496 goto out_unlock_threadgroup;
2497
2498out_unlock_rcu:
2499 rcu_read_unlock();
2500out_unlock_threadgroup:
2501 percpu_up_write(&cgroup_threadgroup_rwsem); 2513 percpu_up_write(&cgroup_threadgroup_rwsem);
2514
2515 put_task_struct(tsk);
2516out_unlock_cgroup:
2502 cgroup_kn_unlock(of->kn); 2517 cgroup_kn_unlock(of->kn);
2503 return ret ?: nbytes; 2518 return ret ?: nbytes;
2504} 2519}
@@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2643 2658
2644 lockdep_assert_held(&cgroup_mutex); 2659 lockdep_assert_held(&cgroup_mutex);
2645 2660
2646 percpu_down_write(&cgroup_threadgroup_rwsem);
2647
2648 /* look up all csses currently attached to @cgrp's subtree */ 2661 /* look up all csses currently attached to @cgrp's subtree */
2649 down_read(&css_set_rwsem); 2662 down_read(&css_set_rwsem);
2650 css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { 2663 css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2700 goto out_finish; 2713 goto out_finish;
2701 last_task = task; 2714 last_task = task;
2702 2715
2716 percpu_down_write(&cgroup_threadgroup_rwsem);
2717 /* raced against de_thread() from another thread? */
2718 if (!thread_group_leader(task)) {
2719 percpu_up_write(&cgroup_threadgroup_rwsem);
2720 put_task_struct(task);
2721 continue;
2722 }
2723
2703 ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); 2724 ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
2704 2725
2726 percpu_up_write(&cgroup_threadgroup_rwsem);
2705 put_task_struct(task); 2727 put_task_struct(task);
2706 2728
2707 if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) 2729 if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2711 2733
2712out_finish: 2734out_finish:
2713 cgroup_migrate_finish(&preloaded_csets); 2735 cgroup_migrate_finish(&preloaded_csets);
2714 percpu_up_write(&cgroup_threadgroup_rwsem);
2715 return ret; 2736 return ret;
2716} 2737}
2717 2738