diff options
author | Tejun Heo <tj@kernel.org> | 2015-09-16 11:51:12 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2015-09-16 11:51:12 -0400 |
commit | f9f9e7b776142fb1c0782cade004cc8e0147a199 (patch) | |
tree | 53719cfc0bf81bc7e6fb522944553d9b4fa36cbf | |
parent | 6ff33f3902c3b1c5d0db6b1e2c70b6d76fba357f (diff) |
Revert "cgroup: simplify threadgroup locking"
This reverts commit b5ba75b5fc0e8404e2c50cb68f39bb6a53fc916f.
d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with
a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify
threadgroup locking") changed how cgroup synchronizes against task
fork and exit so that it uses a global percpu_rwsem instead of a
per-process rwsem; unfortunately, the write [un]lock paths of
percpu_rwsem always involve synchronize_rcu_expedited(), which turned
out to be too expensive.
Improvements for percpu_rwsem are scheduled to be merged in the coming
v4.4-rc1 merge window, which will alleviate this issue. For now, revert
the two commits to restore per-process rwsem. They will be re-applied
for the v4.4-rc1 merge window.
Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org # v4.2+
-rw-r--r-- | kernel/cgroup.c | 45 |
1 files changed, 33 insertions, 12 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2cf0f79f1fc9..115091efa889 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -2460,13 +2460,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2460 | if (!cgrp) | 2460 | if (!cgrp) |
2461 | return -ENODEV; | 2461 | return -ENODEV; |
2462 | 2462 | ||
2463 | percpu_down_write(&cgroup_threadgroup_rwsem); | 2463 | retry_find_task: |
2464 | rcu_read_lock(); | 2464 | rcu_read_lock(); |
2465 | if (pid) { | 2465 | if (pid) { |
2466 | tsk = find_task_by_vpid(pid); | 2466 | tsk = find_task_by_vpid(pid); |
2467 | if (!tsk) { | 2467 | if (!tsk) { |
2468 | rcu_read_unlock(); | ||
2468 | ret = -ESRCH; | 2469 | ret = -ESRCH; |
2469 | goto out_unlock_rcu; | 2470 | goto out_unlock_cgroup; |
2470 | } | 2471 | } |
2471 | } else { | 2472 | } else { |
2472 | tsk = current; | 2473 | tsk = current; |
@@ -2482,23 +2483,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2482 | */ | 2483 | */ |
2483 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { | 2484 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { |
2484 | ret = -EINVAL; | 2485 | ret = -EINVAL; |
2485 | goto out_unlock_rcu; | 2486 | rcu_read_unlock(); |
2487 | goto out_unlock_cgroup; | ||
2486 | } | 2488 | } |
2487 | 2489 | ||
2488 | get_task_struct(tsk); | 2490 | get_task_struct(tsk); |
2489 | rcu_read_unlock(); | 2491 | rcu_read_unlock(); |
2490 | 2492 | ||
2493 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2494 | if (threadgroup) { | ||
2495 | if (!thread_group_leader(tsk)) { | ||
2496 | /* | ||
2497 | * a race with de_thread from another thread's exec() | ||
2498 | * may strip us of our leadership, if this happens, | ||
2499 | * there is no choice but to throw this task away and | ||
2500 | * try again; this is | ||
2501 | * "double-double-toil-and-trouble-check locking". | ||
2502 | */ | ||
2503 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2504 | put_task_struct(tsk); | ||
2505 | goto retry_find_task; | ||
2506 | } | ||
2507 | } | ||
2508 | |||
2491 | ret = cgroup_procs_write_permission(tsk, cgrp, of); | 2509 | ret = cgroup_procs_write_permission(tsk, cgrp, of); |
2492 | if (!ret) | 2510 | if (!ret) |
2493 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2511 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
2494 | 2512 | ||
2495 | put_task_struct(tsk); | ||
2496 | goto out_unlock_threadgroup; | ||
2497 | |||
2498 | out_unlock_rcu: | ||
2499 | rcu_read_unlock(); | ||
2500 | out_unlock_threadgroup: | ||
2501 | percpu_up_write(&cgroup_threadgroup_rwsem); | 2513 | percpu_up_write(&cgroup_threadgroup_rwsem); |
2514 | |||
2515 | put_task_struct(tsk); | ||
2516 | out_unlock_cgroup: | ||
2502 | cgroup_kn_unlock(of->kn); | 2517 | cgroup_kn_unlock(of->kn); |
2503 | return ret ?: nbytes; | 2518 | return ret ?: nbytes; |
2504 | } | 2519 | } |
@@ -2643,8 +2658,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2643 | 2658 | ||
2644 | lockdep_assert_held(&cgroup_mutex); | 2659 | lockdep_assert_held(&cgroup_mutex); |
2645 | 2660 | ||
2646 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2647 | |||
2648 | /* look up all csses currently attached to @cgrp's subtree */ | 2661 | /* look up all csses currently attached to @cgrp's subtree */ |
2649 | down_read(&css_set_rwsem); | 2662 | down_read(&css_set_rwsem); |
2650 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { | 2663 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { |
@@ -2700,8 +2713,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2700 | goto out_finish; | 2713 | goto out_finish; |
2701 | last_task = task; | 2714 | last_task = task; |
2702 | 2715 | ||
2716 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2717 | /* raced against de_thread() from another thread? */ | ||
2718 | if (!thread_group_leader(task)) { | ||
2719 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2720 | put_task_struct(task); | ||
2721 | continue; | ||
2722 | } | ||
2723 | |||
2703 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); | 2724 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); |
2704 | 2725 | ||
2726 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2705 | put_task_struct(task); | 2727 | put_task_struct(task); |
2706 | 2728 | ||
2707 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) | 2729 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) |
@@ -2711,7 +2733,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2711 | 2733 | ||
2712 | out_finish: | 2734 | out_finish: |
2713 | cgroup_migrate_finish(&preloaded_csets); | 2735 | cgroup_migrate_finish(&preloaded_csets); |
2714 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2715 | return ret; | 2736 | return ret; |
2716 | } | 2737 | } |
2717 | 2738 | ||