author     Tejun Heo <tj@kernel.org>  2016-03-03 09:58:00 -0500
committer  Tejun Heo <tj@kernel.org>  2016-03-03 09:58:00 -0500
commit     945ba1996888809cf510a8da000a9c20a9fab5ad
tree       4bf5e7a11fbb4c80148c53c77be5105f131cf5a0 /kernel/cgroup.c
parent     f7b2814bb9b6cb1d69333e1592c702260fcb4184
cgroup: combine cgroup_mutex locking and offline css draining
cgroup_drain_offline() is used to wait for csses being offlined to
uninstall themselves from the cgroup->subsys[] array so that new csses
can be installed.  The function's only user,
cgroup_subtree_control_write(), calls it after performing some checks
and restarts the whole process via restart_syscall() if draining had to
release cgroup_mutex to wait.

This can be simplified by draining before the other synchronized
operations so that there is nothing to restart.  This patch converts
cgroup_drain_offline() to cgroup_lock_and_drain_offline(), which
performs both locking and draining, and updates cgroup_kn_lock_live()
to use it instead of taking cgroup_mutex directly when requested.  The
combined locking and draining operation is easier to use and less
error-prone.

While at it, add WARNs in the control_apply functions that trigger if
the subtree isn't properly drained.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Zefan Li <lizefan@huawei.com>
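
For illustration, here is the shape of the change at its main call site,
condensed from the diff below (checks and error paths are elided).

Before, cgroup_subtree_control_write() drained under an already-held
cgroup_mutex and had to restart the whole syscall whenever draining
dropped the lock:

	mutex_lock(&cgroup_mutex);
	/* ... validate the requested control mask ... */
	if (cgroup_drain_offline(cgrp)) {
		/* lock was dropped while waiting; anything may have changed */
		cgroup_kn_unlock(of->kn);
		return restart_syscall();
	}

After, locking and draining are a single operation that retries
internally, so the caller starts with the lock held and the subtree
already drained:

	cgrp = cgroup_kn_lock_live(of->kn, true);	/* lock + drain */
	if (!cgrp)
		return -ENODEV;
	/* ... checks now run against a fully drained subtree ... */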
Diffstat (limited to 'kernel/cgroup.c')
 -rw-r--r--  kernel/cgroup.c | 55
 1 file changed, 27 insertions(+), 28 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2adf0433a3cf..bbeb35f14eda 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -220,6 +220,7 @@ static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
 
 static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
+static void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
 static void css_task_iter_advance(struct css_task_iter *it);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
@@ -1391,19 +1392,22 @@ static void cgroup_kn_unlock(struct kernfs_node *kn)
 /**
  * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
  * @kn: the kernfs_node being serviced
+ * @drain_offline: perform offline draining on the cgroup
  *
  * This helper is to be used by a cgroup kernfs method currently servicing
  * @kn.  It breaks the active protection, performs cgroup locking and
  * verifies that the associated cgroup is alive.  Returns the cgroup if
  * alive; otherwise, %NULL.  A successful return should be undone by a
- * matching cgroup_kn_unlock() invocation.
+ * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
+ * cgroup is drained of offlining csses before return.
  *
  * Any cgroup kernfs method implementation which requires locking the
  * associated cgroup should use this helper.  It avoids nesting cgroup
  * locking under kernfs active protection and allows all kernfs operations
  * including self-removal.
  */
-static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
+static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
+					  bool drain_offline)
 {
 	struct cgroup *cgrp;
 
@@ -1422,7 +1426,10 @@ static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
 		return NULL;
 	kernfs_break_active_protection(kn);
 
-	mutex_lock(&cgroup_mutex);
+	if (drain_offline)
+		cgroup_lock_and_drain_offline(cgrp);
+	else
+		mutex_lock(&cgroup_mutex);
 
 	if (!cgroup_is_dead(cgrp))
 		return cgrp;
@@ -2761,7 +2768,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
 
-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, false);
 	if (!cgrp)
 		return -ENODEV;
 
@@ -2859,7 +2866,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
 
 	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
 
-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, false);
 	if (!cgrp)
 		return -ENODEV;
 	spin_lock(&release_agent_path_lock);
@@ -2984,27 +2991,23 @@ out_finish:
 }
 
 /**
- * cgroup_drain_offline - wait for previously offlined csses to go away
+ * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
  * @cgrp: root of the target subtree
  *
  * Because css offlining is asynchronous, userland may try to re-enable a
- * controller while the previous css is still around.  This function drains
- * the previous css instances of @cgrp's subtree.
- *
- * Must be called with cgroup_mutex held.  Returns %false if there were no
- * dying css instances.  Returns %true if there were one or more and this
- * function waited.  On %true return, cgroup_mutex has been dropped and
- * re-acquired inbetween which anything could have happened.  The caller
- * typically would have to start over.
+ * controller while the previous css is still around.  This function grabs
+ * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
  */
-static bool cgroup_drain_offline(struct cgroup *cgrp)
+static void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
+	__acquires(&cgroup_mutex)
 {
 	struct cgroup *dsct;
 	struct cgroup_subsys_state *d_css;
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	lockdep_assert_held(&cgroup_mutex);
+restart:
+	mutex_lock(&cgroup_mutex);
 
 	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
 		for_each_subsys(ss, ssid) {
@@ -3021,14 +3024,11 @@ static bool cgroup_drain_offline(struct cgroup *cgrp)
 			mutex_unlock(&cgroup_mutex);
 			schedule();
 			finish_wait(&dsct->offline_waitq, &wait);
-			mutex_lock(&cgroup_mutex);
 
 			cgroup_put(dsct);
-			return true;
+			goto restart;
 		}
 	}
-
-	return false;
 }
 
 /**
@@ -3111,6 +3111,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp)
 		for_each_subsys(ss, ssid) {
 			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
 
+			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
 			if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
 				continue;
 
@@ -3155,6 +3157,8 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp)
 		for_each_subsys(ss, ssid) {
 			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
 
+			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
+
 			if (!css)
 				continue;
 
@@ -3264,7 +3268,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_kn_lock_live(of->kn);
+	cgrp = cgroup_kn_lock_live(of->kn, true);
 	if (!cgrp)
 		return -ENODEV;
 
@@ -3309,11 +3313,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		goto out_unlock;
 	}
 
-	if (cgroup_drain_offline(cgrp)) {
-		cgroup_kn_unlock(of->kn);
-		return restart_syscall();
-	}
-
 	/* save and update control masks and prepare csses */
 	cgroup_save_control(cgrp);
 
@@ -5140,7 +5139,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	if (strchr(name, '\n'))
 		return -EINVAL;
 
-	parent = cgroup_kn_lock_live(parent_kn);
+	parent = cgroup_kn_lock_live(parent_kn, false);
 	if (!parent)
 		return -ENODEV;
 
@@ -5339,7 +5338,7 @@ static int cgroup_rmdir(struct kernfs_node *kn)
 	struct cgroup *cgrp;
 	int ret = 0;
 
-	cgrp = cgroup_kn_lock_live(kn);
+	cgrp = cgroup_kn_lock_live(kn, false);
 	if (!cgrp)
 		return 0;
 
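
As a usage note, the drain_offline argument lets each kernfs method pick
the locking flavor it needs; cgroup_subtree_control_write() is the only
caller that can install new csses over dying ones, so it is the only one
passing true (condensed from the call sites above):

	/* cgroup.subtree_control write: csses may be re-created, drain first */
	cgrp = cgroup_kn_lock_live(of->kn, true);

	/* mkdir, rmdir, procs and release_agent writes: plain locking suffices */
	cgrp = cgroup_kn_lock_live(kn, false);

The WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)) checks added
to cgroup_apply_control_enable() and cgroup_apply_control_disable()
assert the resulting invariant: by the time control masks are applied,
no dying css is still installed anywhere in the subtree.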