aboutsummaryrefslogtreecommitdiffstats
path: root/block/blk-cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2015-07-09 16:39:47 -0400
committerJens Axboe <axboe@fb.com>2015-07-09 16:41:06 -0400
commit838f13bf4b6737d4aec508558e45f81798fc2677 (patch)
treead481dd55649a687b67b20f9f6e3926b4151881d /block/blk-cgroup.c
parenta322baad1003798312741b0cb97bd2c7511ccf61 (diff)
blkcg: allow blkcg_pol_mutex to be grabbed from cgroup [file] methods
blkcg_pol_mutex primarily protects the blkcg_policy array. It also protects cgroup file type [un]registration during policy addition / removal. This puts blkcg_pol_mutex outside cgroup internal synchronization and in turn makes it impossible to grab from blkcg's cgroup methods as that leads to cyclic dependency. Another problematic dependency arising from this is through cgroup interface file deactivation. Removing a cftype requires removing all files of the type which in turn involves draining all on-going invocations of the file methods. This means that an interface file implementation can't grab blkcg_pol_mutex as draining can lead to AA deadlock. blkcg_reset_stats() is already in this situation. It currently trylocks blkcg_pol_mutex and then unwinds and retries the whole operation on failure, which is cumbersome at best. It has a lengthy comment explaining how cgroup internal synchronization is involved and expected to be updated but as explained above this doesn't need cgroup internal locking to deadlock. It's a self-contained AA deadlock. The described circular dependencies can be easily broken by moving cftype [un]registration out of blkcg_pol_mutex and protect them with an outer mutex. This patch introduces blkcg_pol_register_mutex which wraps entire policy [un]registration including cftype operations and shrinks blkcg_pol_mutex critical section. This also makes the trylock dancing in blkcg_reset_stats() unnecessary. Removed. This patch is necessary for the following blkcg_policy_data allocation bug fixes. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Arianna Avanzini <avanzini.arianna@gmail.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--block/blk-cgroup.c40
1 files changed, 21 insertions, 19 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5e2723f2c6a3..2ff74ffcbb27 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -29,6 +29,14 @@
29 29
30#define MAX_KEY_LEN 100 30#define MAX_KEY_LEN 100
31 31
32/*
33 * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
34 * blkcg_pol_register_mutex nests outside of it and synchronizes entire
35 * policy [un]register operations including cgroup file additions /
36 * removals. Putting cgroup file registration outside blkcg_pol_mutex
37 * allows grabbing it from cgroup callbacks.
38 */
39static DEFINE_MUTEX(blkcg_pol_register_mutex);
32static DEFINE_MUTEX(blkcg_pol_mutex); 40static DEFINE_MUTEX(blkcg_pol_mutex);
33 41
34struct blkcg blkcg_root; 42struct blkcg blkcg_root;
@@ -453,20 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
453 struct blkcg_gq *blkg; 461 struct blkcg_gq *blkg;
454 int i; 462 int i;
455 463
456 /* 464 mutex_lock(&blkcg_pol_mutex);
457 * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
458 * which ends up putting cgroup's internal cgroup_tree_mutex under
459 * it; however, cgroup_tree_mutex is nested above cgroup file
460 * active protection and grabbing blkcg_pol_mutex from a cgroup
461 * file operation creates a possible circular dependency. cgroup
462 * internal locking is planned to go through further simplification
463 * and this issue should go away soon. For now, let's trylock
464 * blkcg_pol_mutex and restart the write on failure.
465 *
466 * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
467 */
468 if (!mutex_trylock(&blkcg_pol_mutex))
469 return restart_syscall();
470 spin_lock_irq(&blkcg->lock); 465 spin_lock_irq(&blkcg->lock);
471 466
472 /* 467 /*
@@ -1190,6 +1185,7 @@ int blkcg_policy_register(struct blkcg_policy *pol)
1190 if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) 1185 if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
1191 return -EINVAL; 1186 return -EINVAL;
1192 1187
1188 mutex_lock(&blkcg_pol_register_mutex);
1193 mutex_lock(&blkcg_pol_mutex); 1189 mutex_lock(&blkcg_pol_mutex);
1194 1190
1195 /* find an empty slot */ 1191 /* find an empty slot */
@@ -1198,19 +1194,23 @@ int blkcg_policy_register(struct blkcg_policy *pol)
1198 if (!blkcg_policy[i]) 1194 if (!blkcg_policy[i])
1199 break; 1195 break;
1200 if (i >= BLKCG_MAX_POLS) 1196 if (i >= BLKCG_MAX_POLS)
1201 goto out_unlock; 1197 goto err_unlock;
1202 1198
1203 /* register and update blkgs */ 1199 /* register and update blkgs */
1204 pol->plid = i; 1200 pol->plid = i;
1205 blkcg_policy[i] = pol; 1201 blkcg_policy[i] = pol;
1202 mutex_unlock(&blkcg_pol_mutex);
1206 1203
1207 /* everything is in place, add intf files for the new policy */ 1204 /* everything is in place, add intf files for the new policy */
1208 if (pol->cftypes) 1205 if (pol->cftypes)
1209 WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, 1206 WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
1210 pol->cftypes)); 1207 pol->cftypes));
1211 ret = 0; 1208 mutex_unlock(&blkcg_pol_register_mutex);
1212out_unlock: 1209 return 0;
1210
1211err_unlock:
1213 mutex_unlock(&blkcg_pol_mutex); 1212 mutex_unlock(&blkcg_pol_mutex);
1213 mutex_unlock(&blkcg_pol_register_mutex);
1214 return ret; 1214 return ret;
1215} 1215}
1216EXPORT_SYMBOL_GPL(blkcg_policy_register); 1216EXPORT_SYMBOL_GPL(blkcg_policy_register);
@@ -1223,7 +1223,7 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
1223 */ 1223 */
1224void blkcg_policy_unregister(struct blkcg_policy *pol) 1224void blkcg_policy_unregister(struct blkcg_policy *pol)
1225{ 1225{
1226 mutex_lock(&blkcg_pol_mutex); 1226 mutex_lock(&blkcg_pol_register_mutex);
1227 1227
1228 if (WARN_ON(blkcg_policy[pol->plid] != pol)) 1228 if (WARN_ON(blkcg_policy[pol->plid] != pol))
1229 goto out_unlock; 1229 goto out_unlock;
@@ -1233,8 +1233,10 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
1233 cgroup_rm_cftypes(pol->cftypes); 1233 cgroup_rm_cftypes(pol->cftypes);
1234 1234
1235 /* unregister and update blkgs */ 1235 /* unregister and update blkgs */
1236 mutex_lock(&blkcg_pol_mutex);
1236 blkcg_policy[pol->plid] = NULL; 1237 blkcg_policy[pol->plid] = NULL;
1237out_unlock:
1238 mutex_unlock(&blkcg_pol_mutex); 1238 mutex_unlock(&blkcg_pol_mutex);
1239out_unlock:
1240 mutex_unlock(&blkcg_pol_register_mutex);
1239} 1241}
1240EXPORT_SYMBOL_GPL(blkcg_policy_unregister); 1242EXPORT_SYMBOL_GPL(blkcg_policy_unregister);