author		Tejun Heo <tj@kernel.org>		2012-03-05 16:15:22 -0500
committer	Jens Axboe <axboe@kernel.dk>		2012-03-06 15:27:24 -0500
commit		c875f4d0250a1f070fa26087a73bdd8f54c48100
tree		4ed2bae2fc48e54ac712d28eaaae8217c8064c1d
parent		9f13ef678efd977487fc0c2e489f17c9a8c67a3e
blkcg: drop unnecessary RCU locking
Now that blkg additions / removals are always done under both q and
blkcg locks, the only place RCU locking is necessary is
blkg_lookup[_create]() for lookup w/o blkcg lock. This patch drops
unnecessary RCU locking, replacing it with plain blkcg locking as
necessary.
* blkiocg_pre_destroy() already performs proper locking and doesn't need
RCU. Dropped.
* blkio_read_blkg_stats() now uses blkcg->lock instead of RCU read
lock. This isn't a hot path.
* The now-unnecessary synchronize_rcu() calls in the queue exit paths are
removed. This makes q->nr_blkgs unnecessary. Dropped.
* RCU annotation on blkg->q removed.
-v2: Vivek pointed out that blkg_lookup_create() still needs to be
called under rcu_read_lock(). Updated.
-v3: After the update, stats_lock locking in blkio_read_blkg_stats()
shouldn't be using _irq variant as it otherwise ends up enabling
irq while blkcg->lock is locked. Fixed.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
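The -v3 note is the subtle part of the change: blkio_read_blkg_stats() now takes blkcg->lock with the _irq variant, so the nested per-group stats_lock must use the plain spin_lock()/spin_unlock(); an inner spin_unlock_irq() would re-enable interrupts while blkcg->lock is still held. The sketch below is illustrative only (not part of the patch) and condenses the two locking rules left in place; the caller is hypothetical and blkg_lookup_create()'s argument list is abbreviated.

/*
 * Illustrative sketch, not from the patch.
 *
 * Rule 1: lookup/creation without the blkcg lock is the only remaining
 * RCU user, so callers of blkg_lookup[_create]() still need
 * rcu_read_lock().  (Arguments abbreviated.)
 */
rcu_read_lock();
blkg = blkg_lookup_create(blkcg, q, ...);
rcu_read_unlock();

/*
 * Rule 2: blkio_read_blkg_stats() walks blkcg->blkg_list under
 * spin_lock_irq(&blkcg->lock), which already disables interrupts, so
 * the nested stats_lock must use the plain variants; spin_unlock_irq()
 * here would turn interrupts back on while blkcg->lock is still held.
 */
spin_lock_irq(&blkcg->lock);
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
        spin_lock(&blkg->stats_lock);   /* not _irq */
        /* ... accumulate per-group stats ... */
        spin_unlock(&blkg->stats_lock);
}
spin_unlock_irq(&blkcg->lock);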
-rw-r--r--   block/blk-cgroup.c       24
-rw-r--r--   block/blk-cgroup.h        4
-rw-r--r--   block/blk-throttle.c     33
-rw-r--r--   block/cfq-iosched.c      24
-rw-r--r--   include/linux/blkdev.h    1
5 files changed, 12 insertions, 74 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e9e3b038c702..27d39a810cb6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -500,7 +500,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 		return NULL;
 
 	spin_lock_init(&blkg->stats_lock);
-	rcu_assign_pointer(blkg->q, q);
+	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
 	blkg->refcnt = 1;
@@ -611,7 +611,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 
 	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
 	list_add(&blkg->q_node, &q->blkg_list);
-	q->nr_blkgs++;
 
 	spin_unlock(&blkcg->lock);
 out:
@@ -648,9 +647,6 @@ static void blkg_destroy(struct blkio_group *blkg)
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
 
-	WARN_ON_ONCE(q->nr_blkgs <= 0);
-	q->nr_blkgs--;
-
 	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
@@ -1232,8 +1228,9 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 	struct hlist_node *n;
 	uint64_t cgroup_total = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
 		const char *dname = blkg_dev_name(blkg);
 		int plid = BLKIOFILE_POLICY(cft->private);
 
@@ -1243,15 +1240,16 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 			cgroup_total += blkio_get_stat_cpu(blkg, plid,
 						cb, dname, type);
 		} else {
-			spin_lock_irq(&blkg->stats_lock);
+			spin_lock(&blkg->stats_lock);
 			cgroup_total += blkio_get_stat(blkg, plid,
 						cb, dname, type);
-			spin_unlock_irq(&blkg->stats_lock);
+			spin_unlock(&blkg->stats_lock);
 		}
 	}
 	if (show_total)
 		cb->fill(cb, "Total", cgroup_total);
-	rcu_read_unlock();
+
+	spin_unlock_irq(&blkcg->lock);
 	return 0;
 }
 
@@ -1583,28 +1581,24 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 {
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 
-	rcu_read_lock();
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
 						struct blkio_group, blkcg_node);
-		struct request_queue *q = rcu_dereference(blkg->q);
+		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
 			blkg_destroy(blkg);
 			spin_unlock(q->queue_lock);
 		} else {
 			spin_unlock_irq(&blkcg->lock);
-			rcu_read_unlock();
 			cpu_relax();
-			rcu_read_lock();
 			spin_lock(&blkcg->lock);
 		}
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index df73040a6a5f..66eaefefcbd2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -176,8 +176,8 @@ struct blkg_policy_data {
 };
 
 struct blkio_group {
-	/* Pointer to the associated request_queue, RCU protected */
-	struct request_queue __rcu *q;
+	/* Pointer to the associated request_queue */
+	struct request_queue *q;
 	struct list_head q_node;
 	struct hlist_node blkcg_node;
 	struct blkio_cgroup *blkcg;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e35ee7aeea69..bfa5168249eb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1046,39 +1046,8 @@ int blk_throtl_init(struct request_queue *q)
 
 void blk_throtl_exit(struct request_queue *q)
 {
-	struct throtl_data *td = q->td;
-	bool wait;
-
-	BUG_ON(!td);
-
+	BUG_ON(!q->td);
 	throtl_shutdown_wq(q);
-
-	/* If there are other groups */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-
-	/*
-	 * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods.
-	 * Do this wait only if there are other undestroyed groups out
-	 * there (other than root group). This can happen if cgroup deletion
-	 * path claimed the responsibility of cleaning up a group before
-	 * queue cleanup code get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
-	/*
-	 * Just being safe to make sure after previous flush if some body did
-	 * update limits through cgroup and another work got queued, cancel
-	 * it.
-	 */
-	throtl_shutdown_wq(q);
-
 	kfree(q->td);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 393eaa59913b..9e386d9bcb79 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3449,7 +3449,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
-	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3462,31 +3461,8 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	spin_unlock_irq(q->queue_lock);
 
-#ifdef CONFIG_BLK_CGROUP
-	/*
-	 * If there are groups which we could not unlink from blkcg list,
-	 * wait for a rcu period for them to be freed.
-	 */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-#endif
 	cfq_shutdown_timer_wq(cfqd);
 
-	/*
-	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
-	 * Do this wait only if there are other unlinked groups out
-	 * there. This can happen if cgroup deletion path claimed the
-	 * responsibility of cleaning up a group before queue cleanup code
-	 * get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
 #ifndef CONFIG_CFQ_GROUP_IOSCHED
 	kfree(cfqd->root_group);
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4d1d4bfc168..33f1b29e53f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,7 +365,6 @@ struct request_queue {
 #ifdef CONFIG_BLK_CGROUP
 	/* XXX: array size hardcoded to avoid include dependency (temporary) */
 	struct list_head blkg_list;
-	int nr_blkgs;
 #endif
 
 	struct queue_limits limits;