| author | Tejun Heo <tj@kernel.org> | 2012-04-13 17:50:53 -0400 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2012-04-20 04:06:06 -0400 |
| commit | 80fd99792b0b9f162abdf3da12fb10eb9eb5f321 (patch) | |
| tree | 3273581c76d9bcad18f0668f6707be9323e650fb | |
| parent | da8b066262e12d1d0a3b1e6d3486e500169bf730 (diff) | |
blkcg: make sure blkg_lookup() returns %NULL if @q is bypassing
Currently, blkg_lookup() doesn't check @q bypass state. This patch
updates blk_queue_bypass_start() to do synchronize_rcu() before
returning and updates blkg_lookup() to check blk_queue_bypass() and
return %NULL if bypassing. This ensures blkg_lookup() returns %NULL
if @q is bypassing.
This is to guarantee that nobody is accessing policy data while @q is
bypassing, which is necessary to allow replacing blkio_cgroup->pd[] in
place on policy [de]activation.
v2: Added more comments explaining bypass guarantees as suggested by
Vivek.
v3: Added more comments explaining why there's no synchronize_rcu() in
blk_cleanup_queue() as suggested by Vivek.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
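The guarantee described above rests on a standard RCU publish pattern: the writer flips the bypass flag and then calls synchronize_rcu() to wait out every reader that might have sampled the old value, so any lookup that subsequently enters an RCU read-side critical section is certain to observe the flag. Below is a minimal sketch of that pattern, using hypothetical names (`bypassing`, `struct item`, `do_lookup()`) rather than the kernel's actual blk-cgroup symbols; the real patch additionally sets the flag under q->queue_lock and drains the queue first, which the sketch elides:

```c
#include <linux/types.h>
#include <linux/rcupdate.h>

static bool bypassing;			/* stands in for QUEUE_FLAG_BYPASS */

struct item;				/* hypothetical lookup target */
struct item *do_lookup(void);		/* hypothetical hash-list walk */

/* Writer side -- mirrors what blk_queue_bypass_start() now does. */
static void enter_bypass(void)
{
	bypassing = true;		/* models queue_flag_set(); the real
					 * code does this under queue_lock */
	synchronize_rcu();		/* wait out readers that may have
					 * sampled the old flag value */
	/* every rcu_read_lock() section entered from here on is
	 * guaranteed to see bypassing == true */
}

/* Reader side -- mirrors the new blkg_lookup(). */
static struct item *lookup(void)
{
	struct item *ret = NULL;

	rcu_read_lock();
	if (!bypassing)			/* models blk_queue_bypass() */
		ret = do_lookup();
	rcu_read_unlock();
	return ret;
}
```

Only the ordering matters for the %NULL guarantee: flag first, grace period second, and every later reader fails the lookup.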
-rw-r--r-- block/blk-cgroup.c | 50
-rw-r--r-- block/blk-core.c | 15

2 files changed, 46 insertions, 19 deletions
```diff
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f6581a090b9d..d6e4555c982f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -137,6 +137,38 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 	return blkg;
 }
 
+static struct blkio_group *__blkg_lookup(struct blkio_cgroup *blkcg,
+					 struct request_queue *q)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
+		if (blkg->q == q)
+			return blkg;
+	return NULL;
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+				struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(blk_queue_bypass(q)))
+		return NULL;
+	return __blkg_lookup(blkcg, q);
+}
+EXPORT_SYMBOL_GPL(blkg_lookup);
+
 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 				       struct request_queue *q,
 				       bool for_root)
@@ -150,13 +182,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	/*
 	 * This could be the first entry point of blkcg implementation and
 	 * we shouldn't allow anything to go through for a bypassing queue.
-	 * The following can be removed if blkg lookup is guaranteed to
-	 * fail on a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)) && !for_root)
 		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
 
-	blkg = blkg_lookup(blkcg, q);
+	blkg = __blkg_lookup(blkcg, q);
 	if (blkg)
 		return blkg;
 
@@ -185,20 +215,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
-/* called under rcu_read_lock(). */
-struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
-				struct request_queue *q)
-{
-	struct blkio_group *blkg;
-	struct hlist_node *n;
-
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (blkg->q == q)
-			return blkg;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(blkg_lookup);
-
 static void blkg_destroy(struct blkio_group *blkg)
 {
 	struct request_queue *q = blkg->q;
```
```diff
diff --git a/block/blk-core.c b/block/blk-core.c
index 991c1d6ef245..f2db628aa509 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -416,7 +416,8 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
  * In bypass mode, only the dispatch FIFO queue of @q is used.  This
  * function makes @q enter bypass mode and drains all requests which were
  * throttled or issued before.  On return, it's guaranteed that no request
- * is being throttled or has ELVPRIV set.
+ * is being throttled or has ELVPRIV set and blk_queue_bypass() %true
+ * inside queue or RCU read lock.
  */
 void blk_queue_bypass_start(struct request_queue *q)
 {
@@ -426,6 +427,8 @@ void blk_queue_bypass_start(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	blk_drain_queue(q, false);
+	/* ensure blk_queue_bypass() is %true inside RCU read lock */
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
 
@@ -462,7 +465,15 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	spin_lock_irq(lock);
 
-	/* dead queue is permanently in bypass mode till released */
+	/*
+	 * Dead queue is permanently in bypass mode till released.  Note
+	 * that, unlike blk_queue_bypass_start(), we aren't performing
+	 * synchronize_rcu() after entering bypass mode to avoid the delay
+	 * as some drivers create and destroy a lot of queues while
+	 * probing.  This is still safe because blk_release_queue() will be
+	 * called only after the queue refcnt drops to zero and nothing,
+	 * RCU or not, would be traversing the queue by then.
+	 */
 	q->bypass_depth++;
 	queue_flag_set(QUEUE_FLAG_BYPASS, q);
 
```
