author     Vivek Goyal <vgoyal@redhat.com>        2011-05-19 15:38:23 -0400
committer  Jens Axboe <jaxboe@fusionio.com>       2011-05-20 14:34:52 -0400
commit     f469a7b4d5b1d1d053200a9015fd25d59c057f49
tree       2c68c0689e40955b186e350b15d44d0b260f4655   /block/blk-throttle.c
parent     56edf7d75db5b14d628b46623c414ffbeed68d7f
blk-cgroup: Allow sleeping while dynamically allocating a group
Currently, all the cfq_group or throtl_group allocations happen while
we are holding ->queue_lock and sleeping is not allowed.
Soon we will move to per-cpu stats and will also need to allocate the
per-group stats. Since alloc_percpu() can sleep, it cannot be called from
atomic context, so we need to drop ->queue_lock, allocate the group,
retake the lock and continue processing; a sketch of this pattern follows
the sign-offs below.
In the throttling code, I check the queue DEAD flag again after the group
allocation to make sure that the driver did not call blk_cleanup_queue()
in the meantime.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
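For readers less familiar with the pattern the message describes, here is a
minimal, self-contained userspace sketch (pthreads, with made-up names such
as get_or_create_group(), cached_group and queue_dead; this is not the
kernel code in the patch below) of dropping the lock around a sleeping
allocation, retaking it, rechecking that the queue is still alive, and
handling the case where another thread created the group in the meantime:

/*
 * Minimal userspace sketch of the locking pattern described above.
 * Names are invented for illustration; this is not the patch's code.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct group { int id; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct group *cached_group;      /* group published under the lock */
static bool queue_dead;                 /* analogue of QUEUE_FLAG_DEAD */

static struct group *get_or_create_group(void)
{
        struct group *tg, *existing;

        pthread_mutex_lock(&queue_lock);
        if (cached_group) {                     /* fast path: group exists */
                tg = cached_group;
                pthread_mutex_unlock(&queue_lock);
                return tg;
        }

        /* The allocation may sleep, so drop the lock before calling it. */
        pthread_mutex_unlock(&queue_lock);
        tg = malloc(sizeof(*tg));               /* may block, may fail */

        pthread_mutex_lock(&queue_lock);
        if (queue_dead) {                       /* recheck after sleeping */
                pthread_mutex_unlock(&queue_lock);
                free(tg);                       /* free(NULL) is a no-op */
                return NULL;                    /* caller must bail out */
        }
        existing = cached_group;
        if (existing) {                         /* lost the race: reuse theirs */
                pthread_mutex_unlock(&queue_lock);
                free(tg);
                return existing;
        }
        if (!tg) {                              /* allocation failed */
                pthread_mutex_unlock(&queue_lock);
                return NULL;                    /* caller falls back to a default */
        }
        cached_group = tg;                      /* publish the new group */
        pthread_mutex_unlock(&queue_lock);
        return tg;
}

int main(void)
{
        return get_or_create_group() ? 0 : 1;
}

The patch below has the same shape: throtl_get_tg() drops queue_lock around
throtl_alloc_tg(), rechecks QUEUE_FLAG_DEAD, re-looks up the group under the
lock, and frees its own copy if another thread won the race.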
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c | 141
1 file changed, 108 insertions(+), 33 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index fa9a900c1254..c201967b33cd 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -188,8 +188,40 @@ throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
         td->nr_undestroyed_grps++;
 }
 
-static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-                struct blkio_cgroup *blkcg)
+static void throtl_init_add_tg_lists(struct throtl_data *td,
+                struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+{
+        struct backing_dev_info *bdi = &td->queue->backing_dev_info;
+        unsigned int major, minor;
+
+        /* Add group onto cgroup list */
+        sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+        blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
+                                MKDEV(major, minor), BLKIO_POLICY_THROTL);
+
+        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
+        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
+        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
+        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
+
+        throtl_add_group_to_td_list(td, tg);
+}
+
+/* Should be called without queue lock and outside of rcu period */
+static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
+{
+        struct throtl_grp *tg = NULL;
+
+        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
+        if (!tg)
+                return NULL;
+
+        throtl_init_group(tg);
+        return tg;
+}
+
+static struct
+throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
         struct throtl_grp *tg = NULL;
         void *key = td;
@@ -197,12 +229,6 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
         unsigned int major, minor;
 
         /*
-         * TODO: Speed up blkiocg_lookup_group() by maintaining a radix
-         * tree of blkg (instead of traversing through hash list all
-         * the time.
-         */
-
-        /*
          * This is the common case when there are no blkio cgroups.
          * Avoid lookup in this case
          */
@@ -215,43 +241,83 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
         if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
                 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
                 tg->blkg.dev = MKDEV(major, minor);
-                goto done;
         }
 
-        if (tg)
-                goto done;
-
-        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-        if (!tg)
-                goto done;
-
-        throtl_init_group(tg);
-
-        /* Add group onto cgroup list */
-        sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-        blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
-                                MKDEV(major, minor), BLKIO_POLICY_THROTL);
-
-        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
-
-        throtl_add_group_to_td_list(td, tg);
-done:
         return tg;
 }
 
+/*
+ * This function returns with queue lock unlocked in case of error, like
+ * request queue is no more
+ */
 static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 {
-        struct throtl_grp *tg = NULL;
+        struct throtl_grp *tg = NULL, *__tg = NULL;
         struct blkio_cgroup *blkcg;
+        struct request_queue *q = td->queue;
 
         rcu_read_lock();
         blkcg = task_blkio_cgroup(current);
-        tg = throtl_find_alloc_tg(td, blkcg);
-        if (!tg)
+        tg = throtl_find_tg(td, blkcg);
+        if (tg) {
+                rcu_read_unlock();
+                return tg;
+        }
+
+        /*
+         * Need to allocate a group. Allocation of group also needs allocation
+         * of per cpu stats which in-turn takes a mutex() and can block. Hence
+         * we need to drop rcu lock and queue_lock before we call alloc
+         *
+         * Take the request queue reference to make sure queue does not
+         * go away once we return from allocation.
+         */
+        blk_get_queue(q);
+        rcu_read_unlock();
+        spin_unlock_irq(q->queue_lock);
+
+        tg = throtl_alloc_tg(td);
+        /*
+         * We might have slept in group allocation. Make sure queue is not
+         * dead
+         */
+        if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+                blk_put_queue(q);
+                if (tg)
+                        kfree(tg);
+
+                return ERR_PTR(-ENODEV);
+        }
+        blk_put_queue(q);
+
+        /* Group allocated and queue is still alive. take the lock */
+        spin_lock_irq(q->queue_lock);
+
+        /*
+         * Initialize the new group. After sleeping, read the blkcg again.
+         */
+        rcu_read_lock();
+        blkcg = task_blkio_cgroup(current);
+
+        /*
+         * If some other thread already allocated the group while we were
+         * not holding queue lock, free up the group
+         */
+        __tg = throtl_find_tg(td, blkcg);
+
+        if (__tg) {
+                kfree(tg);
+                rcu_read_unlock();
+                return __tg;
+        }
+
+        /* Group allocation failed. Account the IO to root group */
+        if (!tg) {
                 tg = &td->root_tg;
+                return tg;
+        }
+
+        throtl_init_add_tg_lists(td, tg, blkcg);
         rcu_read_unlock();
         return tg;
 }
@@ -1014,6 +1080,15 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
         spin_lock_irq(q->queue_lock);
         tg = throtl_get_tg(td);
 
+        if (IS_ERR(tg)) {
+                if (PTR_ERR(tg) == -ENODEV) {
+                        /*
+                         * Queue is gone. No queue lock held here.
+                         */
+                        return -ENODEV;
+                }
+        }
+
         if (tg->nr_queued[rw]) {
                 /*
                  * There is already another bio queued in same dir. No
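The new error path encodes the errno in the returned pointer: throtl_get_tg()
returns ERR_PTR(-ENODEV) and blk_throtl_bio() tests it with IS_ERR() and
PTR_ERR(). The following is a simplified, self-contained userspace model of
that convention (helpers reduced from what include/linux/err.h provides; not
code from this patch):

/*
 * Simplified model of the ERR_PTR/IS_ERR/PTR_ERR convention used by the
 * new throtl_get_tg() error path. The real definitions live in
 * include/linux/err.h.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095  /* errnos occupy the top 4095 pointer values */

static inline void *ERR_PTR(long error)
{
        return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
        return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
        void *tg = ERR_PTR(-ENODEV);    /* what the error path hands back */

        if (IS_ERR(tg) && PTR_ERR(tg) == -ENODEV)
                printf("queue is gone: return -ENODEV without queue lock\n");
        return 0;
}

This is why blk_throtl_bio() must check IS_ERR(tg) before it dereferences
tg->nr_queued[] in the hunk above.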