author      Tejun Heo <tj@kernel.org>        2012-03-05 16:15:06 -0500
committer   Jens Axboe <axboe@kernel.dk>     2012-03-06 15:27:22 -0500
commit      cd1604fab4f95f7cfc227d3955fd7ae14da61f38
tree        021881faedc1c2468730f9f54d364083e70dce76  /block/blk-throttle.c
parent      f51b802c17e2a21926b29911493f5e7ddf6eee87
blkcg: factor out blkio_group creation
Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transitional failures due to queue
bypass can be distinguished from other failures.
* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added (see the sketch after this list).
  Both are transitional and will be removed once the blkg management
  code is fully moved into blk-cgroup.c.
* blkio_alloc_group_fn() allocates the policy-specific blkg, which is
  usually a larger data structure with the blkg as its first entry, and
  initializes it. Note that initialization of the blkg proper,
  including percpu stats, is the responsibility of blk-cgroup proper.
  Also note that default config (weight, bps...) initialization is done
  from this method; otherwise, we would end up violating the locking
  order between the blkcg and q locks via the blkcg_get_CONF()
  functions.
* blkio_link_group_fn() is called under the queue lock and is
  responsible for linking the blkg to the queue. The blkcg side is
  handled by blk-cgroup proper.
* The common blkg creation function is named blkg_lookup_create(), and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  throtl and cfq related functions are similarly [re]named.
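For illustration only (not part of this patch), a policy implementation
now wires up the two transitional hooks roughly as sketched below. The
hook signatures mirror the blk-throttle versions added further down in
the diff; mypol_grp, the mypol_* names and the per-queue list field are
hypothetical, and blkcg_get_weight() merely stands in for the
blkcg_get_CONF()-style helpers mentioned above.

  /* Hypothetical policy-private group; struct blkio_group must come first. */
  struct mypol_grp {
          struct blkio_group blkg;
          struct hlist_node node;
          unsigned int weight;
  };

  static struct blkio_group *mypol_alloc_blkio_group(struct request_queue *q,
                                                     struct blkio_cgroup *blkcg)
  {
          struct mypol_grp *mg = kzalloc_node(sizeof(*mg), GFP_ATOMIC, q->node);

          if (!mg)
                  return NULL;
          /* read default config here to avoid blkcg -> q lock order inversion */
          mg->weight = blkcg_get_weight(blkcg, mg->blkg.dev);
          return &mg->blkg;
  }

  static void mypol_link_blkio_group(struct request_queue *q,
                                     struct blkio_group *blkg)
  {
          struct mypol_grp *mg = container_of(blkg, struct mypol_grp, blkg);

          /* called under q->queue_lock; only the queue side is linked here */
          hlist_add_head(&mg->node, &q->mypol_data->group_list);  /* hypothetical */
  }

  static struct blkio_policy_type blkio_policy_mypol = {
          .ops = {
                  .blkio_alloc_group_fn   = mypol_alloc_blkio_group,
                  .blkio_link_group_fn    = mypol_link_blkio_group,
          },
  };

Initialization of the blkg proper (including percpu stats) and the
blkcg-side linking stay in blk-cgroup.c, which is what keeps these two
hooks small enough to be removed again later.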
This simplifies blkcg policy implementations and enables further
cleanup.
-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
  blk_queue_dead() instead of blk_queue_bypass(), which let a user of
  the function create a new blkg on a bypassing queue. This bug was
  introduced while relocating the bypass patches before this one.
  Fixed.
-v3: The ERR_PTR patch is folded into this one. @for_root is added to
  blkg_lookup_create() to allow creating the root group on a bypassed
  queue during an elevator switch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--   block/blk-throttle.c   155
1 file changed, 52 insertions, 103 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index aeeb798d1cda..2ae637b9e80c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
         call_rcu(&tg->rcu_head, throtl_free_tg);
 }
 
-static void throtl_init_group(struct throtl_grp *tg)
+static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
+                                                    struct blkio_cgroup *blkcg)
 {
+        struct throtl_grp *tg;
+
+        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
+        if (!tg)
+                return NULL;
+
         INIT_HLIST_NODE(&tg->tg_node);
         RB_CLEAR_NODE(&tg->rb_node);
         bio_list_init(&tg->bio_lists[0]);
         bio_list_init(&tg->bio_lists[1]);
         tg->limits_changed = false;
 
-        /* Practically unlimited BW */
-        tg->bps[0] = tg->bps[1] = -1;
-        tg->iops[0] = tg->iops[1] = -1;
+        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
+        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
+        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
+        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
         /*
          * Take the initial reference that will be released on destroy
@@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
          * exit or cgroup deletion path depending on who is exiting first.
          */
         atomic_set(&tg->ref, 1);
-}
 
-/* Should be called with rcu read lock held (needed for blkcg) */
-static void
-throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
-{
-        hlist_add_head(&tg->tg_node, &td->tg_list);
-        td->nr_undestroyed_grps++;
+        return &tg->blkg;
 }
 
 static void
@@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
         spin_unlock_irq(td->queue->queue_lock);
 }
 
-static void throtl_init_add_tg_lists(struct throtl_data *td,
-                        struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+static void throtl_link_blkio_group(struct request_queue *q,
+                                    struct blkio_group *blkg)
 {
-        __throtl_tg_fill_dev_details(td, tg);
-
-        /* Add group onto cgroup list */
-        blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
-                                tg->blkg.dev, BLKIO_POLICY_THROTL);
-
-        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
-
-        throtl_add_group_to_td_list(td, tg);
-}
-
-/* Should be called without queue lock and outside of rcu period */
-static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
-{
-        struct throtl_grp *tg = NULL;
-        int ret;
-
-        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-        if (!tg)
-                return NULL;
-
-        ret = blkio_alloc_blkg_stats(&tg->blkg);
+        struct throtl_data *td = q->td;
+        struct throtl_grp *tg = tg_of_blkg(blkg);
 
-        if (ret) {
-                kfree(tg);
-                return NULL;
-        }
+        __throtl_tg_fill_dev_details(td, tg);
 
-        throtl_init_group(tg);
-        return tg;
+        hlist_add_head(&tg->tg_node, &td->tg_list);
+        td->nr_undestroyed_grps++;
 }
 
 static struct
-throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
+throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
         struct throtl_grp *tg = NULL;
 
         /*
          * This is the common case when there are no blkio cgroups.
          * Avoid lookup in this case
          */
         if (blkcg == &blkio_root_cgroup)
                 tg = td->root_tg;
         else
-                tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue,
-                                                     BLKIO_POLICY_THROTL));
+                tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
+                                            BLKIO_POLICY_THROTL));
 
         __throtl_tg_fill_dev_details(td, tg);
         return tg;
 }
 
-static struct throtl_grp *throtl_get_tg(struct throtl_data *td,
-                                        struct blkio_cgroup *blkcg)
+static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
+                                                  struct blkio_cgroup *blkcg)
 {
-        struct throtl_grp *tg = NULL, *__tg = NULL;
         struct request_queue *q = td->queue;
-
-        /* no throttling for dead queue */
-        if (unlikely(blk_queue_bypass(q)))
-                return NULL;
-
-        tg = throtl_find_tg(td, blkcg);
-        if (tg)
-                return tg;
-
-        if (!css_tryget(&blkcg->css))
-                return NULL;
-
-        /*
-         * Need to allocate a group. Allocation of group also needs allocation
-         * of per cpu stats which in-turn takes a mutex() and can block. Hence
-         * we need to drop rcu lock and queue_lock before we call alloc.
-         */
-        spin_unlock_irq(q->queue_lock);
-        rcu_read_unlock();
-
-        tg = throtl_alloc_tg(td);
-
-        /* Group allocated and queue is still alive. take the lock */
-        rcu_read_lock();
-        spin_lock_irq(q->queue_lock);
-        css_put(&blkcg->css);
-
-        /* Make sure @q is still alive */
-        if (unlikely(blk_queue_bypass(q))) {
-                kfree(tg);
-                return NULL;
-        }
+        struct throtl_grp *tg = NULL;
 
         /*
-         * If some other thread already allocated the group while we were
-         * not holding queue lock, free up the group
+         * This is the common case when there are no blkio cgroups.
+         * Avoid lookup in this case
          */
-        __tg = throtl_find_tg(td, blkcg);
+        if (blkcg == &blkio_root_cgroup) {
+                tg = td->root_tg;
+        } else {
+                struct blkio_group *blkg;
 
-        if (__tg) {
-                kfree(tg);
-                return __tg;
-        }
+                blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
 
-        /* Group allocation failed. Account the IO to root group */
-        if (!tg) {
-                tg = td->root_tg;
-                return tg;
+                /* if %NULL and @q is alive, fall back to root_tg */
+                if (!IS_ERR(blkg))
+                        tg = tg_of_blkg(blkg);
+                else if (!blk_queue_dead(q))
+                        tg = td->root_tg;
         }
 
-        throtl_init_add_tg_lists(td, tg, blkcg);
+        __throtl_tg_fill_dev_details(td, tg);
         return tg;
 }
 
@@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
 
 static struct blkio_policy_type blkio_policy_throtl = {
         .ops = {
+                .blkio_alloc_group_fn = throtl_alloc_blkio_group,
+                .blkio_link_group_fn = throtl_link_blkio_group,
                 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
                 .blkio_clear_queue_fn = throtl_clear_queue,
                 .blkio_update_group_read_bps_fn =
@@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
          */
         rcu_read_lock();
         blkcg = task_blkio_cgroup(current);
-        tg = throtl_find_tg(td, blkcg);
+        tg = throtl_lookup_tg(td, blkcg);
         if (tg) {
                 throtl_tg_fill_dev_details(td, tg);
 
@@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
          * IO group
          */
         spin_lock_irq(q->queue_lock);
-        tg = throtl_get_tg(td, blkcg);
+        tg = throtl_lookup_create_tg(td, blkcg);
         if (unlikely(!tg))
                 goto out_unlock;
 
@@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
 int blk_throtl_init(struct request_queue *q)
 {
         struct throtl_data *td;
+        struct blkio_group *blkg;
 
         td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
         if (!td)
@@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
         td->limits_changed = false;
         INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-        /* alloc and Init root group. */
+        q->td = td;
         td->queue = q;
 
+        /* alloc and init root group. */
         rcu_read_lock();
         spin_lock_irq(q->queue_lock);
 
-        td->root_tg = throtl_get_tg(td, &blkio_root_cgroup);
+        blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
+                                  true);
+        if (!IS_ERR(blkg))
+                td->root_tg = tg_of_blkg(blkg);
 
         spin_unlock_irq(q->queue_lock);
         rcu_read_unlock();
@@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
                 kfree(td);
                 return -ENOMEM;
         }
-
-        /* Attach throtl data to request queue */
-        q->td = td;
         return 0;
 }
 