author		Tejun Heo <tj@kernel.org>	2012-03-05 16:15:06 -0500
committer	Jens Axboe <axboe@kernel.dk>	2012-03-06 15:27:22 -0500
commit		cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (patch)
tree		021881faedc1c2468730f9f54d364083e70dce76 /block/blk-throttle.c
parent		f51b802c17e2a21926b29911493f5e7ddf6eee87 (diff)
blkcg: factor out blkio_group creation
Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg().  This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transitional failures due to queue
bypass can be distinguished from other failures.

* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added.  Both are transitional and will be
  removed once the blkg management code is fully moved into
  blk-cgroup.c.

* blkio_alloc_group_fn() allocates the policy-specific blkg, which is
  usually a larger data structure with the blkg as its first entry,
  and initializes it.  Initialization of the blkg proper, including
  percpu stats, is the responsibility of blk-cgroup proper.

  Note that default config (weight, bps...) initialization is done
  from this method; otherwise, we end up violating the locking order
  between the blkcg and q locks via the blkcg_get_CONF() functions.

* blkio_link_group_fn() is called under queue_lock and is responsible
  for linking the blkg to the queue.  The blkcg side is handled by
  blk-cgroup proper.

* The common blkg creation function is named blkg_lookup_create() and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  The throtl / cfq related functions are similarly [re]named for
  consistency.

This simplifies blkcg policy implementations and enables further
cleanup.

-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
     blk_queue_dead() instead of blk_queue_bypass(), leading users of
     the function to create new blkgs on a bypassing queue.  This is a
     bug introduced while relocating the bypass patches before this
     one.  Fixed.

-v3: The ERR_PTR patch was folded into this one.  @for_root was added
     to blkg_lookup_create() to allow creating the root group on a
     bypassed queue during an elevator switch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
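As a rough illustration of the two new hooks, here is a minimal sketch of
what a policy-side implementation can look like.  Only the hook signatures
follow this patch; the policy name "foo", its fields and the registration
details are hypothetical and not part of the change.

/*
 * Hypothetical policy "foo" sketching the transitional hooks.
 * blk-cgroup proper initializes the embedded blkg (including percpu
 * stats) and handles the blkcg-side linking; the policy only allocates
 * its container, applies default config and links the queue side.
 */
struct foo_group {
	struct blkio_group blkg;	/* embedded group, wrapped by the policy */
	/* policy-specific state ... */
};

static struct blkio_group *foo_alloc_blkio_group(struct request_queue *q,
						 struct blkio_cgroup *blkcg)
{
	struct foo_group *fg = kzalloc_node(sizeof(*fg), GFP_ATOMIC, q->node);

	if (!fg)
		return NULL;
	/* apply per-cgroup defaults (weight, bps, iops, ...) here */
	return &fg->blkg;
}

static void foo_link_blkio_group(struct request_queue *q,
				 struct blkio_group *blkg)
{
	/* called with q->queue_lock held; attach the group to @q */
}

static struct blkio_policy_type blkio_policy_foo = {
	.ops = {
		.blkio_alloc_group_fn = foo_alloc_blkio_group,
		.blkio_link_group_fn = foo_link_blkio_group,
		/* unlink/clear/update hooks and other fields omitted */
	},
};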
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--	block/blk-throttle.c | 155
1 file changed, 52 insertions(+), 103 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index aeeb798d1cda..2ae637b9e80c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
 	call_rcu(&tg->rcu_head, throtl_free_tg);
 }
 
-static void throtl_init_group(struct throtl_grp *tg)
+static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
+						    struct blkio_cgroup *blkcg)
 {
+	struct throtl_grp *tg;
+
+	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
+	if (!tg)
+		return NULL;
+
 	INIT_HLIST_NODE(&tg->tg_node);
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
 	tg->limits_changed = false;
 
-	/* Practically unlimited BW */
-	tg->bps[0] = tg->bps[1] = -1;
-	tg->iops[0] = tg->iops[1] = -1;
+	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
+	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
+	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
+	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
 	 * exit or cgroup deletion path depending on who is exiting first.
 	 */
 	atomic_set(&tg->ref, 1);
-}
 
-/* Should be called with rcu read lock held (needed for blkcg) */
-static void
-throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
-{
-	hlist_add_head(&tg->tg_node, &td->tg_list);
-	td->nr_undestroyed_grps++;
+	return &tg->blkg;
 }
 
 static void
@@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
 	spin_unlock_irq(td->queue->queue_lock);
 }
 
-static void throtl_init_add_tg_lists(struct throtl_data *td,
-			struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+static void throtl_link_blkio_group(struct request_queue *q,
+				    struct blkio_group *blkg)
 {
-	__throtl_tg_fill_dev_details(td, tg);
-
-	/* Add group onto cgroup list */
-	blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
-				tg->blkg.dev, BLKIO_POLICY_THROTL);
-
-	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
-
-	throtl_add_group_to_td_list(td, tg);
-}
-
-/* Should be called without queue lock and outside of rcu period */
-static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
-{
-	struct throtl_grp *tg = NULL;
-	int ret;
-
-	tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-	if (!tg)
-		return NULL;
-
-	ret = blkio_alloc_blkg_stats(&tg->blkg);
+	struct throtl_data *td = q->td;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	if (ret) {
-		kfree(tg);
-		return NULL;
-	}
+	__throtl_tg_fill_dev_details(td, tg);
 
-	throtl_init_group(tg);
-	return tg;
+	hlist_add_head(&tg->tg_node, &td->tg_list);
+	td->nr_undestroyed_grps++;
 }
 
 static struct
-throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
+throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
 	struct throtl_grp *tg = NULL;
 
 	/*
 	 * This is the common case when there are no blkio cgroups.
 	 * Avoid lookup in this case
 	 */
 	if (blkcg == &blkio_root_cgroup)
 		tg = td->root_tg;
 	else
-		tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue,
-						     BLKIO_POLICY_THROTL));
+		tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
+					    BLKIO_POLICY_THROTL));
 
 	__throtl_tg_fill_dev_details(td, tg);
 	return tg;
 }
 
-static struct throtl_grp *throtl_get_tg(struct throtl_data *td,
-					struct blkio_cgroup *blkcg)
+static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
+						  struct blkio_cgroup *blkcg)
 {
-	struct throtl_grp *tg = NULL, *__tg = NULL;
 	struct request_queue *q = td->queue;
-
-	/* no throttling for dead queue */
-	if (unlikely(blk_queue_bypass(q)))
-		return NULL;
-
-	tg = throtl_find_tg(td, blkcg);
-	if (tg)
-		return tg;
-
-	if (!css_tryget(&blkcg->css))
-		return NULL;
-
-	/*
-	 * Need to allocate a group. Allocation of group also needs allocation
-	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
-	 * we need to drop rcu lock and queue_lock before we call alloc.
-	 */
-	spin_unlock_irq(q->queue_lock);
-	rcu_read_unlock();
-
-	tg = throtl_alloc_tg(td);
-
-	/* Group allocated and queue is still alive. take the lock */
-	rcu_read_lock();
-	spin_lock_irq(q->queue_lock);
-	css_put(&blkcg->css);
-
-	/* Make sure @q is still alive */
-	if (unlikely(blk_queue_bypass(q))) {
-		kfree(tg);
-		return NULL;
-	}
+	struct throtl_grp *tg = NULL;
 
 	/*
-	 * If some other thread already allocated the group while we were
-	 * not holding queue lock, free up the group
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	__tg = throtl_find_tg(td, blkcg);
+	if (blkcg == &blkio_root_cgroup) {
+		tg = td->root_tg;
+	} else {
+		struct blkio_group *blkg;
 
-	if (__tg) {
-		kfree(tg);
-		return __tg;
-	}
+		blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
 
-	/* Group allocation failed. Account the IO to root group */
-	if (!tg) {
-		tg = td->root_tg;
-		return tg;
+		/* if %NULL and @q is alive, fall back to root_tg */
+		if (!IS_ERR(blkg))
+			tg = tg_of_blkg(blkg);
+		else if (!blk_queue_dead(q))
+			tg = td->root_tg;
 	}
 
-	throtl_init_add_tg_lists(td, tg, blkcg);
+	__throtl_tg_fill_dev_details(td, tg);
 	return tg;
 }
 
@@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
 
 static struct blkio_policy_type blkio_policy_throtl = {
 	.ops = {
+		.blkio_alloc_group_fn = throtl_alloc_blkio_group,
+		.blkio_link_group_fn = throtl_link_blkio_group,
 		.blkio_unlink_group_fn = throtl_unlink_blkio_group,
 		.blkio_clear_queue_fn = throtl_clear_queue,
 		.blkio_update_group_read_bps_fn =
@@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 	 */
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	tg = throtl_find_tg(td, blkcg);
+	tg = throtl_lookup_tg(td, blkcg);
 	if (tg) {
 		throtl_tg_fill_dev_details(td, tg);
 
@@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 	 * IO group
 	 */
 	spin_lock_irq(q->queue_lock);
-	tg = throtl_get_tg(td, blkcg);
+	tg = throtl_lookup_create_tg(td, blkcg);
 	if (unlikely(!tg))
 		goto out_unlock;
 
@@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
 int blk_throtl_init(struct request_queue *q)
 {
 	struct throtl_data *td;
+	struct blkio_group *blkg;
 
 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
 	if (!td)
@@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
 	td->limits_changed = false;
 	INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-	/* alloc and Init root group. */
+	q->td = td;
 	td->queue = q;
 
+	/* alloc and init root group. */
 	rcu_read_lock();
 	spin_lock_irq(q->queue_lock);
 
-	td->root_tg = throtl_get_tg(td, &blkio_root_cgroup);
+	blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
+				  true);
+	if (!IS_ERR(blkg))
+		td->root_tg = tg_of_blkg(blkg);
 
 	spin_unlock_irq(q->queue_lock);
 	rcu_read_unlock();
@@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
 		kfree(td);
 		return -ENOMEM;
 	}
-
-	/* Attach throtl data to request queue */
-	q->td = td;
 	return 0;
 }
 
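The hunks above show only the blk-throttle side of the conversion; the
shared creation path lives in block/blk-cgroup.c.  Below is a rough,
simplified sketch of the flow that blkg_lookup_create() is described to
implement in the commit message: the signature is inferred from the call
sites above, while the policy lookup, locking, percpu-stat setup, race
handling and exact error returns are schematic and not the actual code.

/* Simplified sketch only -- not the real blk-cgroup.c implementation. */
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg;

	/* fast path: a group already exists for this (blkcg, q, policy) */
	blkg = blkg_lookup(blkcg, q, plid);
	if (blkg)
		return blkg;

	/*
	 * Bypassing queues don't get new groups; @for_root lets the root
	 * group be created on a bypassed queue during elevator switch.
	 * ERR_PTR distinguishes this transitional failure from others.
	 */
	if (blk_queue_bypass(q) && !for_root)
		return ERR_PTR(-EBUSY);

	/* the policy allocates its blkg container and default config */
	blkg = pol->ops.blkio_alloc_group_fn(q, blkcg);
	if (!blkg)
		return ERR_PTR(-ENOMEM);

	/* blk-cgroup proper initializes the blkg and links the blkcg side;
	 * the policy then links the queue side under queue_lock */
	pol->ops.blkio_link_group_fn(q, blkg);
	return blkg;
}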