Diffstat (limited to 'block/blk-throttle.c')
 block/blk-throttle.c | 313 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 237 insertions(+), 76 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 252a81a306f7..a62be8d0dc1b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -78,6 +78,8 @@ struct throtl_grp {
 
        /* Some throttle limits got updated for the group */
        int limits_changed;
+
+       struct rcu_head rcu_head;
 };
 
 struct throtl_data
@@ -88,7 +90,7 @@ struct throtl_data
        /* service tree for active throtl groups */
        struct throtl_rb_root tg_service_tree;
 
-       struct throtl_grp root_tg;
+       struct throtl_grp *root_tg;
        struct request_queue *queue;
 
        /* Total Number of queued bios on READ and WRITE lists */
@@ -151,56 +153,44 @@ static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
        return tg;
 }
 
-static void throtl_put_tg(struct throtl_grp *tg)
+static void throtl_free_tg(struct rcu_head *head)
 {
-       BUG_ON(atomic_read(&tg->ref) <= 0);
-       if (!atomic_dec_and_test(&tg->ref))
-               return;
+       struct throtl_grp *tg;
+
+       tg = container_of(head, struct throtl_grp, rcu_head);
+       free_percpu(tg->blkg.stats_cpu);
        kfree(tg);
 }
 
-static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
-                       struct blkio_cgroup *blkcg)
+static void throtl_put_tg(struct throtl_grp *tg)
 {
-       struct throtl_grp *tg = NULL;
-       void *key = td;
-       struct backing_dev_info *bdi = &td->queue->backing_dev_info;
-       unsigned int major, minor;
+       BUG_ON(atomic_read(&tg->ref) <= 0);
+       if (!atomic_dec_and_test(&tg->ref))
+               return;
 
        /*
-        * TODO: Speed up blkiocg_lookup_group() by maintaining a radix
-        * tree of blkg (instead of traversing through hash list all
-        * the time.
+        * A group is freed in rcu manner. But having an rcu lock does not
+        * mean that one can access all the fields of blkg and assume these
+        * are valid. For example, don't try to follow throtl_data and
+        * request queue links.
+        *
+        * Having a reference to blkg under an rcu allows acess to only
+        * values local to groups like group stats and group rate limits
         */
+       call_rcu(&tg->rcu_head, throtl_free_tg);
+}
 
-       /*
-        * This is the common case when there are no blkio cgroups.
-        * Avoid lookup in this case
-        */
-       if (blkcg == &blkio_root_cgroup)
-               tg = &td->root_tg;
-       else
-               tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
-
-       /* Fill in device details for root group */
-       if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               tg->blkg.dev = MKDEV(major, minor);
-               goto done;
-       }
-
-       if (tg)
-               goto done;
-
-       tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-       if (!tg)
-               goto done;
-
+static void throtl_init_group(struct throtl_grp *tg)
+{
        INIT_HLIST_NODE(&tg->tg_node);
        RB_CLEAR_NODE(&tg->rb_node);
        bio_list_init(&tg->bio_lists[0]);
        bio_list_init(&tg->bio_lists[1]);
-       td->limits_changed = false;
+       tg->limits_changed = false;
+
+       /* Practically unlimited BW */
+       tg->bps[0] = tg->bps[1] = -1;
+       tg->iops[0] = tg->iops[1] = -1;
 
        /*
         * Take the initial reference that will be released on destroy
@@ -209,33 +199,181 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
         * exit or cgroup deletion path depending on who is exiting first.
         */
        atomic_set(&tg->ref, 1);
+}
+
+/* Should be called with rcu read lock held (needed for blkcg) */
+static void
+throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
+{
+       hlist_add_head(&tg->tg_node, &td->tg_list);
+       td->nr_undestroyed_grps++;
+}
+
+static void
+__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+       struct backing_dev_info *bdi = &td->queue->backing_dev_info;
+       unsigned int major, minor;
+
+       if (!tg || tg->blkg.dev)
+               return;
+
+       /*
+        * Fill in device details for a group which might not have been
+        * filled at group creation time as queue was being instantiated
+        * and driver had not attached a device yet
+        */
+       if (bdi->dev && dev_name(bdi->dev)) {
+               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+               tg->blkg.dev = MKDEV(major, minor);
+       }
+}
+
+/*
+ * Should be called with without queue lock held. Here queue lock will be
+ * taken rarely. It will be taken only once during life time of a group
+ * if need be
+ */
+static void
+throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
+{
+       if (!tg || tg->blkg.dev)
+               return;
+
+       spin_lock_irq(td->queue->queue_lock);
+       __throtl_tg_fill_dev_details(td, tg);
+       spin_unlock_irq(td->queue->queue_lock);
+}
+
+static void throtl_init_add_tg_lists(struct throtl_data *td,
+                       struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+{
+       __throtl_tg_fill_dev_details(td, tg);
 
        /* Add group onto cgroup list */
-       sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
        blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td,
-                               MKDEV(major, minor), BLKIO_POLICY_THROTL);
+                               tg->blkg.dev, BLKIO_POLICY_THROTL);
 
        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
-       hlist_add_head(&tg->tg_node, &td->tg_list);
-       td->nr_undestroyed_grps++;
-done:
+       throtl_add_group_to_td_list(td, tg);
+}
+
+/* Should be called without queue lock and outside of rcu period */
+static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
+{
+       struct throtl_grp *tg = NULL;
+       int ret;
+
+       tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
+       if (!tg)
+               return NULL;
+
+       ret = blkio_alloc_blkg_stats(&tg->blkg);
+
+       if (ret) {
+               kfree(tg);
+               return NULL;
+       }
+
+       throtl_init_group(tg);
        return tg;
 }
 
-static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+static struct
+throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
        struct throtl_grp *tg = NULL;
+       void *key = td;
+
+       /*
+        * This is the common case when there are no blkio cgroups.
+        * Avoid lookup in this case
+        */
+       if (blkcg == &blkio_root_cgroup)
+               tg = td->root_tg;
+       else
+               tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key));
+
+       __throtl_tg_fill_dev_details(td, tg);
+       return tg;
+}
+
+/*
+ * This function returns with queue lock unlocked in case of error, like
+ * request queue is no more
+ */
+static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
+{
+       struct throtl_grp *tg = NULL, *__tg = NULL;
        struct blkio_cgroup *blkcg;
+       struct request_queue *q = td->queue;
 
        rcu_read_lock();
        blkcg = task_blkio_cgroup(current);
-       tg = throtl_find_alloc_tg(td, blkcg);
-       if (!tg)
-               tg = &td->root_tg;
+       tg = throtl_find_tg(td, blkcg);
+       if (tg) {
+               rcu_read_unlock();
+               return tg;
+       }
+
+       /*
+        * Need to allocate a group. Allocation of group also needs allocation
+        * of per cpu stats which in-turn takes a mutex() and can block. Hence
+        * we need to drop rcu lock and queue_lock before we call alloc
+        *
+        * Take the request queue reference to make sure queue does not
+        * go away once we return from allocation.
+        */
+       blk_get_queue(q);
+       rcu_read_unlock();
+       spin_unlock_irq(q->queue_lock);
+
+       tg = throtl_alloc_tg(td);
+       /*
+        * We might have slept in group allocation. Make sure queue is not
+        * dead
+        */
+       if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+               blk_put_queue(q);
+               if (tg)
+                       kfree(tg);
+
+               return ERR_PTR(-ENODEV);
+       }
+       blk_put_queue(q);
+
+       /* Group allocated and queue is still alive. take the lock */
+       spin_lock_irq(q->queue_lock);
+
+       /*
+        * Initialize the new group. After sleeping, read the blkcg again.
+        */
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+
+       /*
+        * If some other thread already allocated the group while we were
+        * not holding queue lock, free up the group
+        */
+       __tg = throtl_find_tg(td, blkcg);
+
+       if (__tg) {
+               kfree(tg);
+               rcu_read_unlock();
+               return __tg;
+       }
+
+       /* Group allocation failed. Account the IO to root group */
+       if (!tg) {
+               tg = td->root_tg;
+               return tg;
+       }
+
+       throtl_init_add_tg_lists(td, tg, blkcg);
        rcu_read_unlock();
        return tg;
 }
@@ -544,6 +682,12 @@ static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg,
        return 0;
 }
 
+static bool tg_no_rule_group(struct throtl_grp *tg, bool rw) {
+       if (tg->bps[rw] == -1 && tg->iops[rw] == -1)
+               return 1;
+       return 0;
+}
+
 /*
  * Returns whether one can dispatch a bio or not. Also returns approx number
  * of jiffies to wait before this bio is with-in IO rate and can be dispatched
@@ -608,10 +752,6 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
        tg->bytes_disp[rw] += bio->bi_size;
        tg->io_disp[rw]++;
 
-       /*
-        * TODO: This will take blkg->stats_lock. Figure out a way
-        * to avoid this cost.
-        */
        blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync);
 }
 
@@ -989,15 +1129,51 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
        struct throtl_grp *tg;
        struct bio *bio = *biop;
        bool rw = bio_data_dir(bio), update_disptime = true;
+       struct blkio_cgroup *blkcg;
 
        if (bio->bi_rw & REQ_THROTTLED) {
                bio->bi_rw &= ~REQ_THROTTLED;
                return 0;
        }
 
+       /*
+        * A throtl_grp pointer retrieved under rcu can be used to access
+        * basic fields like stats and io rates. If a group has no rules,
+        * just update the dispatch stats in lockless manner and return.
+        */
+
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+       tg = throtl_find_tg(td, blkcg);
+       if (tg) {
+               throtl_tg_fill_dev_details(td, tg);
+
+               if (tg_no_rule_group(tg, rw)) {
+                       blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
+                                       rw, bio->bi_rw & REQ_SYNC);
+                       rcu_read_unlock();
+                       return 0;
+               }
+       }
+       rcu_read_unlock();
+
+       /*
+        * Either group has not been allocated yet or it is not an unlimited
+        * IO group
+        */
+
        spin_lock_irq(q->queue_lock);
        tg = throtl_get_tg(td);
 
+       if (IS_ERR(tg)) {
+               if (PTR_ERR(tg) == -ENODEV) {
+                       /*
+                        * Queue is gone. No queue lock held here.
+                        */
+                       return -ENODEV;
+               }
+       }
+
        if (tg->nr_queued[rw]) {
                /*
                 * There is already another bio queued in same dir. No
@@ -1060,39 +1236,24 @@ int blk_throtl_init(struct request_queue *q)
        INIT_HLIST_HEAD(&td->tg_list);
        td->tg_service_tree = THROTL_RB_ROOT;
        td->limits_changed = false;
+       INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-       /* Init root group */
-       tg = &td->root_tg;
-       INIT_HLIST_NODE(&tg->tg_node);
-       RB_CLEAR_NODE(&tg->rb_node);
-       bio_list_init(&tg->bio_lists[0]);
-       bio_list_init(&tg->bio_lists[1]);
-
-       /* Practically unlimited BW */
-       tg->bps[0] = tg->bps[1] = -1;
-       tg->iops[0] = tg->iops[1] = -1;
-       td->limits_changed = false;
+       /* alloc and Init root group. */
+       td->queue = q;
+       tg = throtl_alloc_tg(td);
 
-       /*
-        * Set root group reference to 2. One reference will be dropped when
-        * all groups on tg_list are being deleted during queue exit. Other
-        * reference will remain there as we don't want to delete this group
-        * as it is statically allocated and gets destroyed when throtl_data
-        * goes away.
-        */
-       atomic_set(&tg->ref, 2);
-       hlist_add_head(&tg->tg_node, &td->tg_list);
-       td->nr_undestroyed_grps++;
+       if (!tg) {
+               kfree(td);
+               return -ENOMEM;
+       }
 
-       INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
+       td->root_tg = tg;
 
        rcu_read_lock();
-       blkiocg_add_blkio_group(&blkio_root_cgroup, &tg->blkg, (void *)td,
-                                       0, BLKIO_POLICY_THROTL);
+       throtl_init_add_tg_lists(td, tg, &blkio_root_cgroup);
        rcu_read_unlock();
 
        /* Attach throtl data to request queue */
-       td->queue = q;
        q->td = td;
        return 0;
 }
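
Note: the reworked throtl_get_tg() above follows a common pattern: look the group up under the lock, drop the lock (and the rcu read side) to perform a blocking allocation, then re-take the lock and re-check whether another thread raced in and created the group first. The following is a minimal userspace C sketch of that pattern only, not kernel code; every name in it (struct grp, grp_lookup, cached_grp, grp_get) is hypothetical and stands in for the blkg/throtl_grp machinery.

#include <pthread.h>
#include <stdlib.h>

struct grp {
        long bps;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct grp *cached_grp;  /* set by whichever thread wins the race */

/* Caller must hold 'lock'. */
static struct grp *grp_lookup(void)
{
        return cached_grp;
}

struct grp *grp_get(void)
{
        struct grp *grp, *other;

        pthread_mutex_lock(&lock);
        grp = grp_lookup();
        if (grp) {
                pthread_mutex_unlock(&lock);
                return grp;
        }

        /* Drop the lock: the allocation below may block. */
        pthread_mutex_unlock(&lock);
        grp = malloc(sizeof(*grp));

        pthread_mutex_lock(&lock);
        other = grp_lookup();
        if (other) {
                /* Somebody else allocated the group while we slept. */
                free(grp);
                grp = other;
        } else if (grp) {
                grp->bps = -1;          /* "practically unlimited", as above */
                cached_grp = grp;
        }
        pthread_mutex_unlock(&lock);

        return grp;             /* NULL only if malloc() failed */
}

The kernel version has two extra wrinkles the sketch omits: it must pin the request queue (blk_get_queue()) so the queue cannot disappear while the lock is dropped, and it falls back to td->root_tg instead of returning NULL when the allocation fails.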