diff options
author | Tejun Heo <tj@kernel.org> | 2012-06-26 18:05:44 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-06-26 18:42:49 -0400 |
commit | a051661ca6d134c18599498b185b667859d4339b (patch) | |
tree | 9d840030874aed9b97a58051bf9568455126e8e8 /block/blk-cgroup.h | |
parent | 5b788ce3e2acac9bf109743b1281d77347cf2101 (diff) |
blkcg: implement per-blkg request allocation
Currently, request_queue has one request_list to allocate requests
from regardless of blkcg of the IO being issued. When the unified
request pool is used up, cfq proportional IO limits become meaningless
- whoever grabs the next request being freed wins the race regardless
of the configured weights.
This can be easily demonstrated by creating a blkio cgroup w/ very low
weight, put a program which can issue a lot of random direct IOs there
and running a sequential IO from a different cgroup. As soon as the
request pool is used up, the sequential IO bandwidth crashes.
This patch implements per-blkg request_list. Each blkg has its own
request_list and any IO allocates its request from the matching blkg
making blkcgs completely isolated in terms of request allocation.
* Root blkcg uses the request_list embedded in each request_queue,
which was renamed to @q->root_rl from @q->rq. While making blkcg rl
handling a bit hairier, this enables avoiding most overhead for root
blkcg.
* Queue fullness is properly per request_list but bdi isn't blkcg
aware yet, so congestion state currently just follows the root
blkcg. As writeback isn't aware of blkcg yet, this works okay for
async congestion but readahead may get the wrong signals. It's
better than blkcg completely collapsing with shared request_list but
needs to be improved with future changes.
* After this change, each block cgroup gets a full request pool making
resource consumption of each cgroup higher. This makes allowing
non-root users to create cgroups less desirable; however, note that
allowing non-root users to directly manage cgroups is already
severely broken regardless of this patch - each block cgroup
consumes kernel memory and skews IO weight (IO weights are not
hierarchical).
v2: queue-sysfs.txt updated and patch description updated as suggested
by Vivek.
v3: blk_get_rl() wasn't checking error return from
blkg_lookup_create() and may cause oops on lookup failure. Fix it
by falling back to root_rl on blkg lookup failures. This problem
was spotted by Rakesh Iyer <rni@google.com>.
v4: Updated to accommodate 458f27a982 "block: Avoid missed wakeup in
request waitqueue". blk_drain_queue() now wakes up waiters on all
blkg->rl on the target queue.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.h')
-rw-r--r-- | block/blk-cgroup.h | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index e74cce1fbac9..24597309e23d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/u64_stats_sync.h> |
18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
20 | #include <linux/blkdev.h> | ||
20 | 21 | ||
21 | /* Max limits for throttle policy */ | 22 | /* Max limits for throttle policy */ |
22 | #define THROTL_IOPS_MAX UINT_MAX | 23 | #define THROTL_IOPS_MAX UINT_MAX |
@@ -93,6 +94,8 @@ struct blkcg_gq { | |||
93 | struct list_head q_node; | 94 | struct list_head q_node; |
94 | struct hlist_node blkcg_node; | 95 | struct hlist_node blkcg_node; |
95 | struct blkcg *blkcg; | 96 | struct blkcg *blkcg; |
97 | /* request allocation list for this blkcg-q pair */ | ||
98 | struct request_list rl; | ||
96 | /* reference count */ | 99 | /* reference count */ |
97 | int refcnt; | 100 | int refcnt; |
98 | 101 | ||
@@ -251,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg) | |||
251 | } | 254 | } |
252 | 255 | ||
253 | /** | 256 | /** |
257 | * blk_get_rl - get request_list to use | ||
258 | * @q: request_queue of interest | ||
259 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
260 | * | ||
261 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
262 | * the request_list to use and obtain a reference on it. Should be called | ||
263 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
264 | * request_list. | ||
265 | */ | ||
266 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
267 | struct bio *bio) | ||
268 | { | ||
269 | struct blkcg *blkcg; | ||
270 | struct blkcg_gq *blkg; | ||
271 | |||
272 | rcu_read_lock(); | ||
273 | |||
274 | blkcg = bio_blkcg(bio); | ||
275 | |||
276 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
277 | if (blkcg == &blkcg_root) | ||
278 | goto root_rl; | ||
279 | |||
280 | /* | ||
281 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
282 | * or if either the blkcg or queue is going away. Fall back to | ||
283 | * root_rl in such cases. | ||
284 | */ | ||
285 | blkg = blkg_lookup_create(blkcg, q); | ||
286 | if (unlikely(IS_ERR(blkg))) | ||
287 | goto root_rl; | ||
288 | |||
289 | blkg_get(blkg); | ||
290 | rcu_read_unlock(); | ||
291 | return &blkg->rl; | ||
292 | root_rl: | ||
293 | rcu_read_unlock(); | ||
294 | return &q->root_rl; | ||
295 | } | ||
296 | |||
297 | /** | ||
298 | * blk_put_rl - put request_list | ||
299 | * @rl: request_list to put | ||
300 | * | ||
301 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
302 | * queue_lock. | ||
303 | */ | ||
304 | static inline void blk_put_rl(struct request_list *rl) | ||
305 | { | ||
306 | /* root_rl may not have blkg set */ | ||
307 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
308 | blkg_put(rl->blkg); | ||
309 | } | ||
310 | |||
311 | /** | ||
312 | * blk_rq_set_rl - associate a request with a request_list | ||
313 | * @rq: request of interest | ||
314 | * @rl: target request_list | ||
315 | * | ||
316 | * Associate @rq with @rl so that accounting and freeing can know the | ||
317 | * request_list @rq came from. | ||
318 | */ | ||
319 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
320 | { | ||
321 | rq->rl = rl; | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * blk_rq_rl - return the request_list a request came from | ||
326 | * @rq: request of interest | ||
327 | * | ||
328 | * Return the request_list @rq is allocated from. | ||
329 | */ | ||
330 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
331 | { | ||
332 | return rq->rl; | ||
333 | } | ||
334 | |||
335 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
336 | struct request_queue *q); | ||
337 | /** | ||
338 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
339 | * | ||
340 | * Should be used under queue_lock. | ||
341 | */ | ||
342 | #define blk_queue_for_each_rl(rl, q) \ | ||
343 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
344 | |||
345 | /** | ||
254 | * blkg_stat_add - add a value to a blkg_stat | 346 | * blkg_stat_add - add a value to a blkg_stat |
255 | * @stat: target blkg_stat | 347 | * @stat: target blkg_stat |
256 | * @val: value to add | 348 | * @val: value to add |
@@ -392,6 +484,7 @@ static inline void blkcg_deactivate_policy(struct request_queue *q, | |||
392 | 484 | ||
393 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | 485 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } |
394 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | 486 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } |
487 | |||
395 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 488 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
396 | struct blkcg_policy *pol) { return NULL; } | 489 | struct blkcg_policy *pol) { return NULL; } |
397 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | 490 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
@@ -399,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | |||
399 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 492 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
400 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 493 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
401 | 494 | ||
495 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
496 | struct bio *bio) { return &q->root_rl; } | ||
497 | static inline void blk_put_rl(struct request_list *rl) { } | ||
498 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
499 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
500 | |||
501 | #define blk_queue_for_each_rl(rl, q) \ | ||
502 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
503 | |||
402 | #endif /* CONFIG_BLK_CGROUP */ | 504 | #endif /* CONFIG_BLK_CGROUP */ |
403 | #endif /* _BLK_CGROUP_H */ | 505 | #endif /* _BLK_CGROUP_H */ |