about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-03-05 16:15:27 -0500
committerJens Axboe <axboe@kernel.dk>2012-03-06 15:27:24 -0500
commit852c788f8365062c8a383c5a93f7f7289977cb50 (patch)
tree561b69e7f2b6bcc16de165b3b988990de7913615
parentf6e8d01bee036460e03bd4f6a79d014f98ba712e (diff)
block: implement bio_associate_current()
IO scheduling and cgroup are tied to the issuing task via io_context and cgroup of %current. Unfortunately, there are cases where IOs need to be routed via a different task which makes scheduling and cgroup limit enforcement applied completely incorrectly.

For example, all bios delayed by blk-throttle end up being issued by a delayed work item and get assigned the io_context of the worker task which happens to serve the work item and dumped to the default block cgroup. This is doubly confusing as bios which aren't delayed end up in the correct cgroup and makes using blk-throttle and cfq propio together impossible.

Any code which punts IO issuing to another task is affected which is getting more and more common (e.g. btrfs). As both io_context and cgroup are firmly tied to task including userland visible APIs to manipulate them, it makes a lot of sense to match up tasks to bios.

This patch implements bio_associate_current() which associates the specified bio with %current. The bio will record the associated ioc and blkcg at that point and block layer will use the recorded ones regardless of which task actually ends up issuing the bio. bio release puts the associated ioc and blkcg.

It grabs and remembers ioc and blkcg instead of the task itself because task may already be dead by the time the bio is issued making ioc and blkcg inaccessible and those are all block layer cares about.

elevator_set_req_fn() is updated such that the bio elvdata is being allocated for is available to the elevator.

This doesn't update block cgroup policies yet. Further patches will implement the support.

-v2: #ifdef CONFIG_BLK_CGROUP added around bio->bi_ioc dereference in rq_ioc() to fix build breakage.

Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Kent Overstreet <koverstreet@google.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-core.c32
-rw-r--r--block/cfq-iosched.c3
-rw-r--r--block/elevator.c5
-rw-r--r--fs/bio.c61
-rw-r--r--include/linux/bio.h8
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/elevator.h6
7 files changed, 113 insertions, 12 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index b2d0fcd8f87f..991c1d6ef245 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -696,7 +696,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
696} 696}
697 697
698static struct request * 698static struct request *
699blk_alloc_request(struct request_queue *q, struct io_cq *icq, 699blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq,
700 unsigned int flags, gfp_t gfp_mask) 700 unsigned int flags, gfp_t gfp_mask)
701{ 701{
702 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 702 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -710,7 +710,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
710 710
711 if (flags & REQ_ELVPRIV) { 711 if (flags & REQ_ELVPRIV) {
712 rq->elv.icq = icq; 712 rq->elv.icq = icq;
713 if (unlikely(elv_set_request(q, rq, gfp_mask))) { 713 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
714 mempool_free(rq, q->rq.rq_pool); 714 mempool_free(rq, q->rq.rq_pool);
715 return NULL; 715 return NULL;
716 } 716 }
@@ -810,6 +810,22 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
810} 810}
811 811
812/** 812/**
813 * rq_ioc - determine io_context for request allocation
814 * @bio: request being allocated is for this bio (can be %NULL)
815 *
816 * Determine io_context to use for request allocation for @bio. May return
817 * %NULL if %current->io_context doesn't exist.
818 */
819static struct io_context *rq_ioc(struct bio *bio)
820{
821#ifdef CONFIG_BLK_CGROUP
822 if (bio && bio->bi_ioc)
823 return bio->bi_ioc;
824#endif
825 return current->io_context;
826}
827
828/**
813 * get_request - get a free request 829 * get_request - get a free request
814 * @q: request_queue to allocate request from 830 * @q: request_queue to allocate request from
815 * @rw_flags: RW and SYNC flags 831 * @rw_flags: RW and SYNC flags
@@ -836,7 +852,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
836 int may_queue; 852 int may_queue;
837retry: 853retry:
838 et = q->elevator->type; 854 et = q->elevator->type;
839 ioc = current->io_context; 855 ioc = rq_ioc(bio);
840 856
841 if (unlikely(blk_queue_dead(q))) 857 if (unlikely(blk_queue_dead(q)))
842 return NULL; 858 return NULL;
@@ -919,14 +935,16 @@ retry:
919 935
920 /* create icq if missing */ 936 /* create icq if missing */
921 if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) { 937 if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
922 ioc = create_io_context(gfp_mask, q->node); 938 create_io_context(gfp_mask, q->node);
923 if (ioc) 939 ioc = rq_ioc(bio);
924 icq = ioc_create_icq(ioc, q, gfp_mask); 940 if (!ioc)
941 goto fail_alloc;
942 icq = ioc_create_icq(ioc, q, gfp_mask);
925 if (!icq) 943 if (!icq)
926 goto fail_alloc; 944 goto fail_alloc;
927 } 945 }
928 946
929 rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); 947 rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask);
930 if (unlikely(!rq)) 948 if (unlikely(!rq))
931 goto fail_alloc; 949 goto fail_alloc;
932 950
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9a4eac490e0b..abac87337d70 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
3299 * Allocate cfq data structures associated with this request. 3299 * Allocate cfq data structures associated with this request.
3300 */ 3300 */
3301static int 3301static int
3302cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 3302cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
3303 gfp_t gfp_mask)
3303{ 3304{
3304 struct cfq_data *cfqd = q->elevator->elevator_data; 3305 struct cfq_data *cfqd = q->elevator->elevator_data;
3305 struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq); 3306 struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
diff --git a/block/elevator.c b/block/elevator.c
index 451654fadab0..be3ab6df0fea 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
663 return NULL; 663 return NULL;
664} 664}
665 665
666int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) 666int elv_set_request(struct request_queue *q, struct request *rq,
667 struct bio *bio, gfp_t gfp_mask)
667{ 668{
668 struct elevator_queue *e = q->elevator; 669 struct elevator_queue *e = q->elevator;
669 670
670 if (e->type->ops.elevator_set_req_fn) 671 if (e->type->ops.elevator_set_req_fn)
671 return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask); 672 return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
672 return 0; 673 return 0;
673} 674}
674 675
diff --git a/fs/bio.c b/fs/bio.c
index b980ecde026a..142214b80039 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -19,12 +19,14 @@
19#include <linux/swap.h> 19#include <linux/swap.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/iocontext.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/init.h> 24#include <linux/init.h>
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/module.h> 26#include <linux/module.h>
26#include <linux/mempool.h> 27#include <linux/mempool.h>
27#include <linux/workqueue.h> 28#include <linux/workqueue.h>
29#include <linux/cgroup.h>
28#include <scsi/sg.h> /* for struct sg_iovec */ 30#include <scsi/sg.h> /* for struct sg_iovec */
29 31
30#include <trace/events/block.h> 32#include <trace/events/block.h>
@@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
418 * last put frees it 420 * last put frees it
419 */ 421 */
420 if (atomic_dec_and_test(&bio->bi_cnt)) { 422 if (atomic_dec_and_test(&bio->bi_cnt)) {
423 bio_disassociate_task(bio);
421 bio->bi_next = NULL; 424 bio->bi_next = NULL;
422 bio->bi_destructor(bio); 425 bio->bi_destructor(bio);
423 } 426 }
@@ -1641,6 +1644,64 @@ bad:
1641} 1644}
1642EXPORT_SYMBOL(bioset_create); 1645EXPORT_SYMBOL(bioset_create);
1643 1646
1647#ifdef CONFIG_BLK_CGROUP
1648/**
1649 * bio_associate_current - associate a bio with %current
1650 * @bio: target bio
1651 *
1652 * Associate @bio with %current if it hasn't been associated yet. Block
1653 * layer will treat @bio as if it were issued by %current no matter which
1654 * task actually issues it.
1655 *
1656 * This function takes an extra reference of @task's io_context and blkcg
1657 * which will be put when @bio is released. The caller must own @bio,
1658 * ensure %current->io_context exists, and is responsible for synchronizing
1659 * calls to this function.
1660 */
1661int bio_associate_current(struct bio *bio)
1662{
1663 struct io_context *ioc;
1664 struct cgroup_subsys_state *css;
1665
1666 if (bio->bi_ioc)
1667 return -EBUSY;
1668
1669 ioc = current->io_context;
1670 if (!ioc)
1671 return -ENOENT;
1672
1673 /* acquire active ref on @ioc and associate */
1674 get_io_context_active(ioc);
1675 bio->bi_ioc = ioc;
1676
1677 /* associate blkcg if exists */
1678 rcu_read_lock();
1679 css = task_subsys_state(current, blkio_subsys_id);
1680 if (css && css_tryget(css))
1681 bio->bi_css = css;
1682 rcu_read_unlock();
1683
1684 return 0;
1685}
1686
1687/**
1688 * bio_disassociate_task - undo bio_associate_current()
1689 * @bio: target bio
1690 */
1691void bio_disassociate_task(struct bio *bio)
1692{
1693 if (bio->bi_ioc) {
1694 put_io_context(bio->bi_ioc);
1695 bio->bi_ioc = NULL;
1696 }
1697 if (bio->bi_css) {
1698 css_put(bio->bi_css);
1699 bio->bi_css = NULL;
1700 }
1701}
1702
1703#endif /* CONFIG_BLK_CGROUP */
1704
1644static void __init biovec_init_slabs(void) 1705static void __init biovec_init_slabs(void)
1645{ 1706{
1646 int i; 1707 int i;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 129a9c097958..692d3d5b49f5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
268extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); 268extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
269extern unsigned int bvec_nr_vecs(unsigned short idx); 269extern unsigned int bvec_nr_vecs(unsigned short idx);
270 270
271#ifdef CONFIG_BLK_CGROUP
272int bio_associate_current(struct bio *bio);
273void bio_disassociate_task(struct bio *bio);
274#else /* CONFIG_BLK_CGROUP */
275static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
276static inline void bio_disassociate_task(struct bio *bio) { }
277#endif /* CONFIG_BLK_CGROUP */
278
271/* 279/*
272 * bio_set is used to allow other portions of the IO system to 280 * bio_set is used to allow other portions of the IO system to
273 * allocate their own private memory pools for bio and iovec structures. 281 * allocate their own private memory pools for bio and iovec structures.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4053cbd4490e..0edb65dd8edd 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -14,6 +14,8 @@ struct bio;
14struct bio_integrity_payload; 14struct bio_integrity_payload;
15struct page; 15struct page;
16struct block_device; 16struct block_device;
17struct io_context;
18struct cgroup_subsys_state;
17typedef void (bio_end_io_t) (struct bio *, int); 19typedef void (bio_end_io_t) (struct bio *, int);
18typedef void (bio_destructor_t) (struct bio *); 20typedef void (bio_destructor_t) (struct bio *);
19 21
@@ -66,6 +68,14 @@ struct bio {
66 bio_end_io_t *bi_end_io; 68 bio_end_io_t *bi_end_io;
67 69
68 void *bi_private; 70 void *bi_private;
71#ifdef CONFIG_BLK_CGROUP
72 /*
73 * Optional ioc and css associated with this bio. Put on bio
74 * release. Read comment on top of bio_associate_current().
75 */
76 struct io_context *bi_ioc;
77 struct cgroup_subsys_state *bi_css;
78#endif
69#if defined(CONFIG_BLK_DEV_INTEGRITY) 79#if defined(CONFIG_BLK_DEV_INTEGRITY)
70 struct bio_integrity_payload *bi_integrity; /* data integrity */ 80 struct bio_integrity_payload *bi_integrity; /* data integrity */
71#endif 81#endif
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 97fb2557a18c..c03af7687bb4 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
28 28
29typedef void (elevator_init_icq_fn) (struct io_cq *); 29typedef void (elevator_init_icq_fn) (struct io_cq *);
30typedef void (elevator_exit_icq_fn) (struct io_cq *); 30typedef void (elevator_exit_icq_fn) (struct io_cq *);
31typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t); 31typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
32 struct bio *, gfp_t);
32typedef void (elevator_put_req_fn) (struct request *); 33typedef void (elevator_put_req_fn) (struct request *);
33typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); 34typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
34typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); 35typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
@@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
129extern int elv_may_queue(struct request_queue *, int); 130extern int elv_may_queue(struct request_queue *, int);
130extern void elv_abort_queue(struct request_queue *); 131extern void elv_abort_queue(struct request_queue *);
131extern void elv_completed_request(struct request_queue *, struct request *); 132extern void elv_completed_request(struct request_queue *, struct request *);
132extern int elv_set_request(struct request_queue *, struct request *, gfp_t); 133extern int elv_set_request(struct request_queue *q, struct request *rq,
134 struct bio *bio, gfp_t gfp_mask);
133extern void elv_put_request(struct request_queue *, struct request *); 135extern void elv_put_request(struct request_queue *, struct request *);
134extern void elv_drain_elevator(struct request_queue *); 136extern void elv_drain_elevator(struct request_queue *);
135 137