-rw-r--r--  block/blk-core.c            46
-rw-r--r--  block/blk-ioc.c             60
-rw-r--r--  block/blk.h                  1
-rw-r--r--  block/cfq-iosched.c        135
-rw-r--r--  include/linux/elevator.h     8
-rw-r--r--  include/linux/iocontext.h   59
6 files changed, 173 insertions(+), 136 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c26c7f48703..8fbdac7010bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -640,13 +640,18 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -657,10 +662,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
@@ -772,11 +782,14 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct elevator_type *et;
 	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	bool retried = false;
 	int may_queue;
 retry:
+	et = q->elevator->type;
 	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
@@ -837,17 +850,36 @@ retry:
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator. If
+	 * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed. This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock. If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
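The allocation-path changes above follow a lookup-under-lock, create-outside-lock pattern: the icq is looked up while queue_lock is held, and only created (which may sleep) after the lock has been dropped. A condensed, illustrative sketch of that flow; wants_elevator stands in for the blk_rq_should_init_elevator()/QUEUE_FLAG_ELVSWITCH test, and the request_list accounting and failure handling are elided:

	/* sketch only -- mirrors the hunks above, not a drop-in replacement */
	spin_lock_irq(q->queue_lock);
	if (wants_elevator && et->icq_cache && ioc)
		icq = ioc_lookup_icq(ioc, q);		/* fast path: icq already linked */
	spin_unlock_irq(q->queue_lock);

	if (et->icq_cache && !icq)
		icq = ioc_create_icq(q, gfp_mask);	/* slow path: may sleep */

	if (!et->icq_cache || icq)
		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);

If blk_alloc_request() succeeds with an icq, the request pins the io_context: get_io_context() in blk_alloc_request(), balanced by put_io_context() in blk_free_request().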
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 0910a5584d38..c04d16b02225 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,6 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 	kmem_cache_free(iocontext_cachep, ioc);
 	task_unlock(task);
 }
-EXPORT_SYMBOL(create_io_context_slowpath);
 
 /**
  * get_task_io_context - get io_context of a task
@@ -362,6 +361,65 @@ out:
 }
 EXPORT_SYMBOL(ioc_lookup_icq);
 
+/**
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure io_cq linking %current->io_context and @q exists. If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
+ */
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
+{
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc;
+	struct io_cq *icq;
+
+	/* allocate stuff */
+	ioc = create_io_context(current, gfp_mask, q->node);
+	if (!ioc)
+		return NULL;
+
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
+
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
+
+	icq->ioc = ioc;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
+
+	/* lock both q and ioc and try to link @icq */
+	spin_lock_irq(q->queue_lock);
+	spin_lock(&ioc->lock);
+
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
+	}
+
+	spin_unlock(&ioc->lock);
+	spin_unlock_irq(q->queue_lock);
+	radix_tree_preload_end();
+	return icq;
+}
+
 void ioc_set_changed(struct io_context *ioc, int which)
 {
 	struct io_cq *icq;
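ioc_create_icq() above uses the standard radix tree preload idiom: node memory is preallocated with radix_tree_preload() before any spinlock is taken (preload returns with preemption disabled on success), so the radix_tree_insert() done under ioc->lock never needs to allocate, and radix_tree_preload_end() re-enables preemption once the insertion path is finished. A minimal sketch of the general shape; example_insert() and its lock/tree parameters are placeholders, not part of the patch:

	static int example_insert(struct radix_tree_root *tree, spinlock_t *lock,
				  unsigned long index, void *item, gfp_t gfp_mask)
	{
		int ret;

		if (radix_tree_preload(gfp_mask) < 0)
			return -ENOMEM;		/* preemption is still enabled on failure */

		spin_lock(lock);		/* insertion itself must not sleep */
		ret = radix_tree_insert(tree, index, item);
		spin_unlock(lock);

		radix_tree_preload_end();	/* drops the preempt_disable() taken by preload */
		return ret;
	}

This is also why ioc_create_icq() only calls radix_tree_preload_end() on the path where the preload succeeded.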
diff --git a/block/blk.h b/block/blk.h
index ed4d9bf2ab16..7efd772336de 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask);
 void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 11f49d036845..f3b44c394e6d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2935,117 +2935,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/**
- * ioc_create_icq - create and link io_cq
- * @q: request_queue of interest
- * @gfp_mask: allocation mask
- *
- * Make sure io_cq linking %current->io_context and @q exists. If either
- * io_context and/or icq don't exist, they will be created using @gfp_mask.
- *
- * The caller is responsible for ensuring @ioc won't go away and @q is
- * alive and will stay alive until this function returns.
- */
-static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
-{
-	struct elevator_type *et = q->elevator->type;
-	struct io_context *ioc;
-	struct io_cq *icq;
-
-	/* allocate stuff */
-	ioc = create_io_context(current, gfp_mask, q->node);
-	if (!ioc)
-		return NULL;
-
-	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
-				    q->node);
-	if (!icq)
-		return NULL;
-
-	if (radix_tree_preload(gfp_mask) < 0) {
-		kmem_cache_free(et->icq_cache, icq);
-		return NULL;
-	}
-
-	icq->ioc = ioc;
-	icq->q = q;
-	INIT_LIST_HEAD(&icq->q_node);
-	INIT_HLIST_NODE(&icq->ioc_node);
-
-	/* lock both q and ioc and try to link @icq */
-	spin_lock_irq(q->queue_lock);
-	spin_lock(&ioc->lock);
-
-	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
-		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &q->icq_list);
-		if (et->ops.elevator_init_icq_fn)
-			et->ops.elevator_init_icq_fn(icq);
-	} else {
-		kmem_cache_free(et->icq_cache, icq);
-		icq = ioc_lookup_icq(ioc, q);
-		if (!icq)
-			printk(KERN_ERR "cfq: icq link failed!\n");
-	}
-
-	spin_unlock(&ioc->lock);
-	spin_unlock_irq(q->queue_lock);
-	radix_tree_preload_end();
-	return icq;
-}
-
-/**
- * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
- * @cfqd: cfqd to setup cic for
- * @gfp_mask: allocation mask
- *
- * Return cfq_io_cq associating @cfqd and %current->io_context and
- * bump refcnt on io_context. If ioc or cic doesn't exist, they're created
- * using @gfp_mask.
- *
- * Must be called under queue_lock which may be released and re-acquired.
- * This function also may sleep depending on @gfp_mask.
- */
-static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct request_queue *q = cfqd->queue;
-	struct cfq_io_cq *cic = NULL;
-	struct io_context *ioc;
-
-	lockdep_assert_held(q->queue_lock);
-
-	while (true) {
-		/* fast path */
-		ioc = current->io_context;
-		if (likely(ioc)) {
-			cic = cfq_cic_lookup(cfqd, ioc);
-			if (likely(cic))
-				break;
-		}
-
-		/* slow path - unlock, create missing ones and retry */
-		spin_unlock_irq(q->queue_lock);
-		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
-		spin_lock_irq(q->queue_lock);
-		if (!cic)
-			return NULL;
-	}
-
-	/* bump @ioc's refcnt and handle changed notifications */
-	get_io_context(ioc);
-
-	if (unlikely(cic->icq.changed)) {
-		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-			changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-			changed_cgroup(cic);
-#endif
-	}
-
-	return cic;
-}
-
 static void
 __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
@@ -3524,8 +3413,6 @@ static void cfq_put_request(struct request *rq)
 	BUG_ON(!cfqq->allocated[rw]);
 	cfqq->allocated[rw]--;
 
-	put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
-
 	/* Put down rq reference on cfqg */
 	cfq_put_cfqg(RQ_CFQG(rq));
 	rq->elv.priv[0] = NULL;
@@ -3574,7 +3461,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_cq *cic;
+	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3582,9 +3469,16 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto queue_fail;
+
+	/* handle changed notifications */
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
+			changed_ioprio(cic);
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
+			changed_cgroup(cic);
+#endif
+	}
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3615,17 +3509,10 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elv.icq = &cic->icq;
 	rq->elv.priv[0] = cfqq;
 	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
-
-queue_fail:
-	cfq_schedule_dispatch(cfqd);
-	spin_unlock_irq(q->queue_lock);
-	cfq_log(cfqd, "set_request fail");
-	return 1;
 }
 
 static void cfq_kick_queue(struct work_struct *work)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c8f1e67a8ebe..c24f3d7fbf1e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -60,8 +60,8 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
-	elevator_init_icq_fn *elevator_init_icq_fn;
-	elevator_exit_icq_fn *elevator_exit_icq_fn;
+	elevator_init_icq_fn *elevator_init_icq_fn;	/* see iocontext.h */
+	elevator_exit_icq_fn *elevator_exit_icq_fn;	/* ditto */
 
 	elevator_set_req_fn *elevator_set_req_fn;
 	elevator_put_req_fn *elevator_put_req_fn;
@@ -90,8 +90,8 @@ struct elevator_type
 
 	/* fields provided by elevator implementation */
 	struct elevator_ops ops;
-	size_t icq_size;
-	size_t icq_align;
+	size_t icq_size;	/* see iocontext.h */
+	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index ac390a34c0e7..7e1371c4bccf 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -10,6 +10,65 @@ enum {
 	ICQ_CGROUP_CHANGED,
 };
 
+/*
+ * An io_cq (icq) is association between an io_context (ioc) and a
+ * request_queue (q). This is used by elevators which need to track
+ * information per ioc - q pair.
+ *
+ * Elevator can request use of icq by setting elevator_type->icq_size and
+ * ->icq_align. Both size and align must be larger than that of struct
+ * io_cq and elevator can use the tail area for private information. The
+ * recommended way to do this is defining a struct which contains io_cq as
+ * the first member followed by private members and using its size and
+ * align. For example,
+ *
+ *	struct snail_io_cq {
+ *		struct io_cq	icq;
+ *		int		poke_snail;
+ *		int		feed_snail;
+ *	};
+ *
+ *	struct elevator_type snail_elv_type {
+ *		.ops =		{ ... },
+ *		.icq_size =	sizeof(struct snail_io_cq),
+ *		.icq_align =	__alignof__(struct snail_io_cq),
+ *		...
+ *	};
+ *
+ * If icq_size is set, block core will manage icq's. All requests will
+ * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
+ * is called and be holding a reference to the associated io_context.
+ *
+ * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
+ * called and, on destruction, ->elevator_exit_icq_fn(). Both functions
+ * are called with both the associated io_context and queue locks held.
+ *
+ * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
+ * queue lock but the returned icq is valid only until the queue lock is
+ * released. Elevators can not and should not try to create or destroy
+ * icq's.
+ *
+ * As icq's are linked from both ioc and q, the locking rules are a bit
+ * complex.
+ *
+ * - ioc lock nests inside q lock.
+ *
+ * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
+ *   q->icq_list and icq->q_node by q lock.
+ *
+ * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ *   itself is protected by q lock. However, both the indexes and icq
+ *   itself are also RCU managed and lookup can be performed holding only
+ *   the q lock.
+ *
+ * - icq's are not reference counted. They are destroyed when either the
+ *   ioc or q goes away. Each request with icq set holds an extra
+ *   reference to ioc to ensure it stays until the request is completed.
+ *
+ * - Linking and unlinking icq's are performed while holding both ioc and q
+ *   locks. Due to the lock ordering, q exit is simple but ioc exit
+ *   requires reverse-order double lock dance.
+ */
 struct io_cq {
 	struct request_queue	*q;
 	struct io_context	*ioc;
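Because struct io_cq is the first member of the elevator's private structure in the example above, the private data can be recovered from the icq passed to the init/exit callbacks with container_of(); cfq does the same thing via icq_to_cic(). A hypothetical helper for the snail example (snail_io_cq is taken from the comment above; icq_to_sic() and snail_init_icq() are illustrative names, not part of the patch):

	static inline struct snail_io_cq *icq_to_sic(struct io_cq *icq)
	{
		/* recover the elevator-private structure embedding this icq */
		return container_of(icq, struct snail_io_cq, icq);
	}

	static void snail_init_icq(struct io_cq *icq)
	{
		struct snail_io_cq *sic = icq_to_sic(icq);

		sic->poke_snail = 0;
		sic->feed_snail = 0;
	}

Such an elevator would then point .ops.elevator_init_icq_fn at snail_init_icq in its elevator_type, alongside the .icq_size/.icq_align settings shown in the comment above.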