path: root/block
author		Tejun Heo <tj@kernel.org>	2011-12-13 18:33:42 -0500
committer	Jens Axboe <axboe@kernel.dk>	2011-12-13 18:33:42 -0500
commit		f1f8cc94651738b418ba54c039df536303b91704 (patch)
tree		eb8bc5a33dec104ab32a935a5bb1e1da2e7cdd34 /block
parent		9b84cacd013996f244d85b3d873287c2a8f88658 (diff)
block, cfq: move icq creation and rq->elv.icq association to block core
Now the block layer knows everything necessary to create and associate icq's with requests. Move ioc_create_icq() to blk-ioc.c and update get_request() such that, if elevator_type->icq_size is set, requests are automatically associated with their matching icq's before elv_set_request(). The io_context reference is also managed by block core on request alloc/free.

* Only ioprio/cgroup changed handling remains from cfq_get_cic(). Collapsed into cfq_set_request().

* This removes queue kicking on icq allocation failure (for now). As icq allocation failure is rare and the only effect queue kicking achieved was possibly accelerating queue processing, this change shouldn't be noticeable.

  There is a larger underlying problem. Unlike request allocation, icq allocation is not guaranteed to succeed eventually after retries. The number of icq's is unbound and thus mempool can't be the solution either. This effectively adds an allocation dependency on the memory free path and thus the possibility of deadlock.

  This usually wouldn't happen because icq allocation is not a hot path and, even when the condition triggers, it's highly unlikely that none of the writeback workers already has an icq. However, it is still possible, especially if the elevator is being switched under high memory pressure, so we better get it fixed. Probably the only solution is just bypassing the elevator and appending to the dispatch queue on any elevator allocation failure.

* Comment added to explain how icq's are managed and synchronized.

This completes the cleanup of the io_context interface.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
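For orientation, the request-allocation path this patch establishes can be condensed as follows. This is an illustrative sketch paraphrased from the get_request()/blk_alloc_request() changes in the diff below, with error handling and retry logic omitted; should_use_elevator() stands in for the existing blk_rq_should_init_elevator()/QUEUE_FLAG_ELVSWITCH checks and is not a real kernel helper.

	/* Illustrative sketch only -- not the literal kernel code in this patch. */
	static struct request *get_request_sketch(struct request_queue *q,
						  int rw_flags, gfp_t gfp_mask)
	{
		struct elevator_type *et = q->elevator->type;
		struct io_context *ioc = current->io_context;
		struct io_cq *icq = NULL;
		struct request *rq = NULL;

		spin_lock_irq(q->queue_lock);
		if (should_use_elevator(q)) {			/* stand-in helper */
			rw_flags |= REQ_ELVPRIV;
			q->rq.elvpriv++;		/* pins the current elevator */
			if (et->icq_cache && ioc)
				icq = ioc_lookup_icq(ioc, q);	/* reuse existing icq */
		}
		spin_unlock_irq(q->queue_lock);

		/* slow path: create the missing icq without holding queue_lock */
		if (et->icq_cache && !icq)
			icq = ioc_create_icq(q, gfp_mask);

		/* allocate the request only once its icq is guaranteed */
		if (!et->icq_cache || icq)
			rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
		return rq;
	}

blk_alloc_request() stores the icq in rq->elv.icq before calling elv_set_request() and takes a reference on icq->ioc, which blk_free_request() now drops; that is why cfq_put_request() below loses its put_io_context() call.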
Diffstat (limited to 'block')
-rw-r--r--	block/blk-core.c	 46
-rw-r--r--	block/blk-ioc.c	 60
-rw-r--r--	block/blk.h	  1
-rw-r--r--	block/cfq-iosched.c	135
4 files changed, 110 insertions, 132 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c26c7f48703..8fbdac7010bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -640,13 +640,18 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -657,10 +662,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
@@ -772,11 +782,14 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct elevator_type *et;
 	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	bool retried = false;
 	int may_queue;
 retry:
+	et = q->elevator->type;
 	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
@@ -837,17 +850,36 @@ retry:
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 0910a5584d38..c04d16b02225 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,6 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 	kmem_cache_free(iocontext_cachep, ioc);
 	task_unlock(task);
 }
-EXPORT_SYMBOL(create_io_context_slowpath);
 
 /**
  * get_task_io_context - get io_context of a task
@@ -362,6 +361,65 @@ out:
 }
 EXPORT_SYMBOL(ioc_lookup_icq);
 
+/**
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure io_cq linking %current->io_context and @q exists.  If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
+ */
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
+{
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc;
+	struct io_cq *icq;
+
+	/* allocate stuff */
+	ioc = create_io_context(current, gfp_mask, q->node);
+	if (!ioc)
+		return NULL;
+
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
+
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
+
+	icq->ioc = ioc;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
+
+	/* lock both q and ioc and try to link @icq */
+	spin_lock_irq(q->queue_lock);
+	spin_lock(&ioc->lock);
+
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
+	}
+
+	spin_unlock(&ioc->lock);
+	spin_unlock_irq(q->queue_lock);
+	radix_tree_preload_end();
+	return icq;
+}
+
 void ioc_set_changed(struct io_context *ioc, int which)
 {
 	struct io_cq *icq;
diff --git a/block/blk.h b/block/blk.h
index ed4d9bf2ab16..7efd772336de 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask);
 void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 11f49d036845..f3b44c394e6d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2935,117 +2935,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/**
- * ioc_create_icq - create and link io_cq
- * @q: request_queue of interest
- * @gfp_mask: allocation mask
- *
- * Make sure io_cq linking %current->io_context and @q exists.  If either
- * io_context and/or icq don't exist, they will be created using @gfp_mask.
- *
- * The caller is responsible for ensuring @ioc won't go away and @q is
- * alive and will stay alive until this function returns.
- */
-static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
-{
-	struct elevator_type *et = q->elevator->type;
-	struct io_context *ioc;
-	struct io_cq *icq;
-
-	/* allocate stuff */
-	ioc = create_io_context(current, gfp_mask, q->node);
-	if (!ioc)
-		return NULL;
-
-	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
-				    q->node);
-	if (!icq)
-		return NULL;
-
-	if (radix_tree_preload(gfp_mask) < 0) {
-		kmem_cache_free(et->icq_cache, icq);
-		return NULL;
-	}
-
-	icq->ioc = ioc;
-	icq->q = q;
-	INIT_LIST_HEAD(&icq->q_node);
-	INIT_HLIST_NODE(&icq->ioc_node);
-
-	/* lock both q and ioc and try to link @icq */
-	spin_lock_irq(q->queue_lock);
-	spin_lock(&ioc->lock);
-
-	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
-		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &q->icq_list);
-		if (et->ops.elevator_init_icq_fn)
-			et->ops.elevator_init_icq_fn(icq);
-	} else {
-		kmem_cache_free(et->icq_cache, icq);
-		icq = ioc_lookup_icq(ioc, q);
-		if (!icq)
-			printk(KERN_ERR "cfq: icq link failed!\n");
-	}
-
-	spin_unlock(&ioc->lock);
-	spin_unlock_irq(q->queue_lock);
-	radix_tree_preload_end();
-	return icq;
-}
-
-/**
- * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
- * @cfqd: cfqd to setup cic for
- * @gfp_mask: allocation mask
- *
- * Return cfq_io_cq associating @cfqd and %current->io_context and
- * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
- * using @gfp_mask.
- *
- * Must be called under queue_lock which may be released and re-acquired.
- * This function also may sleep depending on @gfp_mask.
- */
-static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct request_queue *q = cfqd->queue;
-	struct cfq_io_cq *cic = NULL;
-	struct io_context *ioc;
-
-	lockdep_assert_held(q->queue_lock);
-
-	while (true) {
-		/* fast path */
-		ioc = current->io_context;
-		if (likely(ioc)) {
-			cic = cfq_cic_lookup(cfqd, ioc);
-			if (likely(cic))
-				break;
-		}
-
-		/* slow path - unlock, create missing ones and retry */
-		spin_unlock_irq(q->queue_lock);
-		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
-		spin_lock_irq(q->queue_lock);
-		if (!cic)
-			return NULL;
-	}
-
-	/* bump @ioc's refcnt and handle changed notifications */
-	get_io_context(ioc);
-
-	if (unlikely(cic->icq.changed)) {
-		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-			changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-			changed_cgroup(cic);
-#endif
-	}
-
-	return cic;
-}
-
 static void
 __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
@@ -3524,8 +3413,6 @@ static void cfq_put_request(struct request *rq)
 	BUG_ON(!cfqq->allocated[rw]);
 	cfqq->allocated[rw]--;
 
-	put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
-
 	/* Put down rq reference on cfqg */
 	cfq_put_cfqg(RQ_CFQG(rq));
 	rq->elv.priv[0] = NULL;
@@ -3574,7 +3461,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_cq *cic;
+	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3582,9 +3469,16 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto queue_fail;
+
+	/* handle changed notifications */
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
+			changed_ioprio(cic);
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
+			changed_cgroup(cic);
+#endif
+	}
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3615,17 +3509,10 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elv.icq = &cic->icq;
 	rq->elv.priv[0] = cfqq;
 	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
-
-queue_fail:
-	cfq_schedule_dispatch(cfqd);
-	spin_unlock_irq(q->queue_lock);
-	cfq_log(cfqd, "set_request fail");
-	return 1;
 }
 
 static void cfq_kick_queue(struct work_struct *work)
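Beyond the scope of this diff, the elevator-side contract is small: an elevator that wants per-(task, queue) state describes its icq type and supplies an init hook, and block core handles lookup, creation and refcounting as above. The following is a hypothetical registration sketch; the field names follow elevator_type/elevator_ops as used in this series, but struct example_io_cq and its contents are made up for illustration and are not taken from cfq.

	/* Hypothetical elevator-side opt-in; illustrative only. */
	struct example_io_cq {
		struct io_cq	icq;		/* embedded first, as in cfq_io_cq */
		unsigned long	seek_total;	/* hypothetical per-task, per-queue state */
	};

	static void example_init_icq(struct io_cq *icq)
	{
		struct example_io_cq *eic = container_of(icq, struct example_io_cq, icq);

		eic->seek_total = 0;
	}

	static struct elevator_type iosched_example = {
		.ops = {
			.elevator_init_icq_fn	= example_init_icq,
			/* ... the usual merge/dispatch/set_request hooks ... */
		},
		/* non-zero icq_size tells block core to create and use an icq cache */
		.icq_size	= sizeof(struct example_io_cq),
		.elevator_name	= "example",
	};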