path: root/block/blk-core.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 15:24:45 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 15:24:45 -0500
commit	b3c9dd182ed3bdcdaf0e42625a35924b0497afdc (patch)
tree	ad48ad4d923fee147c736318d0fad35b3755f4f5 /block/blk-core.c
parent	83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e (diff)
parent	5d381efb3d1f1ef10535a31ca0dd9b22fe1e1922 (diff)
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
  Revert "block: recursive merge requests"
  block: Stop using macro stubs for the bio data integrity calls
  blockdev: convert some macros to static inlines
  fs: remove unneeded plug in mpage_readpages()
  block: Add BLKROTATIONAL ioctl
  block: Introduce blk_set_stacking_limits function
  block: remove WARN_ON_ONCE() in exit_io_context()
  block: an exiting task should be allowed to create io_context
  block: ioc_cgroup_changed() needs to be exported
  block: recursive merge requests
  block, cfq: fix empty queue crash caused by request merge
  block, cfq: move icq creation and rq->elv.icq association to block core
  block, cfq: restructure io_cq creation path for io_context interface cleanup
  block, cfq: move io_cq exit/release to blk-ioc.c
  block, cfq: move icq cache management to block core
  block, cfq: move io_cq lookup to blk-ioc.c
  block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
  block, cfq: reorganize cfq_io_context into generic and cfq specific parts
  block: remove elevator_queue->ops
  block: reorder elevator switch sequence
  ...

Fix up conflicts in:

 - block/blk-cgroup.c
	Switch from can_attach_task to can_attach
 - block/cfq-iosched.c
	conflict with now removed cic index changes (we now use q->id instead)
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--	block/blk-core.c	203
1 file changed, 122 insertions, 81 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 15de223c7f93..e6c05a97ee2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
 void blk_drain_queue(struct request_queue *q, bool drain_all)
 {
 	while (true) {
-		int nr_rqs;
+		bool drain = false;
+		int i;
 
 		spin_lock_irq(q->queue_lock);
 
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head))
 			__blk_run_queue(q);
 
-		if (drain_all)
-			nr_rqs = q->rq.count[0] + q->rq.count[1];
-		else
-			nr_rqs = q->rq.elvpriv;
+		drain |= q->rq.elvpriv;
+
+		/*
+		 * Unfortunately, requests are queued at and tracked from
+		 * multiple places and there's no single counter which can
+		 * be drained. Check all the queues and counters.
+		 */
+		if (drain_all) {
+			drain |= !list_empty(&q->queue_head);
+			for (i = 0; i < 2; i++) {
+				drain |= q->rq.count[i];
+				drain |= q->in_flight[i];
+				drain |= !list_empty(&q->flush_queue[i]);
+			}
+		}
 
 		spin_unlock_irq(q->queue_lock);
 
-		if (!nr_rqs)
+		if (!drain)
 			break;
 		msleep(10);
 	}
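
The drain loop above follows a simple pattern: take the queue lock, OR together every counter and list that could still hold outstanding work, drop the lock, and sleep briefly until nothing is left. Below is a minimal userspace sketch of that polling pattern, not the kernel code itself; the struct, field names, and a pthread mutex stand in for request_queue, its request-list fields, and queue_lock.

#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

/* Hypothetical stand-in for the per-queue state being drained. */
struct fake_queue {
        pthread_mutex_t lock;
        int pending[2];         /* e.g. sync/async request counts */
        int in_flight[2];       /* requests handed to the driver */
};

/* Poll until every counter the queue tracks has reached zero. */
static void drain_queue(struct fake_queue *q, bool drain_all)
{
        while (true) {
                bool drain = false;
                int i;

                pthread_mutex_lock(&q->lock);
                if (drain_all) {
                        for (i = 0; i < 2; i++) {
                                drain |= q->pending[i] != 0;
                                drain |= q->in_flight[i] != 0;
                        }
                } else {
                        drain |= q->pending[0] != 0;
                }
                pthread_mutex_unlock(&q->lock);

                if (!drain)
                        break;
                usleep(10 * 1000);      /* mirrors the kernel's msleep(10) */
        }
}
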
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->node = node_id;
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
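
The new fail_id/fail_q labels are the usual kernel idiom of unwinding a partially built object in reverse order of construction, now that blk_alloc_queue_node acquires an extra resource (the queue id). The following is a standalone sketch of that shape only, under the assumption of hypothetical alloc_id()/free_id() helpers standing in for ida_simple_get()/ida_simple_remove(); none of the names are kernel APIs.

#include <stdlib.h>

/* Hypothetical ID allocator standing in for the blk_queue_ida IDA. */
static int next_id;
static int alloc_id(void)   { return next_id++; }
static void free_id(int id) { (void)id; /* nothing to undo in this toy */ }

struct queue {
        int id;
        char *buf;
};

/* Build the object step by step; on failure, unwind what was done so far. */
static struct queue *queue_alloc(void)
{
        struct queue *q = malloc(sizeof(*q));

        if (!q)
                return NULL;

        q->id = alloc_id();
        if (q->id < 0)          /* never triggers in this toy allocator */
                goto fail_q;

        q->buf = malloc(4096);
        if (!q->buf)
                goto fail_id;

        return q;

fail_id:
        free_id(q->id);
fail_q:
        free(q);
        return NULL;
}
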
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-		kobject_get(&q->kobj);
-		return 0;
+	if (likely(!blk_queue_dead(q))) {
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
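
The icq handling added in the two hunks above is plain reference pairing: the allocation path takes an extra reference on the I/O context it stores in the request, and the free path drops it, so the context cannot disappear while the request still points at it. Here is a hedged sketch of that pairing with a simple C11 atomic refcount; the struct and function names are illustrative only, not the kernel's.

#include <stdatomic.h>
#include <stdlib.h>

struct ioctx {
        atomic_int refs;        /* starts at 1 for the owner */
};

struct req {
        struct ioctx *ctx;      /* may be NULL for requests without one */
};

static void ioctx_get(struct ioctx *c) { atomic_fetch_add(&c->refs, 1); }

static void ioctx_put(struct ioctx *c)
{
        /* fetch_sub returns the old value; old == 1 means we dropped the last ref */
        if (atomic_fetch_sub(&c->refs, 1) == 1)
                free(c);
}

/* Allocation pins the context for the lifetime of the request. */
static struct req *req_alloc(struct ioctx *ctx)
{
        struct req *rq = malloc(sizeof(*rq));

        if (!rq)
                return NULL;
        rq->ctx = ctx;
        if (ctx)
                ioctx_get(ctx);
        return rq;
}

/* Freeing drops the pinned reference again. */
static void req_free(struct req *rq)
{
        if (rq->ctx)
                ioctx_put(rq->ctx);
        free(rq);
}
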
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = NULL;
+	struct elevator_type *et;
+	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	bool retried = false;
 	int may_queue;
+retry:
+	et = q->elevator->type;
+	ioc = current->io_context;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC, q->node);
+			/*
+			 * We want ioc to record batching state. If it's
+			 * not already there, creating a new one requires
+			 * dropping queue_lock, which in turn requires
+			 * retesting conditions to avoid queue hang.
+			 */
+			if (!ioc && !retried) {
+				spin_unlock_irq(q->queue_lock);
+				create_io_context(current, gfp_mask, q->node);
+				spin_lock_irq(q->queue_lock);
+				retried = true;
+				goto retry;
+			}
+
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
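
The retry dance above exists because creating an io_context may sleep, which is not allowed under queue_lock: the code drops the lock, creates the context, retakes the lock, and jumps back to re-evaluate everything it had already checked. A userspace sketch of the same shape follows; the pthread mutex, the per-thread task_ctx pointer, and slow_setup() are stand-ins for queue_lock, current->io_context, and create_io_context(), and are not kernel APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread pointer, standing in for current->io_context. */
static _Thread_local void *task_ctx;

/* May allocate/sleep, so it must not run under queue_lock. */
static void slow_setup(void)
{
        if (!task_ctx)
                task_ctx = malloc(64);
}

static bool do_work(void)
{
        bool retried = false;
        bool ok = false;

retry:
        pthread_mutex_lock(&queue_lock);

        if (!task_ctx && !retried) {
                /*
                 * Drop the lock for the blocking step, then start over:
                 * anything checked so far may have changed meanwhile.
                 */
                pthread_mutex_unlock(&queue_lock);
                slow_setup();
                retried = true;
                goto retry;
        }

        if (task_ctx)
                ok = true;      /* proceed with the per-thread context in place */

        pthread_mutex_unlock(&queue_lock);
        return ok;
}
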
@@ -799,17 +849,36 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator. If
+	 * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed. This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock. If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
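
The icq path above splits its work around queue_lock: the cheap lookup happens while the lock is held, creation (which can block) happens only after the lock is released, and the request allocation goes ahead only if an icq was actually obtained when one is required. The sketch below shows only that split under stated assumptions; icq_lookup(), icq_create(), and need_icq are hypothetical helpers, not kernel interfaces.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

struct icq { int unused; };

/* Hypothetical per-thread cache entry, like an io_cq found via the ioc. */
static _Thread_local struct icq *cached_icq;

/* Cheap, non-blocking lookup; safe while the lock is held. */
static struct icq *icq_lookup(void) { return cached_icq; }

/* May block, so it is only called after the lock is dropped. */
static struct icq *icq_create(void)
{
        cached_icq = calloc(1, sizeof(*cached_icq));
        return cached_icq;
}

static bool need_icq = true;    /* like et->icq_cache being set */

static bool prepare_request(void)
{
        struct icq *icq = NULL;

        pthread_mutex_lock(&queue_lock);
        if (need_icq)
                icq = icq_lookup();     /* lookup only, never allocate here */
        pthread_mutex_unlock(&queue_lock);

        if (need_icq && !icq)
                icq = icq_create();     /* blocking work outside the lock */

        /* Only go ahead if the icq requirement was actually satisfied. */
        return !need_icq || icq;
}
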
@@ -871,10 +940,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
-		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
 			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -891,8 +959,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
-		ioc = current_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, ioc);
+		create_io_context(current, GFP_NOIO, q->node);
+		ioc_set_batching(q, current->io_context);
 
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
@@ -1009,54 +1077,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q: request queue where request should be inserted
- * @rq: request to be inserted
- * @at_head: insert request at head or tail of queue
- * @data: private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution. This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth. We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
@@ -1766,6 +1786,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
+		return -ENODEV;
+	}
 
 	/*
 	 * Submitting request must be dequeued before calling this function
@@ -2740,6 +2764,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
+	 * Don't mess with dead queue.
+	 */
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock(q->queue_lock);
+		return;
+	}
+
+	/*
 	 * If we are punting this to kblockd, then we can safely drop
 	 * the queue_lock before waking kblockd (which needs to take
 	 * this lock).
@@ -2815,6 +2847,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
+
+		/*
+		 * Short-circuit if @q is dead
+		 */
+		if (unlikely(blk_queue_dead(q))) {
+			__blk_end_request_all(rq, -ENODEV);
+			continue;
+		}
+
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
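
The blk_queue_dead() checks added in the last three hunks all share one defensive shape: after (re)taking the queue lock, test the dead flag first and either bail out or complete the pending request with -ENODEV instead of dispatching it. A compact sketch of that short-circuit in a flush loop is shown below; the fake_queue/fake_req types and the complete_with_error() and dispatch() helpers are illustrative stand-ins, not the block layer's API.

#include <pthread.h>
#include <stdbool.h>
#include <errno.h>
#include <stdio.h>

struct fake_queue {
        pthread_mutex_t lock;
        bool dead;              /* set once the queue has been torn down */
};

struct fake_req {
        struct fake_queue *q;
        int id;
};

static void complete_with_error(struct fake_req *rq, int error)
{
        printf("req %d completed with error %d\n", rq->id, error);
}

static void dispatch(struct fake_req *rq)
{
        printf("req %d dispatched\n", rq->id);
}

/* Flush a batch of requests, skipping any whose queue has died meanwhile. */
static void flush_list(struct fake_req **rqs, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                struct fake_req *rq = rqs[i];

                pthread_mutex_lock(&rq->q->lock);
                if (rq->q->dead) {
                        /* Short-circuit: never hand work to a dead queue. */
                        pthread_mutex_unlock(&rq->q->lock);
                        complete_with_error(rq, -ENODEV);
                        continue;
                }
                dispatch(rq);
                pthread_mutex_unlock(&rq->q->lock);
        }
}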