author     Linus Torvalds <torvalds@linux-foundation.org>   2012-01-15 15:24:45 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-01-15 15:24:45 -0500
commit     b3c9dd182ed3bdcdaf0e42625a35924b0497afdc (patch)
tree       ad48ad4d923fee147c736318d0fad35b3755f4f5 /block/blk-core.c
parent     83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e (diff)
parent     5d381efb3d1f1ef10535a31ca0dd9b22fe1e1922 (diff)
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
Revert "block: recursive merge requests"
block: Stop using macro stubs for the bio data integrity calls
blockdev: convert some macros to static inlines
fs: remove unneeded plug in mpage_readpages()
block: Add BLKROTATIONAL ioctl
block: Introduce blk_set_stacking_limits function
block: remove WARN_ON_ONCE() in exit_io_context()
block: an exiting task should be allowed to create io_context
block: ioc_cgroup_changed() needs to be exported
block: recursive merge requests
block, cfq: fix empty queue crash caused by request merge
block, cfq: move icq creation and rq->elv.icq association to block core
block, cfq: restructure io_cq creation path for io_context interface cleanup
block, cfq: move io_cq exit/release to blk-ioc.c
block, cfq: move icq cache management to block core
block, cfq: move io_cq lookup to blk-ioc.c
block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
block, cfq: reorganize cfq_io_context into generic and cfq specific parts
block: remove elevator_queue->ops
block: reorder elevator switch sequence
...
Fix up conflicts in:
- block/blk-cgroup.c
Switch from can_attach_task to can_attach
- block/cfq-iosched.c
conflict with now removed cic index changes (we now use q->id instead)
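The cic index mentioned in the last conflict note is gone because every request_queue now carries its own small integer id, handed out by the blk_queue_ida this diff introduces. As a rough illustration of that allocator pattern (a sketch using the same IDA calls, not code lifted from the patch):

/* Sketch: allocate and release a small per-queue id from an IDA. */
static DEFINE_IDA(example_queue_ida);

static int example_alloc_queue_id(void)
{
	/* Smallest free id >= 0; returns a negative errno on failure. */
	return ida_simple_get(&example_queue_ida, 0, 0, GFP_KERNEL);
}

static void example_free_queue_id(int id)
{
	/* Hand the id back so a later queue can reuse it. */
	ida_simple_remove(&example_queue_ida, id);
}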
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--   block/blk-core.c   203
1 file changed, 122 insertions(+), 81 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 15de223c7f93..e6c05a97ee2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
 void blk_drain_queue(struct request_queue *q, bool drain_all)
 {
 	while (true) {
-		int nr_rqs;
+		bool drain = false;
+		int i;
 
 		spin_lock_irq(q->queue_lock);
 
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head))
 			__blk_run_queue(q);
 
-		if (drain_all)
-			nr_rqs = q->rq.count[0] + q->rq.count[1];
-		else
-			nr_rqs = q->rq.elvpriv;
+		drain |= q->rq.elvpriv;
+
+		/*
+		 * Unfortunately, requests are queued at and tracked from
+		 * multiple places and there's no single counter which can
+		 * be drained.  Check all the queues and counters.
+		 */
+		if (drain_all) {
+			drain |= !list_empty(&q->queue_head);
+			for (i = 0; i < 2; i++) {
+				drain |= q->rq.count[i];
+				drain |= q->in_flight[i];
+				drain |= !list_empty(&q->flush_queue[i]);
+			}
+		}
 
 		spin_unlock_irq(q->queue_lock);
 
-		if (!nr_rqs)
+		if (!drain)
 			break;
 		msleep(10);
 	}
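The rewritten loop replaces a single request count with a bool that OR-accumulates every place a request can still be pending. A minimal standalone sketch of the same accumulate-then-poll idea, in plain C rather than the kernel helpers (illustrative only):

#include <stdbool.h>
#include <stddef.h>

/* Illustrative: loop until every tracked counter has drained to zero. */
static void wait_until_drained(const int *counters, size_t n, void (*relax)(void))
{
	for (;;) {
		bool busy = false;
		size_t i;

		for (i = 0; i < n; i++)
			busy |= counters[i] != 0;	/* any live counter keeps us looping */

		if (!busy)
			break;
		relax();	/* back off briefly, like the msleep(10) above */
	}
}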
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->node = node_id;
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
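With the id allocation in place, blk_alloc_queue_node() drops its per-site cleanup in favour of the single goto unwind implemented by the fail_id/fail_q labels above. A condensed sketch of the resulting shape (unchanged setup elided, helper name hypothetical):

struct request_queue *sketch_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;

	q = kmem_cache_alloc_node(blk_requestq_cachep, gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
	if (q->id < 0)
		goto fail_q;		/* only the queue itself to undo */

	if (bdi_init(&q->backing_dev_info) || blk_throtl_init(q))
		goto fail_id;		/* also hand the id back */

	/* ... timers, list heads and locks as in the hunks above ... */
	return q;

fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}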
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-		kobject_get(&q->kobj);
-		return 0;
+	if (likely(!blk_queue_dead(q))) {
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
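blk_get_queue() now returns bool, true when a reference was successfully taken, instead of the old 0-on-success int, so callers read as a plain conditional. A hypothetical caller, just to show the calling convention the new signature encourages:

/* Hypothetical caller: pin the queue unless it is already marked dead. */
static int sketch_use_queue(struct request_queue *q)
{
	if (!blk_get_queue(q))
		return -ENXIO;		/* queue is going away, don't touch it */

	/* ... safely use q here ... */

	blk_put_queue(q);		/* drop the reference taken above */
	return 0;
}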
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
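The comment added above spells out the new lifetime rule: when an icq is attached at allocation time, the request takes a reference on its io_context, and blk_free_request() drops that reference again. Reduced to the bare pairing (a sketch, not a verbatim excerpt):

/* Sketch: the get taken at allocation is balanced by the put at free time. */
static void sketch_attach_icq(struct request *rq, struct io_cq *icq)
{
	rq->elv.icq = icq;
	if (icq)
		get_io_context(icq->ioc);	/* rq keeps the io_context alive */
}

static void sketch_release_icq(struct request_queue *q, struct request *rq)
{
	if (rq->elv.icq)
		put_io_context(rq->elv.icq->ioc, q);	/* balances the get above */
}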
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = NULL;
+	struct elevator_type *et;
+	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	bool retried = false;
 	int may_queue;
+retry:
+	et = q->elevator->type;
+	ioc = current->io_context;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC, q->node);
+			/*
+			 * We want ioc to record batching state.  If it's
+			 * not already there, creating a new one requires
+			 * dropping queue_lock, which in turn requires
+			 * retesting conditions to avoid queue hang.
+			 */
+			if (!ioc && !retried) {
+				spin_unlock_irq(q->queue_lock);
+				create_io_context(current, gfp_mask, q->node);
+				spin_lock_irq(q->queue_lock);
+				retried = true;
+				goto retry;
+			}
+
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
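get_request() is entered with q->queue_lock held, and creating an io_context can sleep, so the hunk drops the lock around the allocation and jumps back to the retry label so every condition is re-evaluated under the lock. The skeleton of that pattern, with the surrounding checks elided (a fragment for illustration only):

/* Sketch: unlock, allocate something that may sleep, relock, re-check. */
retry:
	ioc = current->io_context;
	/* ... checks that rely on q->queue_lock being held ... */

	if (!ioc && !retried) {
		spin_unlock_irq(q->queue_lock);
		create_io_context(current, gfp_mask, q->node);	/* may sleep */
		spin_lock_irq(q->queue_lock);
		retried = true;
		goto retry;	/* state may have changed while unlocked */
	}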
@@ -799,17 +849,36 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
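The icq handling ends up split in two on purpose: ioc_lookup_icq() runs while queue_lock is still held (elvpriv has just been bumped, so per the comment above the elevator and its icq's cannot go away), while the potentially sleeping ioc_create_icq() runs only after the lock is dropped. Roughly, as a sketch of the control flow rather than a verbatim excerpt:

	/* Under q->queue_lock: lookup only, never sleeps. */
	if (et->icq_cache && ioc)
		icq = ioc_lookup_icq(ioc, q);

	spin_unlock_irq(q->queue_lock);

	/* Lock dropped: creation may allocate and sleep. */
	if (et->icq_cache && !icq)
		icq = ioc_create_icq(q, gfp_mask);

	/* Only allocate the request if the elevator's icq requirement is met. */
	if (!et->icq_cache || icq)
		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);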
@@ -871,10 +940,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
-		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
 			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -891,8 +959,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
-		ioc = current_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, ioc);
+		create_io_context(current, GFP_NOIO, q->node);
+		ioc_set_batching(q, current->io_context);
 
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
@@ -1009,54 +1077,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q:		request queue where request should be inserted
- * @rq:		request to be inserted
- * @at_head:	insert request at head or tail of queue
- * @data:	private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution.  This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth.  We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
@@ -1766,6 +1786,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
+		return -ENODEV;
+	}
 
 	/*
 	 * Submitting request must be dequeued before calling this function
@@ -2740,6 +2764,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
+	 * Don't mess with dead queue.
+	 */
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock(q->queue_lock);
+		return;
+	}
+
+	/*
 	 * If we are punting this to kblockd, then we can safely drop
 	 * the queue_lock before waking kblockd (which needs to take
 	 * this lock).
@@ -2815,6 +2847,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
+
+		/*
+		 * Short-circuit if @q is dead
+		 */
+		if (unlikely(blk_queue_dead(q))) {
+			__blk_end_request_all(rq, -ENODEV);
+			continue;
+		}
+
 		/*
 		 * rq is already accounted, so use raw insert
 		 */