Diffstat (limited to 'block/blk-core.c')
 -rw-r--r--  block/blk-core.c  209
 1 file changed, 111 insertions(+), 98 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 93eb3e4f88ce..dd134d834d58 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head) && q->request_fn)
 			__blk_run_queue(q);
 
-		drain |= q->rq.elvpriv;
+		drain |= q->nr_rqs_elvpriv;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (drain_all) {
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
-				drain |= q->rq.count[i];
+				drain |= q->nr_rqs[i];
 				drain |= q->in_flight[i];
 				drain |= !list_empty(&q->flush_queue[i]);
 			}
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 	 * left with hung waiters. We need to wake up those waiters.
 	 */
 	if (q->request_fn) {
+		struct request_list *rl;
+
 		spin_lock_irq(q->queue_lock);
-		for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
-			wake_up_all(&q->rq.wait[i]);
+
+		blk_queue_for_each_rl(rl, q)
+			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
+				wake_up_all(&rl->wait[i]);
+
 		spin_unlock_irq(q->queue_lock);
 	}
 }
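
The wake-up loop now walks every request_list hanging off the queue instead of the single embedded q->rq. For orientation, the request_list fields this file touches after the change look roughly like the sketch below; the field list is inferred from the uses in this diff, and the authoritative definition lives in the block headers:

	struct request_list {
		struct request_queue	*q;		/* owning queue, set by blk_init_rl() */
		mempool_t		*rq_pool;	/* backing pool for struct request */
		int			count[2];	/* allocated requests, sync/async */
		int			starved[2];	/* starved waiters, sync/async */
		wait_queue_head_t	wait[2];	/* woken here and in __freed_request() */
	};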
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 
-static int blk_init_free_list(struct request_queue *q)
+int blk_init_rl(struct request_list *rl, struct request_queue *q,
+		gfp_t gfp_mask)
 {
-	struct request_list *rl = &q->rq;
-
 	if (unlikely(rl->rq_pool))
 		return 0;
 
+	rl->q = q;
 	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
 	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
-	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-				mempool_free_slab, request_cachep, q->node);
-
+					  mempool_free_slab, request_cachep,
+					  gfp_mask, q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
 	return 0;
 }
 
+void blk_exit_rl(struct request_list *rl)
+{
+	if (rl->rq_pool)
+		mempool_destroy(rl->rq_pool);
+}
+
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
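
blk_init_rl() and blk_exit_rl() turn the old queue-embedded free-list setup into helpers that can be applied to any request_list, not only q->root_rl (which blk_init_allocated_queue() below initialises with GFP_KERNEL). A minimal sketch of how a separately allocated list could be paired with them; the example_* names are illustrative and not part of this patch:

	/* Sketch only: assumes nothing beyond what this hunk shows, namely that
	 * blk_init_rl() fills in rl->q and creates rl->rq_pool, and that
	 * blk_exit_rl() releases the pool again. */
	static struct request_list *example_alloc_rl(struct request_queue *q)
	{
		struct request_list *rl;

		rl = kzalloc_node(sizeof(*rl), GFP_KERNEL, q->node);
		if (!rl)
			return NULL;

		if (blk_init_rl(rl, q, GFP_KERNEL)) {
			kfree(rl);
			return NULL;
		}
		return rl;
	}

	static void example_free_rl(struct request_list *rl)
	{
		blk_exit_rl(rl);
		kfree(rl);
	}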
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (!q)
 		return NULL;
 
-	if (blk_init_free_list(q))
+	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
 		return NULL;
 
 	q->request_fn		= rfn;
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-static inline void blk_free_request(struct request_queue *q, struct request *rq)
+static inline void blk_free_request(struct request_list *rl, struct request *rq)
 {
 	if (rq->cmd_flags & REQ_ELVPRIV) {
-		elv_put_request(q, rq);
+		elv_put_request(rl->q, rq);
 		if (rq->elv.icq)
 			put_io_context(rq->elv.icq->ioc);
 	}
 
-	mempool_free(rq, q->rq.rq_pool);
+	mempool_free(rq, rl->rq_pool);
 }
 
 /*
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
 	ioc->last_waited = jiffies;
 }
 
-static void __freed_request(struct request_queue *q, int sync)
+static void __freed_request(struct request_list *rl, int sync)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 
-	if (rl->count[sync] < queue_congestion_off_threshold(q))
+	/*
+	 * bdi isn't aware of blkcg yet.  As all async IOs end up root
+	 * blkcg anyway, just use root blkcg state.
+	 */
+	if (rl == &q->root_rl &&
+	    rl->count[sync] < queue_congestion_off_threshold(q))
 		blk_clear_queue_congested(q, sync);
 
 	if (rl->count[sync] + 1 <= q->nr_requests) {
 		if (waitqueue_active(&rl->wait[sync]))
 			wake_up(&rl->wait[sync]);
 
-		blk_clear_queue_full(q, sync);
+		blk_clear_rl_full(rl, sync);
 	}
 }
 
@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
  * A request has just been released. Account for it, update the full and
  * congestion status, wake up any waiters. Called under q->queue_lock.
  */
-static void freed_request(struct request_queue *q, unsigned int flags)
+static void freed_request(struct request_list *rl, unsigned int flags)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 	int sync = rw_is_sync(flags);
 
+	q->nr_rqs[sync]--;
 	rl->count[sync]--;
 	if (flags & REQ_ELVPRIV)
-		rl->elvpriv--;
+		q->nr_rqs_elvpriv--;
 
-	__freed_request(q, sync);
+	__freed_request(rl, sync);
 
 	if (unlikely(rl->starved[sync ^ 1]))
-		__freed_request(q, sync ^ 1);
+		__freed_request(rl, sync ^ 1);
 }
 
 /*
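
Taken together with the __get_request() changes further down, the accounting is now split in two: per-list counters (rl->count[], rl->starved[], rl->wait[]) drive batching and wake-ups, while queue-wide totals (q->nr_rqs[], q->nr_rqs_elvpriv) feed blk_drain_queue() and congestion control. Condensed from the hunks in this diff, the charge/uncharge pairing reads roughly:

	/* __get_request(): charge both the queue and the list */
	q->nr_rqs[is_sync]++;
	rl->count[is_sync]++;
	if (rw_flags & REQ_ELVPRIV)
		q->nr_rqs_elvpriv++;

	/* freed_request(): uncharge both, then wake waiters on this list */
	q->nr_rqs[sync]--;
	rl->count[sync]--;
	if (flags & REQ_ELVPRIV)
		q->nr_rqs_elvpriv--;
	__freed_request(rl, sync);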
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
 }
 
 /**
- * get_request - get a free request
- * @q: request_queue to allocate request from
+ * __get_request - get a free request
+ * @rl: request list to allocate from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
  * Returns %NULL on failure, with @q->queue_lock held.
  * Returns !%NULL on success, with @q->queue_lock *not held*.
  */
-static struct request *get_request(struct request_queue *q, int rw_flags,
-				   struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int rw_flags,
+				     struct bio *bio, gfp_t gfp_mask)
 {
+	struct request_queue *q = rl->q;
 	struct request *rq;
-	struct request_list *rl = &q->rq;
-	struct elevator_type *et;
-	struct io_context *ioc;
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
-	bool retried = false;
 	int may_queue;
-retry:
-	et = q->elevator->type;
-	ioc = rq_ioc(bio);
 
 	if (unlikely(blk_queue_dead(q)))
 		return NULL;
@@ -875,28 +887,14 @@ retry:
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
 			/*
-			 * We want ioc to record batching state. If it's
-			 * not already there, creating a new one requires
-			 * dropping queue_lock, which in turn requires
-			 * retesting conditions to avoid queue hang.
-			 */
-			if (!ioc && !retried) {
-				spin_unlock_irq(q->queue_lock);
-				create_io_context(gfp_mask, q->node);
-				spin_lock_irq(q->queue_lock);
-				retried = true;
-				goto retry;
-			}
-
-			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
 			 * This process will be allowed to complete a batch of
 			 * requests, others will be blocked.
 			 */
-			if (!blk_queue_full(q, is_sync)) {
+			if (!blk_rl_full(rl, is_sync)) {
 				ioc_set_batching(q, ioc);
-				blk_set_queue_full(q, is_sync);
+				blk_set_rl_full(rl, is_sync);
 			} else {
 				if (may_queue != ELV_MQUEUE_MUST
 				    && !ioc_batching(q, ioc)) {
@@ -909,7 +907,12 @@ retry:
 				}
 			}
 		}
-		blk_set_queue_congested(q, is_sync);
+		/*
+		 * bdi isn't aware of blkcg yet.  As all async IOs end up
+		 * root blkcg anyway, just use root blkcg state.
+		 */
+		if (rl == &q->root_rl)
+			blk_set_queue_congested(q, is_sync);
 	}
 
 	/*
@@ -920,6 +923,7 @@ retry:
 	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
 		return NULL;
 
+	q->nr_rqs[is_sync]++;
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
@@ -935,7 +939,7 @@ retry:
 	 */
 	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
 		rw_flags |= REQ_ELVPRIV;
-		rl->elvpriv++;
+		q->nr_rqs_elvpriv++;
 		if (et->icq_cache && ioc)
 			icq = ioc_lookup_icq(ioc, q);
 	}
@@ -945,22 +949,19 @@ retry:
 	spin_unlock_irq(q->queue_lock);
 
 	/* allocate and init request */
-	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+	rq = mempool_alloc(rl->rq_pool, gfp_mask);
 	if (!rq)
 		goto fail_alloc;
 
 	blk_rq_init(q, rq);
+	blk_rq_set_rl(rq, rl);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
 	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
 		if (unlikely(et->icq_cache && !icq)) {
-			create_io_context(gfp_mask, q->node);
-			ioc = rq_ioc(bio);
-			if (!ioc)
-				goto fail_elvpriv;
-
-			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (ioc)
+				icq = ioc_create_icq(ioc, q, gfp_mask);
 			if (!icq)
 				goto fail_elvpriv;
 		}
@@ -1000,7 +1001,7 @@ fail_elvpriv:
 	rq->elv.icq = NULL;
 
 	spin_lock_irq(q->queue_lock);
-	rl->elvpriv--;
+	q->nr_rqs_elvpriv--;
 	spin_unlock_irq(q->queue_lock);
 	goto out;
 
@@ -1013,7 +1014,7 @@ fail_alloc:
 	 * queue, but this is pretty rare.
 	 */
 	spin_lock_irq(q->queue_lock);
-	freed_request(q, rw_flags);
+	freed_request(rl, rw_flags);
 
 	/*
 	 * in the very unlikely event that allocation failed and no
@@ -1029,56 +1030,58 @@ rq_starved:
 }
 
 /**
- * get_request_wait - get a free request with retry
+ * get_request - get a free request
  * @q: request_queue to allocate request from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
  *
- * Get a free request from @q.  This function keeps retrying under memory
- * pressure and fails iff @q is dead.
+ * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
+ * function keeps retrying under memory pressure and fails iff @q is dead.
 *
 * Must be callled with @q->queue_lock held and,
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
-static struct request *get_request_wait(struct request_queue *q, int rw_flags,
-					struct bio *bio)
+static struct request *get_request(struct request_queue *q, int rw_flags,
+				   struct bio *bio, gfp_t gfp_mask)
 {
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	DEFINE_WAIT(wait);
+	struct request_list *rl;
 	struct request *rq;
 
-	rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	while (!rq) {
-		DEFINE_WAIT(wait);
-		struct request_list *rl = &q->rq;
-
-		if (unlikely(blk_queue_dead(q)))
-			return NULL;
+	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
+retry:
+	rq = __get_request(rl, rw_flags, bio, gfp_mask);
+	if (rq)
+		return rq;
 
-		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
-					  TASK_UNINTERRUPTIBLE);
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+		blk_put_rl(rl);
+		return NULL;
+	}
 
-		trace_block_sleeprq(q, bio, rw_flags & 1);
+	/* wait on @rl and retry */
+	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
+				  TASK_UNINTERRUPTIBLE);
 
-		spin_unlock_irq(q->queue_lock);
-		io_schedule();
+	trace_block_sleeprq(q, bio, rw_flags & 1);
 
-		/*
-		 * After sleeping, we become a "batching" process and
-		 * will be able to allocate at least one request, and
-		 * up to a big batch of them for a small period time.
-		 * See ioc_batching, ioc_set_batching
-		 */
-		create_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, current->io_context);
+	spin_unlock_irq(q->queue_lock);
+	io_schedule();
 
-		spin_lock_irq(q->queue_lock);
-		finish_wait(&rl->wait[is_sync], &wait);
+	/*
+	 * After sleeping, we become a "batching" process and will be able
+	 * to allocate at least one request, and up to a big batch of them
+	 * for a small period time.  See ioc_batching, ioc_set_batching
+	 */
+	ioc_set_batching(q, current->io_context);
 
-		rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	};
+	spin_lock_irq(q->queue_lock);
+	finish_wait(&rl->wait[is_sync], &wait);
 
-	return rq;
+	goto retry;
 }
 
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
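
With get_request_wait() folded in, get_request() is the single entry point and @gfp_mask picks the behaviour: with __GFP_WAIT (as in GFP_NOIO) it sleeps on the chosen request_list and retries, without it a single failed attempt returns NULL. A caller therefore looks roughly like the sketch below, mirroring blk_get_request() and blk_queue_bio() in this file:

	spin_lock_irq(q->queue_lock);

	/* GFP_NOIO includes __GFP_WAIT: retry under memory pressure, fail only if @q is dead */
	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	if (!rq) {
		/* failure: returned with q->queue_lock still held */
		spin_unlock_irq(q->queue_lock);
		return NULL;
	}
	/* success: q->queue_lock has been dropped, @rq holds a ref on its request_list */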
@@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask
 
 	BUG_ON(rw != READ && rw != WRITE);
 
+	/* create ioc upfront */
+	create_io_context(gfp_mask, q->node);
+
 	spin_lock_irq(q->queue_lock);
-	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw, NULL);
-	else
-		rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, rw, NULL, gfp_mask);
 	if (!rq)
 		spin_unlock_irq(q->queue_lock);
 	/* q->queue_lock is unlocked at this point */
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	 */
 	if (req->cmd_flags & REQ_ALLOCED) {
 		unsigned int flags = req->cmd_flags;
+		struct request_list *rl = blk_rq_rl(req);
 
 		BUG_ON(!list_empty(&req->queuelist));
 		BUG_ON(!hlist_unhashed(&req->hash));
 
-		blk_free_request(q, req);
-		freed_request(q, flags);
+		blk_free_request(rl, req);
+		freed_request(rl, flags);
+		blk_put_rl(rl);
 	}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
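
This completes the request_list lifetime started in get_request(): the reference taken with blk_get_rl() and remembered via blk_rq_set_rl() is recovered with blk_rq_rl() and dropped with blk_put_rl() once the request goes back to its mempool. In outline, as the pairing appears across this diff:

	/* allocation side: get_request() / __get_request() */
	rl = blk_get_rl(q, bio);	/* reference transferred to @rq on success */
	blk_rq_set_rl(rq, rl);		/* remember which list @rq came from */

	/* free side: __blk_put_request() */
	rl = blk_rq_rl(req);
	blk_free_request(rl, req);	/* back to rl->rq_pool */
	freed_request(rl, flags);	/* fix up counters, wake waiters */
	blk_put_rl(rl);			/* drop the reference taken at allocation */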
@@ -1481,7 +1486,7 @@ get_rq:
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request_wait(q, rw_flags, bio);
+	req = get_request(q, rw_flags, bio, GFP_NOIO);
 	if (unlikely(!req)) {
 		bio_endio(bio, -ENODEV);	/* @q is dead */
 		goto out_unlock;
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * Various block parts want %current->io_context and lazy ioc
+	 * allocation ends up trading a lot of pain for a small amount of
+	 * memory.  Just allocate it upfront.  This may fail and block
+	 * layer knows how to live with it.
+	 */
+	create_io_context(GFP_ATOMIC, q->node);
+
 	if (blk_throtl_bio(q, bio))
 		return false;	/* throttled, will be resubmitted later */
 
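
Allocating %current->io_context here, once per bio and with GFP_ATOMIC, is what lets __get_request() above drop its old unlock/retry dance: by the time a request is allocated the io_context either already exists or simply does not, and the slow path copes. The tolerant pattern the later code relies on, in sketch form:

	create_io_context(GFP_ATOMIC, q->node);	/* best effort, may fail under pressure */

	ioc = rq_ioc(bio);			/* NULL if creation failed */
	if (ioc)
		icq = ioc_create_icq(ioc, q, gfp_mask);
	if (!icq)
		goto fail_elvpriv;		/* degrade to a request without elevator data */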