Diffstat (limited to 'block/blk-core.c')
 -rw-r--r--  block/blk-core.c  209
 1 file changed, 111 insertions(+), 98 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 93eb3e4f88ce..dd134d834d58 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -387,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head) && q->request_fn)
 			__blk_run_queue(q);
 
-		drain |= q->rq.elvpriv;
+		drain |= q->nr_rqs_elvpriv;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
@@ -397,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (drain_all) {
 			drain |= !list_empty(&q->queue_head);
 			for (i = 0; i < 2; i++) {
-				drain |= q->rq.count[i];
+				drain |= q->nr_rqs[i];
 				drain |= q->in_flight[i];
 				drain |= !list_empty(&q->flush_queue[i]);
 			}
@@ -416,9 +416,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 	 * left with hung waiters. We need to wake up those waiters.
 	 */
 	if (q->request_fn) {
+		struct request_list *rl;
+
 		spin_lock_irq(q->queue_lock);
-		for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
-			wake_up_all(&q->rq.wait[i]);
+
+		blk_queue_for_each_rl(rl, q)
+			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
+				wake_up_all(&rl->wait[i]);
+
 		spin_unlock_irq(q->queue_lock);
 	}
 }
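
The wake-up loop now walks every request_list hanging off the queue instead of the single embedded q->rq. For orientation, the request_list fields this file touches after the change look roughly like the sketch below; the field list is inferred from the uses in this diff, and the authoritative definition lives in the block headers:

	struct request_list {
		struct request_queue	*q;		/* owning queue, set by blk_init_rl() */
		mempool_t		*rq_pool;	/* backing pool for struct request */
		int			count[2];	/* allocated requests, sync/async */
		int			starved[2];	/* starved waiters, sync/async */
		wait_queue_head_t	wait[2];	/* woken here and in __freed_request() */
	};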
@@ -517,28 +522,33 @@ void blk_cleanup_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_cleanup_queue);
 
-static int blk_init_free_list(struct request_queue *q)
+int blk_init_rl(struct request_list *rl, struct request_queue *q,
+		gfp_t gfp_mask)
 {
-	struct request_list *rl = &q->rq;
-
 	if (unlikely(rl->rq_pool))
 		return 0;
 
+	rl->q = q;
 	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
 	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
-	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
-				mempool_free_slab, request_cachep, q->node);
-
+					  mempool_free_slab, request_cachep,
+					  gfp_mask, q->node);
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
 	return 0;
 }
 
+void blk_exit_rl(struct request_list *rl)
+{
+	if (rl->rq_pool)
+		mempool_destroy(rl->rq_pool);
+}
+
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
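
blk_init_rl() and blk_exit_rl() turn the old queue-embedded free-list setup into helpers that can be applied to any request_list, not only q->root_rl (which blk_init_allocated_queue() below initialises with GFP_KERNEL). A minimal sketch of how a separately allocated list could be paired with them; the example_* names are illustrative and not part of this patch:

	/* Sketch only: assumes nothing beyond what this hunk shows, namely that
	 * blk_init_rl() fills in rl->q and creates rl->rq_pool, and that
	 * blk_exit_rl() releases the pool again. */
	static struct request_list *example_alloc_rl(struct request_queue *q)
	{
		struct request_list *rl;

		rl = kzalloc_node(sizeof(*rl), GFP_KERNEL, q->node);
		if (!rl)
			return NULL;

		if (blk_init_rl(rl, q, GFP_KERNEL)) {
			kfree(rl);
			return NULL;
		}
		return rl;
	}

	static void example_free_rl(struct request_list *rl)
	{
		blk_exit_rl(rl);
		kfree(rl);
	}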
@@ -680,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (!q)
 		return NULL;
 
-	if (blk_init_free_list(q))
+	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
 		return NULL;
 
 	q->request_fn		= rfn;
@@ -722,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-static inline void blk_free_request(struct request_queue *q, struct request *rq)
+static inline void blk_free_request(struct request_list *rl, struct request *rq)
 {
 	if (rq->cmd_flags & REQ_ELVPRIV) {
-		elv_put_request(q, rq);
+		elv_put_request(rl->q, rq);
 		if (rq->elv.icq)
 			put_io_context(rq->elv.icq->ioc);
 	}
 
-	mempool_free(rq, q->rq.rq_pool);
+	mempool_free(rq, rl->rq_pool);
 }
 
 /*
@@ -767,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
 	ioc->last_waited = jiffies;
 }
 
-static void __freed_request(struct request_queue *q, int sync)
+static void __freed_request(struct request_list *rl, int sync)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 
-	if (rl->count[sync] < queue_congestion_off_threshold(q))
+	/*
+	 * bdi isn't aware of blkcg yet.  As all async IOs end up root
+	 * blkcg anyway, just use root blkcg state.
+	 */
+	if (rl == &q->root_rl &&
+	    rl->count[sync] < queue_congestion_off_threshold(q))
 		blk_clear_queue_congested(q, sync);
 
 	if (rl->count[sync] + 1 <= q->nr_requests) {
 		if (waitqueue_active(&rl->wait[sync]))
 			wake_up(&rl->wait[sync]);
 
-		blk_clear_queue_full(q, sync);
+		blk_clear_rl_full(rl, sync);
 	}
 }
 
@@ -786,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
  * A request has just been released. Account for it, update the full and
  * congestion status, wake up any waiters. Called under q->queue_lock.
  */
-static void freed_request(struct request_queue *q, unsigned int flags)
+static void freed_request(struct request_list *rl, unsigned int flags)
 {
-	struct request_list *rl = &q->rq;
+	struct request_queue *q = rl->q;
 	int sync = rw_is_sync(flags);
 
+	q->nr_rqs[sync]--;
 	rl->count[sync]--;
 	if (flags & REQ_ELVPRIV)
-		rl->elvpriv--;
+		q->nr_rqs_elvpriv--;
 
-	__freed_request(q, sync);
+	__freed_request(rl, sync);
 
 	if (unlikely(rl->starved[sync ^ 1]))
-		__freed_request(q, sync ^ 1);
+		__freed_request(rl, sync ^ 1);
 }
 
 /*
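
Taken together with the __get_request() changes further down, the accounting is now split in two: per-list counters (rl->count[], rl->starved[], rl->wait[]) drive batching and wake-ups, while queue-wide totals (q->nr_rqs[], q->nr_rqs_elvpriv) feed blk_drain_queue() and congestion control. Condensed from the hunks in this diff, the charge/uncharge pairing reads roughly:

	/* __get_request(): charge both the queue and the list */
	q->nr_rqs[is_sync]++;
	rl->count[is_sync]++;
	if (rw_flags & REQ_ELVPRIV)
		q->nr_rqs_elvpriv++;

	/* freed_request(): uncharge both, then wake waiters on this list */
	q->nr_rqs[sync]--;
	rl->count[sync]--;
	if (flags & REQ_ELVPRIV)
		q->nr_rqs_elvpriv--;
	__freed_request(rl, sync);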
@@ -837,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
 }
 
 /**
- * get_request - get a free request
- * @q: request_queue to allocate request from
+ * __get_request - get a free request
+ * @rl: request list to allocate from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
@@ -850,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
  * Returns %NULL on failure, with @q->queue_lock held.
  * Returns !%NULL on success, with @q->queue_lock *not held*.
  */
-static struct request *get_request(struct request_queue *q, int rw_flags,
-				   struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int rw_flags,
+				     struct bio *bio, gfp_t gfp_mask)
 {
+	struct request_queue *q = rl->q;
 	struct request *rq;
-	struct request_list *rl = &q->rq;
-	struct elevator_type *et;
-	struct io_context *ioc;
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc = rq_ioc(bio);
 	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
-	bool retried = false;
 	int may_queue;
-retry:
-	et = q->elevator->type;
-	ioc = rq_ioc(bio);
 
 	if (unlikely(blk_queue_dead(q)))
 		return NULL;
@@ -875,28 +887,14 @@ retry:
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
 			/*
-			 * We want ioc to record batching state. If it's
-			 * not already there, creating a new one requires
-			 * dropping queue_lock, which in turn requires
-			 * retesting conditions to avoid queue hang.
-			 */
-			if (!ioc && !retried) {
-				spin_unlock_irq(q->queue_lock);
-				create_io_context(gfp_mask, q->node);
-				spin_lock_irq(q->queue_lock);
-				retried = true;
-				goto retry;
-			}
-
-			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
 			 * This process will be allowed to complete a batch of
 			 * requests, others will be blocked.
 			 */
-			if (!blk_queue_full(q, is_sync)) {
+			if (!blk_rl_full(rl, is_sync)) {
 				ioc_set_batching(q, ioc);
-				blk_set_queue_full(q, is_sync);
+				blk_set_rl_full(rl, is_sync);
 			} else {
 				if (may_queue != ELV_MQUEUE_MUST
 				    && !ioc_batching(q, ioc)) {
@@ -909,7 +907,12 @@ retry:
 				}
 			}
 		}
-		blk_set_queue_congested(q, is_sync);
+		/*
+		 * bdi isn't aware of blkcg yet.  As all async IOs end up
+		 * root blkcg anyway, just use root blkcg state.
+		 */
+		if (rl == &q->root_rl)
+			blk_set_queue_congested(q, is_sync);
 	}
 
 	/*
@@ -920,6 +923,7 @@ retry:
 	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
 		return NULL;
 
+	q->nr_rqs[is_sync]++;
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
@@ -935,7 +939,7 @@ retry:
 	 */
 	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
 		rw_flags |= REQ_ELVPRIV;
-		rl->elvpriv++;
+		q->nr_rqs_elvpriv++;
 		if (et->icq_cache && ioc)
 			icq = ioc_lookup_icq(ioc, q);
 	}
@@ -945,22 +949,19 @@ retry:
 	spin_unlock_irq(q->queue_lock);
 
 	/* allocate and init request */
-	rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+	rq = mempool_alloc(rl->rq_pool, gfp_mask);
 	if (!rq)
 		goto fail_alloc;
 
 	blk_rq_init(q, rq);
+	blk_rq_set_rl(rq, rl);
 	rq->cmd_flags = rw_flags | REQ_ALLOCED;
 
 	/* init elvpriv */
 	if (rw_flags & REQ_ELVPRIV) {
 		if (unlikely(et->icq_cache && !icq)) {
-			create_io_context(gfp_mask, q->node);
-			ioc = rq_ioc(bio);
-			if (!ioc)
-				goto fail_elvpriv;
-
-			icq = ioc_create_icq(ioc, q, gfp_mask);
+			if (ioc)
+				icq = ioc_create_icq(ioc, q, gfp_mask);
 			if (!icq)
 				goto fail_elvpriv;
 		}
@@ -1000,7 +1001,7 @@ fail_elvpriv:
 	rq->elv.icq = NULL;
 
 	spin_lock_irq(q->queue_lock);
-	rl->elvpriv--;
+	q->nr_rqs_elvpriv--;
 	spin_unlock_irq(q->queue_lock);
 	goto out;
 
@@ -1013,7 +1014,7 @@ fail_alloc:
 	 * queue, but this is pretty rare.
 	 */
 	spin_lock_irq(q->queue_lock);
-	freed_request(q, rw_flags);
+	freed_request(rl, rw_flags);
 
 	/*
 	 * in the very unlikely event that allocation failed and no
@@ -1029,56 +1030,58 @@ rq_starved:
 }
 
 /**
- * get_request_wait - get a free request with retry
+ * get_request - get a free request
  * @q: request_queue to allocate request from
  * @rw_flags: RW and SYNC flags
  * @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
  *
- * Get a free request from @q.  This function keeps retrying under memory
- * pressure and fails iff @q is dead.
+ * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
+ * function keeps retrying under memory pressure and fails iff @q is dead.
 *
 * Must be callled with @q->queue_lock held and,
 * Returns %NULL on failure, with @q->queue_lock held.
 * Returns !%NULL on success, with @q->queue_lock *not held*.
 */
-static struct request *get_request_wait(struct request_queue *q, int rw_flags,
-					struct bio *bio)
+static struct request *get_request(struct request_queue *q, int rw_flags,
+				   struct bio *bio, gfp_t gfp_mask)
 {
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	DEFINE_WAIT(wait);
+	struct request_list *rl;
 	struct request *rq;
 
-	rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	while (!rq) {
-		DEFINE_WAIT(wait);
-		struct request_list *rl = &q->rq;
-
-		if (unlikely(blk_queue_dead(q)))
-			return NULL;
+	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
+retry:
+	rq = __get_request(rl, rw_flags, bio, gfp_mask);
+	if (rq)
+		return rq;
 
-		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
-					  TASK_UNINTERRUPTIBLE);
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+		blk_put_rl(rl);
+		return NULL;
+	}
 
-		trace_block_sleeprq(q, bio, rw_flags & 1);
+	/* wait on @rl and retry */
+	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
+				  TASK_UNINTERRUPTIBLE);
 
-		spin_unlock_irq(q->queue_lock);
-		io_schedule();
+	trace_block_sleeprq(q, bio, rw_flags & 1);
 
-		/*
-		 * After sleeping, we become a "batching" process and
-		 * will be able to allocate at least one request, and
-		 * up to a big batch of them for a small period time.
-		 * See ioc_batching, ioc_set_batching
-		 */
-		create_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, current->io_context);
+	spin_unlock_irq(q->queue_lock);
+	io_schedule();
 
-		spin_lock_irq(q->queue_lock);
-		finish_wait(&rl->wait[is_sync], &wait);
+	/*
+	 * After sleeping, we become a "batching" process and will be able
+	 * to allocate at least one request, and up to a big batch of them
+	 * for a small period time.  See ioc_batching, ioc_set_batching
+	 */
+	ioc_set_batching(q, current->io_context);
 
-		rq = get_request(q, rw_flags, bio, GFP_NOIO);
-	};
+	spin_lock_irq(q->queue_lock);
+	finish_wait(&rl->wait[is_sync], &wait);
 
-	return rq;
+	goto retry;
 }
 
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
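
With get_request_wait() folded in, get_request() is the single entry point and @gfp_mask picks the behaviour: with __GFP_WAIT (as in GFP_NOIO) it sleeps on the chosen request_list and retries, without it a single failed attempt returns NULL. A caller therefore looks roughly like the sketch below, mirroring blk_get_request() and blk_queue_bio() in this file:

	spin_lock_irq(q->queue_lock);

	/* GFP_NOIO includes __GFP_WAIT: retry under memory pressure, fail only if @q is dead */
	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	if (!rq) {
		/* failure: returned with q->queue_lock still held */
		spin_unlock_irq(q->queue_lock);
		return NULL;
	}
	/* success: q->queue_lock has been dropped, @rq holds a ref on its request_list */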
@@ -1087,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask
 
 	BUG_ON(rw != READ && rw != WRITE);
 
+	/* create ioc upfront */
+	create_io_context(gfp_mask, q->node);
+
 	spin_lock_irq(q->queue_lock);
-	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw, NULL);
-	else
-		rq = get_request(q, rw, NULL, gfp_mask);
+	rq = get_request(q, rw, NULL, gfp_mask);
 	if (!rq)
 		spin_unlock_irq(q->queue_lock);
 	/* q->queue_lock is unlocked at this point */
@@ -1248,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	 */
 	if (req->cmd_flags & REQ_ALLOCED) {
 		unsigned int flags = req->cmd_flags;
+		struct request_list *rl = blk_rq_rl(req);
 
 		BUG_ON(!list_empty(&req->queuelist));
 		BUG_ON(!hlist_unhashed(&req->hash));
 
-		blk_free_request(q, req);
-		freed_request(q, flags);
+		blk_free_request(rl, req);
+		freed_request(rl, flags);
+		blk_put_rl(rl);
 	}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
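
This completes the request_list lifetime started in get_request(): the reference taken with blk_get_rl() and remembered via blk_rq_set_rl() is recovered with blk_rq_rl() and dropped with blk_put_rl() once the request goes back to its mempool. In outline, as the pairing appears across this diff:

	/* allocation side: get_request() / __get_request() */
	rl = blk_get_rl(q, bio);	/* reference transferred to @rq on success */
	blk_rq_set_rl(rq, rl);		/* remember which list @rq came from */

	/* free side: __blk_put_request() */
	rl = blk_rq_rl(req);
	blk_free_request(rl, req);	/* back to rl->rq_pool */
	freed_request(rl, flags);	/* fix up counters, wake waiters */
	blk_put_rl(rl);			/* drop the reference taken at allocation */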
@@ -1481,7 +1486,7 @@ get_rq:
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request_wait(q, rw_flags, bio);
+	req = get_request(q, rw_flags, bio, GFP_NOIO);
 	if (unlikely(!req)) {
 		bio_endio(bio, -ENODEV);	/* @q is dead */
 		goto out_unlock;
@@ -1702,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
 		goto end_io;
 	}
 
+	/*
+	 * Various block parts want %current->io_context and lazy ioc
+	 * allocation ends up trading a lot of pain for a small amount of
+	 * memory.  Just allocate it upfront.  This may fail and block
+	 * layer knows how to live with it.
+	 */
+	create_io_context(GFP_ATOMIC, q->node);
+
 	if (blk_throtl_bio(q, bio))
 		return false;	/* throttled, will be resubmitted later */
 
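
Allocating %current->io_context here, once per bio and with GFP_ATOMIC, is what lets __get_request() above drop its old unlock/retry dance: by the time a request is allocated the io_context either already exists or simply does not, and the slow path copes. The tolerant pattern the later code relies on, in sketch form:

	create_io_context(GFP_ATOMIC, q->node);	/* best effort, may fail under pressure */

	ioc = rq_ioc(bio);			/* NULL if creation failed */
	if (ioc)
		icq = ioc_create_icq(ioc, q, gfp_mask);
	if (!icq)
		goto fail_elvpriv;		/* degrade to a request without elevator data */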