path: root/block/blk-core.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 15:24:45 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 15:24:45 -0500
commit	b3c9dd182ed3bdcdaf0e42625a35924b0497afdc (patch)
tree	ad48ad4d923fee147c736318d0fad35b3755f4f5 /block/blk-core.c
parent	83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e (diff)
parent	5d381efb3d1f1ef10535a31ca0dd9b22fe1e1922 (diff)
Merge branch 'for-3.3/core' of git://git.kernel.dk/linux-block
* 'for-3.3/core' of git://git.kernel.dk/linux-block: (37 commits)
  Revert "block: recursive merge requests"
  block: Stop using macro stubs for the bio data integrity calls
  blockdev: convert some macros to static inlines
  fs: remove unneeded plug in mpage_readpages()
  block: Add BLKROTATIONAL ioctl
  block: Introduce blk_set_stacking_limits function
  block: remove WARN_ON_ONCE() in exit_io_context()
  block: an exiting task should be allowed to create io_context
  block: ioc_cgroup_changed() needs to be exported
  block: recursive merge requests
  block, cfq: fix empty queue crash caused by request merge
  block, cfq: move icq creation and rq->elv.icq association to block core
  block, cfq: restructure io_cq creation path for io_context interface cleanup
  block, cfq: move io_cq exit/release to blk-ioc.c
  block, cfq: move icq cache management to block core
  block, cfq: move io_cq lookup to blk-ioc.c
  block, cfq: move cfqd->icq_list to request_queue and add request->elv.icq
  block, cfq: reorganize cfq_io_context into generic and cfq specific parts
  block: remove elevator_queue->ops
  block: reorder elevator switch sequence
  ...

Fix up conflicts in:

 - block/blk-cgroup.c
	Switch from can_attach_task to can_attach
 - block/cfq-iosched.c
	conflict with now removed cic index changes (we now use q->id instead)
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--	block/blk-core.c	203
1 file changed, 122 insertions, 81 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 15de223c7f93..e6c05a97ee2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
 void blk_drain_queue(struct request_queue *q, bool drain_all)
 {
 	while (true) {
-		int nr_rqs;
+		bool drain = false;
+		int i;
 
 		spin_lock_irq(q->queue_lock);
 
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (!list_empty(&q->queue_head))
 			__blk_run_queue(q);
 
-		if (drain_all)
-			nr_rqs = q->rq.count[0] + q->rq.count[1];
-		else
-			nr_rqs = q->rq.elvpriv;
+		drain |= q->rq.elvpriv;
+
+		/*
+		 * Unfortunately, requests are queued at and tracked from
+		 * multiple places and there's no single counter which can
+		 * be drained. Check all the queues and counters.
+		 */
+		if (drain_all) {
+			drain |= !list_empty(&q->queue_head);
+			for (i = 0; i < 2; i++) {
+				drain |= q->rq.count[i];
+				drain |= q->in_flight[i];
+				drain |= !list_empty(&q->flush_queue[i]);
+			}
+		}
 
 		spin_unlock_irq(q->queue_lock);
 
-		if (!nr_rqs)
+		if (!drain)
 			break;
 		msleep(10);
 	}
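
The drain loop above follows a simple pattern: take the queue lock, OR together every counter and list that could still hold outstanding work, drop the lock, and sleep briefly until nothing is left. Below is a minimal userspace sketch of that polling pattern, not the kernel code itself; the struct, field names, and a pthread mutex stand in for request_queue, its request-list fields, and queue_lock.

#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

/* Hypothetical stand-in for the per-queue state being drained. */
struct fake_queue {
        pthread_mutex_t lock;
        int pending[2];         /* e.g. sync/async request counts */
        int in_flight[2];       /* requests handed to the driver */
};

/* Poll until every counter the queue tracks has reached zero. */
static void drain_queue(struct fake_queue *q, bool drain_all)
{
        while (true) {
                bool drain = false;
                int i;

                pthread_mutex_lock(&q->lock);
                if (drain_all) {
                        for (i = 0; i < 2; i++) {
                                drain |= q->pending[i] != 0;
                                drain |= q->in_flight[i] != 0;
                        }
                } else {
                        drain |= q->pending[0] != 0;
                }
                pthread_mutex_unlock(&q->lock);

                if (!drain)
                        break;
                usleep(10 * 1000);      /* mirrors the kernel's msleep(10) */
        }
}
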
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->node = node_id;
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
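
The new fail_id/fail_q labels are the usual kernel idiom of unwinding a partially built object in reverse order of construction, now that blk_alloc_queue_node acquires an extra resource (the queue id). The following is a standalone sketch of that shape only, under the assumption of hypothetical alloc_id()/free_id() helpers standing in for ida_simple_get()/ida_simple_remove(); none of the names are kernel APIs.

#include <stdlib.h>

/* Hypothetical ID allocator standing in for the blk_queue_ida IDA. */
static int next_id;
static int alloc_id(void)   { return next_id++; }
static void free_id(int id) { (void)id; /* nothing to undo in this toy */ }

struct queue {
        int id;
        char *buf;
};

/* Build the object step by step; on failure, unwind what was done so far. */
static struct queue *queue_alloc(void)
{
        struct queue *q = malloc(sizeof(*q));

        if (!q)
                return NULL;

        q->id = alloc_id();
        if (q->id < 0)          /* never triggers in this toy allocator */
                goto fail_q;

        q->buf = malloc(4096);
        if (!q->buf)
                goto fail_id;

        return q;

fail_id:
        free_id(q->id);
fail_q:
        free(q);
        return NULL;
}
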
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
-		kobject_get(&q->kobj);
-		return 0;
+	if (likely(!blk_queue_dead(q))) {
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
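
The icq handling added in the two hunks above is plain reference pairing: the allocation path takes an extra reference on the I/O context it stores in the request, and the free path drops it, so the context cannot disappear while the request still points at it. Here is a hedged sketch of that pairing with a simple C11 atomic refcount; the struct and function names are illustrative only, not the kernel's.

#include <stdatomic.h>
#include <stdlib.h>

struct ioctx {
        atomic_int refs;        /* starts at 1 for the owner */
};

struct req {
        struct ioctx *ctx;      /* may be NULL for requests without one */
};

static void ioctx_get(struct ioctx *c) { atomic_fetch_add(&c->refs, 1); }

static void ioctx_put(struct ioctx *c)
{
        /* fetch_sub returns the old value; old == 1 means we dropped the last ref */
        if (atomic_fetch_sub(&c->refs, 1) == 1)
                free(c);
}

/* Allocation pins the context for the lifetime of the request. */
static struct req *req_alloc(struct ioctx *ctx)
{
        struct req *rq = malloc(sizeof(*rq));

        if (!rq)
                return NULL;
        rq->ctx = ctx;
        if (ctx)
                ioctx_get(ctx);
        return rq;
}

/* Freeing drops the pinned reference again. */
static void req_free(struct req *rq)
{
        if (rq->ctx)
                ioctx_put(rq->ctx);
        free(rq);
}
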
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = NULL;
+	struct elevator_type *et;
+	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
+	bool retried = false;
 	int may_queue;
+retry:
+	et = q->elevator->type;
+	ioc = current->io_context;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 
 	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[is_sync]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC, q->node);
+			/*
+			 * We want ioc to record batching state. If it's
+			 * not already there, creating a new one requires
+			 * dropping queue_lock, which in turn requires
+			 * retesting conditions to avoid queue hang.
+			 */
+			if (!ioc && !retried) {
+				spin_unlock_irq(q->queue_lock);
+				create_io_context(current, gfp_mask, q->node);
+				spin_lock_irq(q->queue_lock);
+				retried = true;
+				goto retry;
+			}
+
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
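
The retry dance above exists because creating an io_context may sleep, which is not allowed under queue_lock: the code drops the lock, creates the context, retakes the lock, and jumps back to re-evaluate everything it had already checked. A userspace sketch of the same shape follows; the pthread mutex, the per-thread task_ctx pointer, and slow_setup() are stand-ins for queue_lock, current->io_context, and create_io_context(), and are not kernel APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread pointer, standing in for current->io_context. */
static _Thread_local void *task_ctx;

/* May allocate/sleep, so it must not run under queue_lock. */
static void slow_setup(void)
{
        if (!task_ctx)
                task_ctx = malloc(64);
}

static bool do_work(void)
{
        bool retried = false;
        bool ok = false;

retry:
        pthread_mutex_lock(&queue_lock);

        if (!task_ctx && !retried) {
                /*
                 * Drop the lock for the blocking step, then start over:
                 * anything checked so far may have changed meanwhile.
                 */
                pthread_mutex_unlock(&queue_lock);
                slow_setup();
                retried = true;
                goto retry;
        }

        if (task_ctx)
                ok = true;      /* proceed with the per-thread context in place */

        pthread_mutex_unlock(&queue_lock);
        return ok;
}
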
@@ -799,17 +849,36 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator. If
+	 * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed. This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock. If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
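
The icq path above splits its work around queue_lock: the cheap lookup happens while the lock is held, creation (which can block) happens only after the lock is released, and the request allocation goes ahead only if an icq was actually obtained when one is required. The sketch below shows only that split under stated assumptions; icq_lookup(), icq_create(), and need_icq are hypothetical helpers, not kernel interfaces.

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

struct icq { int unused; };

/* Hypothetical per-thread cache entry, like an io_cq found via the ioc. */
static _Thread_local struct icq *cached_icq;

/* Cheap, non-blocking lookup; safe while the lock is held. */
static struct icq *icq_lookup(void) { return cached_icq; }

/* May block, so it is only called after the lock is dropped. */
static struct icq *icq_create(void)
{
        cached_icq = calloc(1, sizeof(*cached_icq));
        return cached_icq;
}

static bool need_icq = true;    /* like et->icq_cache being set */

static bool prepare_request(void)
{
        struct icq *icq = NULL;

        pthread_mutex_lock(&queue_lock);
        if (need_icq)
                icq = icq_lookup();     /* lookup only, never allocate here */
        pthread_mutex_unlock(&queue_lock);

        if (need_icq && !icq)
                icq = icq_create();     /* blocking work outside the lock */

        /* Only go ahead if the icq requirement was actually satisfied. */
        return !need_icq || icq;
}
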
@@ -871,10 +940,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 	rq = get_request(q, rw_flags, bio, GFP_NOIO);
 	while (!rq) {
 		DEFINE_WAIT(wait);
-		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
 			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -891,8 +959,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		 * up to a big batch of them for a small period time.
 		 * See ioc_batching, ioc_set_batching
 		 */
-		ioc = current_io_context(GFP_NOIO, q->node);
-		ioc_set_batching(q, ioc);
+		create_io_context(current, GFP_NOIO, q->node);
+		ioc_set_batching(q, current->io_context);
 
 		spin_lock_irq(q->queue_lock);
 		finish_wait(&rl->wait[is_sync], &wait);
@@ -1009,54 +1077,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q: request queue where request should be inserted
- * @rq: request to be inserted
- * @at_head: insert request at head or tail of queue
- * @data: private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution. This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth. We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
@@ -1766,6 +1786,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
+		return -ENODEV;
+	}
 
 	/*
 	 * Submitting request must be dequeued before calling this function
@@ -2740,6 +2764,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
+	 * Don't mess with dead queue.
+	 */
+	if (unlikely(blk_queue_dead(q))) {
+		spin_unlock(q->queue_lock);
+		return;
+	}
+
+	/*
 	 * If we are punting this to kblockd, then we can safely drop
 	 * the queue_lock before waking kblockd (which needs to take
 	 * this lock).
@@ -2815,6 +2847,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 			depth = 0;
 			spin_lock(q->queue_lock);
 		}
+
+		/*
+		 * Short-circuit if @q is dead
+		 */
+		if (unlikely(blk_queue_dead(q))) {
+			__blk_end_request_all(rq, -ENODEV);
+			continue;
+		}
+
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
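
The blk_queue_dead() checks added in the last three hunks all share one defensive shape: after (re)taking the queue lock, test the dead flag first and either bail out or complete the pending request with -ENODEV instead of dispatching it. A compact sketch of that short-circuit in a flush loop is shown below; the fake_queue/fake_req types and the complete_with_error() and dispatch() helpers are illustrative stand-ins, not the block layer's API.

#include <pthread.h>
#include <stdbool.h>
#include <errno.h>
#include <stdio.h>

struct fake_queue {
        pthread_mutex_t lock;
        bool dead;              /* set once the queue has been torn down */
};

struct fake_req {
        struct fake_queue *q;
        int id;
};

static void complete_with_error(struct fake_req *rq, int error)
{
        printf("req %d completed with error %d\n", rq->id, error);
}

static void dispatch(struct fake_req *rq)
{
        printf("req %d dispatched\n", rq->id);
}

/* Flush a batch of requests, skipping any whose queue has died meanwhile. */
static void flush_list(struct fake_req **rqs, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                struct fake_req *rq = rqs[i];

                pthread_mutex_lock(&rq->q->lock);
                if (rq->q->dead) {
                        /* Short-circuit: never hand work to a dead queue. */
                        pthread_mutex_unlock(&rq->q->lock);
                        complete_with_error(rq, -ENODEV);
                        continue;
                }
                dispatch(rq);
                pthread_mutex_unlock(&rq->q->lock);
        }
}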