author		Ming Lei <ming.lei@redhat.com>	2018-06-25 07:31:48 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-07-09 11:07:52 -0400
commit		97889f9ac24f8d2fc8e703ea7f80c162bab10d4d (patch)
tree		f8280901685cfc59ae03e92ea91a29a567f3b7b6 /block/blk-mq-sched.c
parent		5815839b3ca16bb1d45939270871169f6803a121 (diff)
blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()
We have to remove synchronize_rcu() from blk_queue_cleanup(); otherwise it can cause a long delay during LUN probe. To remove it, we have to avoid iterating set->tag_list in the I/O path, e.g. in blk_mq_sched_restart().

This patch reverts 5b79413946d (Revert "blk-mq: don't handle TAG_SHARED in restart"). We have fixed enough I/O hang issues, and there is no longer any reason to restart all queues sharing one tag set, for the following reasons:

1) The blk-mq core deals with the shared-tags case well via blk_mq_get_driver_tag(), which wakes up queues waiting for a driver tag.

2) SCSI is a bit special because it may return BLK_STS_RESOURCE when the queue, target or host is not ready, but SCSI's built-in restart covers all of these; see scsi_end_request(): the queue is rerun after any request initiated from this host/target completes.

In my test on scsi_debug (8 LUNs), this patch improves IOPS by 20% ~ 30% when running I/O on these 8 LUNs concurrently.

Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
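For reference, a condensed view of the two helpers as they read after this patch, reconstructed from the hunk below; the comments are added here for orientation only and are not part of the patch:

	/* Mark a hctx for restart: now a plain per-hctx bit, with no shared-tag accounting. */
	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
	{
		if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			return;

		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
	}

	/* Called after a driver tag is freed: rerun only this hctx if it was marked,
	 * instead of walking set->tag_list across all queues sharing the tag set. */
	void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
	{
		if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			return;
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

		blk_mq_run_hw_queue(hctx, true);
	}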
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--	block/blk-mq-sched.c	85
1 file changed, 5 insertions, 80 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 56c493c6cd90..4e027f6108ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-			list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {