path: root/block/mq-deadline.c
author	Damien Le Moal <damien.lemoal@wdc.com>	2017-12-21 01:43:40 -0500
committer	Jens Axboe <axboe@kernel.dk>	2018-01-05 11:22:17 -0500
commit	5700f69178e91a6b21250049b86148ed5e9550c1 (patch)
tree	08f8ffdbd9218095acbf40905b538800e21eff79	/block/mq-deadline.c
parent	bf09ce56f0e654b94d980b9aa89e3fce78887e01 (diff)
mq-deadline: Introduce zone locking support
Introduce zone write locking to avoid write request reordering with
zoned block devices. This is achieved using a finer selection of the
next request to dispatch:

1) Any non-write request is always allowed to proceed.
2) Any write to a conventional zone is always allowed to proceed.
3) For a write to a sequential zone, the zone lock is first checked.
   a) If the zone is not locked, the write is allowed to proceed after
      its target zone is locked.
   b) If the zone is locked, the write request is skipped and the next
      request in the dispatch queue is tested (back to step 1).

For a write request that has locked its target zone, the zone is
unlocked either when the request completes with a call to
dd_completed_request() or when the request is requeued using
dd_insert_request().

Requests targeting a locked zone are always left in the scheduler queue
to preserve the LBA ordering for write requests. If no write request
can be dispatched, allow reads to be dispatched even if the write batch
is not done.

If the device used is not a zoned block device, or if zoned block
device support is disabled, this patch does not modify mq-deadline
behavior.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
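As a rough illustration of the selection rules above, the following is a small userspace model in C. It is only a sketch: struct req, struct zone, can_dispatch() and pick_request() are invented for the example and are not the kernel's request or zone-locking API (the real code in the diff below uses blk_req_can_dispatch_to_zone() and blk_req_zone_write_lock()).

/*
 * Illustrative userspace model of the dispatch selection rules; the
 * types and helpers below are made up for the example and are not the
 * kernel API.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum req_op { OP_READ, OP_WRITE };
enum zone_type { ZONE_CONVENTIONAL, ZONE_SEQUENTIAL };

struct zone {
	enum zone_type type;
	bool write_locked;
};

struct req {
	enum req_op op;
	struct zone *zone;	/* target zone of the request */
};

/* Rules 1-3: non-writes and conventional-zone writes always dispatch;
 * a sequential-zone write dispatches only if its zone is not locked. */
static bool can_dispatch(const struct req *rq)
{
	if (rq->op != OP_WRITE)
		return true;
	if (rq->zone->type == ZONE_CONVENTIONAL)
		return true;
	return !rq->zone->write_locked;
}

/* Scan a FIFO of candidates and return the first dispatchable request,
 * leaving blocked writes queued (rule 3b). In the real patch the zone
 * is write-locked later, when the chosen request is actually dispatched. */
static struct req *pick_request(struct req **fifo, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (can_dispatch(fifo[i]))
			return fifo[i];
	}
	return NULL;	/* only writes to locked zones are queued */
}

int main(void)
{
	struct zone seq = { .type = ZONE_SEQUENTIAL, .write_locked = true };
	struct zone conv = { .type = ZONE_CONVENTIONAL };
	struct req blocked = { .op = OP_WRITE, .zone = &seq };
	struct req ok = { .op = OP_WRITE, .zone = &conv };
	struct req *fifo[] = { &blocked, &ok };

	/* The write to the locked sequential zone is skipped; the
	 * conventional-zone write behind it is picked instead. */
	printf("picked %s\n", pick_request(fifo, 2) == &ok ? "ok" : "blocked");
	return 0;
}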
Diffstat (limited to 'block/mq-deadline.c')
-rw-r--r--	block/mq-deadline.c	89
1 file changed, 86 insertions(+), 3 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8bd6db9e69c7..d56972e8ebda 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -59,6 +59,7 @@ struct deadline_data {
 	int front_merges;
 
 	spinlock_t lock;
+	spinlock_t zone_lock;
 	struct list_head dispatch;
 };
 
@@ -198,13 +199,33 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
 static struct request *
 deadline_fifo_request(struct deadline_data *dd, int data_dir)
 {
+	struct request *rq;
+	unsigned long flags;
+
 	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
 		return NULL;
 
 	if (list_empty(&dd->fifo_list[data_dir]))
 		return NULL;
 
-	return rq_entry_fifo(dd->fifo_list[data_dir].next);
+	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+		return rq;
+
+	/*
+	 * Look for a write request that can be dispatched, that is one with
+	 * an unlocked target zone.
+	 */
+	spin_lock_irqsave(&dd->zone_lock, flags);
+	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
+		if (blk_req_can_dispatch_to_zone(rq))
+			goto out;
+	}
+	rq = NULL;
+out:
+	spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+	return rq;
 }
 
 /*
@@ -214,10 +235,32 @@ deadline_fifo_request(struct deadline_data *dd, int data_dir)
 static struct request *
 deadline_next_request(struct deadline_data *dd, int data_dir)
 {
+	struct request *rq;
+	unsigned long flags;
+
 	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
 		return NULL;
 
-	return dd->next_rq[data_dir];
+	rq = dd->next_rq[data_dir];
+	if (!rq)
+		return NULL;
+
+	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+		return rq;
+
+	/*
+	 * Look for a write request that can be dispatched, that is one with
+	 * an unlocked target zone.
+	 */
+	spin_lock_irqsave(&dd->zone_lock, flags);
+	while (rq) {
+		if (blk_req_can_dispatch_to_zone(rq))
+			break;
+		rq = deadline_latter_request(rq);
+	}
+	spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+	return rq;
 }
 
 /*
@@ -259,7 +302,8 @@ static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	if (reads) {
 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
 
-		if (writes && (dd->starved++ >= dd->writes_starved))
+		if (deadline_fifo_request(dd, WRITE) &&
+		    (dd->starved++ >= dd->writes_starved))
 			goto dispatch_writes;
 
 		data_dir = READ;
@@ -304,6 +348,13 @@ dispatch_find_request:
 		rq = next_rq;
 	}
 
+	/*
+	 * For a zoned block device, if we only have writes queued and none of
+	 * them can be dispatched, rq will be NULL.
+	 */
+	if (!rq)
+		return NULL;
+
 	dd->batching = 0;
 
 dispatch_request:
@@ -313,6 +364,10 @@ dispatch_request:
 	dd->batching++;
 	deadline_move_request(dd, rq);
 done:
+	/*
+	 * If the request needs its target zone locked, do it.
+	 */
+	blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -368,6 +423,7 @@ static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
 	spin_lock_init(&dd->lock);
+	spin_lock_init(&dd->zone_lock);
 	INIT_LIST_HEAD(&dd->dispatch);
 
 	q->elevator = eq;
@@ -424,6 +480,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int data_dir = rq_data_dir(rq);
 
+	/*
+	 * This may be a requeue of a write request that has locked its
+	 * target zone. If it is the case, this releases the zone lock.
+	 */
+	blk_req_zone_write_unlock(rq);
+
 	if (blk_mq_sched_try_insert_merge(q, rq))
 		return;
 
@@ -468,6 +530,26 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 	spin_unlock(&dd->lock);
 }
 
+/*
+ * For zoned block devices, write unlock the target zone of
+ * completed write requests. Do this while holding the zone lock
+ * spinlock so that the zone is never unlocked while deadline_fifo_request()
+ * or deadline_next_request() are executing.
+ */
+static void dd_completed_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+
+	if (blk_queue_is_zoned(q)) {
+		struct deadline_data *dd = q->elevator->elevator_data;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dd->zone_lock, flags);
+		blk_req_zone_write_unlock(rq);
+		spin_unlock_irqrestore(&dd->zone_lock, flags);
+	}
+}
+
 static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
 {
 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
@@ -669,6 +751,7 @@ static struct elevator_type mq_deadline = {
 	.ops.mq = {
 		.insert_requests	= dd_insert_requests,
 		.dispatch_request	= dd_dispatch_request,
+		.completed_request	= dd_completed_request,
 		.next_request		= elv_rb_latter_request,
 		.former_request		= elv_rb_former_request,
 		.bio_merge		= dd_bio_merge,
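Taken together, the hunks above give the zone write lock a simple lifecycle: a sequential-zone write takes its target zone lock when it is dispatched (blk_req_zone_write_lock() in __dd_dispatch_request()), and releases it either on completion (dd_completed_request(), under dd->zone_lock) or when it is requeued into the scheduler (dd_insert_request()). The following is a minimal sketch of that state machine, with invented names rather than the block layer API:

/*
 * Minimal model of the zone write lock lifecycle; the names are invented
 * for the example and are not the block layer API.
 */
#include <stdbool.h>

struct zone { bool write_locked; };
struct req  { struct zone *zone; bool seq_write; };

/* Dispatch: a sequential-zone write takes its target zone lock
 * (blk_req_zone_write_lock() in the patch). */
static void dispatch(struct req *rq)
{
	if (rq->seq_write)
		rq->zone->write_locked = true;
}

/* Completion or requeue: the zone is released again, so the next write
 * to that zone can be dispatched in LBA order. In the patch the unlock
 * on completion happens under dd->zone_lock so it cannot race with the
 * FIFO/next-request scans. */
static void complete_or_requeue(struct req *rq)
{
	if (rq->seq_write)
		rq->zone->write_locked = false;
}

int main(void)
{
	struct zone z = { 0 };
	struct req rq = { .zone = &z, .seq_write = true };

	dispatch(&rq);			/* zone now locked: later writes must wait */
	complete_or_requeue(&rq);	/* zone unlocked again */
	return z.write_locked;		/* 0: lock and unlock are balanced */
}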