author     Paolo Valente <paolo.valente@linaro.org>   2017-11-13 01:34:09 -0500
committer  Jens Axboe <axboe@kernel.dk>               2017-11-14 22:13:33 -0500
commit     24bfd19bb7890255693ee5cb6dc100d8d215d00b
tree       2785ccac0d1b711113bea4b6698895f8de1fc325 /block/bfq-iosched.c
parent     614822f81f606e0064acdae11d9ec1efd3db4190
block, bfq: update blkio stats outside the scheduler lock
bfq invokes various blkg_*stats_* functions to update the statistics contained in the special files blkio.bfq.* in the blkio controller groups, i.e., the I/O accounting related to the proportional-share policy provided by bfq. The execution of these functions takes a considerable percentage, about 40%, of the total per-request execution time of bfq (i.e., of the sum of the execution time of all the bfq functions that have to be executed to process an I/O request from its creation to its destruction). This reduces the request-processing rate sustainable by bfq noticeably, even on a multicore CPU. In fact, the bfq functions that invoke blkg_*stats_* functions cannot be executed in parallel with the rest of the code of bfq, because both are executed under the same per-device scheduler lock.

To reduce this slowdown, this commit moves, wherever possible, the invocation of these functions (more precisely, of the bfq functions that invoke blkg_*stats_* functions) outside the critical sections protected by the scheduler lock.

With this change, and with all blkio.bfq.* statistics enabled, the throughput grows, e.g., from 250 to 310 KIOPS (+25%) on an Intel i7-4850HQ, in case of 8 threads doing random I/O in parallel on null_blk, with the latter configured with 0 latency. We obtained the same or higher throughput boosts, up to +30%, with other processors (some figures are reported in the documentation). For our tests, we used the script [1], with which our results can be easily reproduced.

NOTE. This commit still protects the invocation of blkg_*stats_* functions with the request_queue lock, because the group these functions are invoked on may otherwise disappear before or while these functions are executed. Fortunately, tests without even this lock show, by difference, that the serialization caused by this lock has little impact (at most ~5% of throughput reduction).

[1] https://github.com/Algodev-github/IOSpeed

Tested-by: Lee Tibbert <lee.tibbert@gmail.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Luca Miccio <lucmiccio@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
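To make the locking pattern concrete, here is a minimal userspace C sketch of the idea, not the bfq code itself and not a kernel API: the flags needed for the accounting are sampled while the hot scheduler-side lock is held, that lock is dropped, and the slower statistics update then runs under a separate lock, mirroring what the dispatch and insert paths in the diff below do with bfqd->lock and the request_queue lock. All identifiers in the sketch (sched_lock, stats_lock, dispatch_one, stats_update) are illustrative assumptions.

/*
 * Minimal userspace sketch of the pattern described above, NOT the
 * bfq code: sample the state needed for accounting under the hot
 * "scheduler" lock, drop that lock, then do the accounting under a
 * separate "stats" lock. In the kernel the two locks correspond to
 * bfqd->lock and the request_queue lock; all names here are made up.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;

static bool queue_waiting = true;   /* analogue of bfq_bfqq_wait_request() */
static unsigned long io_removed;    /* analogues of blkg_*stats_* counters */
static unsigned long idle_disables;

/* Hot path: runs under sched_lock only, does no accounting work. */
static int dispatch_one(void)
{
        queue_waiting = false;      /* pretend the idle timer was disarmed */
        return 1;                   /* pretend a request was picked */
}

/* Slow accounting, serialized by stats_lock instead of sched_lock. */
static void stats_update(bool idle_timer_disabled, int rq)
{
        pthread_mutex_lock(&stats_lock);
        if (idle_timer_disabled)
                idle_disables++;
        if (rq)
                io_removed++;
        pthread_mutex_unlock(&stats_lock);
}

static int dispatch_request(void)
{
        bool waiting, idle_timer_disabled;
        int rq;

        pthread_mutex_lock(&sched_lock);
        waiting = queue_waiting;    /* snapshot the state needed for stats */
        rq = dispatch_one();
        idle_timer_disabled = waiting && !queue_waiting;
        pthread_mutex_unlock(&sched_lock);

        /* Statistics are updated only after the hot lock has been dropped. */
        stats_update(idle_timer_disabled, rq);
        return rq;
}

int main(void)
{
        dispatch_request();
        printf("io_removed=%lu idle_disables=%lu\n", io_removed, idle_disables);
        return 0;
}

The design point is the same as in the commit: the accounting work does not disappear, it simply no longer serializes with the scheduler's hot path, so other CPUs can take the scheduler lock while the statistics are being updated.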
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--  block/bfq-iosched.c  110
1 file changed, 99 insertions, 11 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 91703eba63f0..69e05f861daf 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2228,7 +2228,6 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
                                        struct bfq_queue *bfqq)
 {
         if (bfqq) {
-                bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
                 bfq_clear_bfqq_fifo_expire(bfqq);
 
                 bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
@@ -3469,7 +3468,6 @@ check_queue:
                          */
                         bfq_clear_bfqq_wait_request(bfqq);
                         hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
-                        bfqg_stats_update_idle_time(bfqq_group(bfqq));
                 }
                 goto keep_queue;
         }
@@ -3695,15 +3693,67 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
         struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
         struct request *rq;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+        struct bfq_queue *in_serv_queue, *bfqq;
+        bool waiting_rq, idle_timer_disabled;
+#endif
 
         spin_lock_irq(&bfqd->lock);
 
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+        in_serv_queue = bfqd->in_service_queue;
+        waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
+
+        rq = __bfq_dispatch_request(hctx);
+
+        idle_timer_disabled =
+                waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
+
+#else
         rq = __bfq_dispatch_request(hctx);
-        if (rq && RQ_BFQQ(rq))
-                bfqg_stats_update_io_remove(bfqq_group(RQ_BFQQ(rq)),
-                                            rq->cmd_flags);
+#endif
         spin_unlock_irq(&bfqd->lock);
 
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+        bfqq = rq ? RQ_BFQQ(rq) : NULL;
+        if (!idle_timer_disabled && !bfqq)
+                return rq;
+
+        /*
+         * rq and bfqq are guaranteed to exist until this function
+         * ends, for the following reasons. First, rq can be
+         * dispatched to the device, and then can be completed and
+         * freed, only after this function ends. Second, rq cannot be
+         * merged (and thus freed because of a merge) any longer,
+         * because it has already started. Thus rq cannot be freed
+         * before this function ends, and, since rq has a reference to
+         * bfqq, the same guarantee holds for bfqq too.
+         *
+         * In addition, the following queue lock guarantees that
+         * bfqq_group(bfqq) exists as well.
+         */
+        spin_lock_irq(hctx->queue->queue_lock);
+        if (idle_timer_disabled)
+                /*
+                 * Since the idle timer has been disabled,
+                 * in_serv_queue contained some request when
+                 * __bfq_dispatch_request was invoked above, which
+                 * implies that rq was picked exactly from
+                 * in_serv_queue. Thus in_serv_queue == bfqq, and is
+                 * therefore guaranteed to exist because of the above
+                 * arguments.
+                 */
+                bfqg_stats_update_idle_time(bfqq_group(in_serv_queue));
+        if (bfqq) {
+                struct bfq_group *bfqg = bfqq_group(bfqq);
+
+                bfqg_stats_update_avg_queue_size(bfqg);
+                bfqg_stats_set_start_empty_time(bfqg);
+                bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
+        }
+        spin_unlock_irq(hctx->queue->queue_lock);
+#endif
+
         return rq;
 }
 
@@ -4161,7 +4211,6 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                  */
                 bfq_clear_bfqq_wait_request(bfqq);
                 hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
-                bfqg_stats_update_idle_time(bfqq_group(bfqq));
 
                 /*
                  * The queue is not empty, because a new request just
@@ -4176,10 +4225,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
         }
 }
 
-static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
+/* returns true if it causes the idle timer to be disabled */
+static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 {
         struct bfq_queue *bfqq = RQ_BFQQ(rq),
                 *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+        bool waiting, idle_timer_disabled = false;
 
         if (new_bfqq) {
                 if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
@@ -4213,12 +4264,16 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
                 bfqq = new_bfqq;
         }
 
+        waiting = bfqq && bfq_bfqq_wait_request(bfqq);
         bfq_add_request(rq);
+        idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
 
         rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
         list_add_tail(&rq->queuelist, &bfqq->fifo);
 
         bfq_rq_enqueued(bfqd, bfqq, rq);
+
+        return idle_timer_disabled;
 }
 
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -4226,7 +4281,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
         struct request_queue *q = hctx->queue;
         struct bfq_data *bfqd = q->elevator->elevator_data;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
         struct bfq_queue *bfqq = RQ_BFQQ(rq);
+        bool idle_timer_disabled = false;
+        unsigned int cmd_flags;
+#endif
 
         spin_lock_irq(&bfqd->lock);
         if (blk_mq_sched_try_insert_merge(q, rq)) {
@@ -4245,13 +4304,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                 else
                         list_add_tail(&rq->queuelist, &bfqd->dispatch);
         } else {
-                __bfq_insert_request(bfqd, rq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+                idle_timer_disabled = __bfq_insert_request(bfqd, rq);
                 /*
                  * Update bfqq, because, if a queue merge has occurred
                  * in __bfq_insert_request, then rq has been
                  * redirected into a new queue.
                  */
                 bfqq = RQ_BFQQ(rq);
+#else
+                __bfq_insert_request(bfqd, rq);
+#endif
 
                 if (rq_mergeable(rq)) {
                         elv_rqhash_add(q, rq);
@@ -4260,10 +4323,35 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                 }
         }
 
-        if (bfqq)
-                bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, rq->cmd_flags);
-
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+        /*
+         * Cache cmd_flags before releasing scheduler lock, because rq
+         * may disappear afterwards (for example, because of a request
+         * merge).
+         */
+        cmd_flags = rq->cmd_flags;
+#endif
         spin_unlock_irq(&bfqd->lock);
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+        if (!bfqq)
+                return;
+        /*
+         * bfqq still exists, because it can disappear only after
+         * either it is merged with another queue, or the process it
+         * is associated with exits. But both actions must be taken by
+         * the same process currently executing this flow of
+         * instruction.
+         *
+         * In addition, the following queue lock guarantees that
+         * bfqq_group(bfqq) exists as well.
+         */
+        spin_lock_irq(q->queue_lock);
+        bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
+        if (idle_timer_disabled)
+                bfqg_stats_update_idle_time(bfqq_group(bfqq));
+        spin_unlock_irq(q->queue_lock);
+#endif
 }
 
 static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,