author	Paolo Valente <paolo.valente@linaro.org>	2017-12-20 06:38:33 -0500
committer	Jens Axboe <axboe@kernel.dk>	2018-01-05 11:26:09 -0500
commit	7b8fa3b900a087bc03b11329a92398fde563ba37 (patch)
tree	fc14295feedbd3090b76d0c5191444d0df7aac40 /block/bfq-iosched.c
parent	1be6e8a964ee9aa8d4daac523ce29e5f486dd756 (diff)
block, bfq: let a queue be merged only shortly after starting I/O
In BFQ and CFQ, two processes are said to be cooperating if they do I/O in such a way that the union of their I/O requests yields a sequential I/O pattern. To get such a sequential I/O pattern out of the non-sequential pattern of each cooperating process, BFQ and CFQ merge the queues associated with these processes. In more detail, cooperating processes, and thus their associated queues, usually start, or restart, to do I/O shortly after each other. This is the case, e.g., for the I/O threads of KVM/QEMU and of the dump utility. Based on this assumption, this commit allows a bfq_queue to be merged only during a short time interval (100ms) after it starts, or restarts, to do I/O. This filtering provides two important benefits.

First, it greatly reduces the probability that two non-cooperating processes have their queues merged by mistake, if they just happen to do I/O close to each other for a short time interval. These spurious merges cause loss of service guarantees: a low-weight bfq_queue may unjustly get more than its expected share of the throughput. If such a low-weight queue is merged with a high-weight queue, then the I/O for the low-weight queue is served as if the queue had a high weight, which may unexpectedly hurt other high-weight queues. For instance, because of this issue, lxterminal occasionally took 7.5 seconds to start, instead of 6.5 seconds, when some sequential readers and writers did I/O in the background on a FUJITSU MHX2300BT HDD. The reason is that the bfq_queues associated with some of the readers or the writers were merged with the high-weight queues of some processes that had to do a small amount of urgent I/O. The readers then exploited the inherited high weight for all or most of their I/O during the start-up of the terminal. The filtering introduced by this commit eliminated any outlier caused by spurious queue merges in our start-up time tests.

Second, this filtering provides a small boost of the throughput sustainable by BFQ: 3-4%, depending on the CPU. The reason is that, once a bfq_queue can no longer be merged, this commit makes BFQ stop updating the data needed to handle merging for that queue.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Angelo Ruocco <angeloruocco90@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
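As a rough illustration of the mechanism described above: a queue records the time of its first I/O after (re)starting, and merging is permitted only while less than the merge window has elapsed since then. The following userspace sketch mimics that gate with plain tick counters; all names here (queue_state, too_late_for_merging, merge_time_limit) are illustrative stand-ins, not the kernel's actual types or API.

#include <stdbool.h>

/* Illustrative stand-ins; the real code uses struct bfq_queue and jiffies. */
struct queue_state {
	unsigned long first_io_time;           /* tick of first request after (re)start */
	unsigned long service_from_backlogged; /* service received while backlogged */
};

static const unsigned long merge_time_limit = 100; /* ~100ms at 1000 ticks per second */

/* Mirrors the intent of bfq_too_late_for_merging(): once the queue has
 * received some service and the merge window has elapsed, refuse to merge.
 * The unsigned subtraction keeps the test correct across counter wraparound. */
static bool too_late_for_merging(const struct queue_state *q, unsigned long now)
{
	return q->service_from_backlogged > 0 &&
	       now - q->first_io_time > merge_time_limit;
}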
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--	block/bfq-iosched.c	57
1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 2cf395daee80..7066d90f09df 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -166,6 +166,20 @@ static const int bfq_async_charge_factor = 10;
 /* Default timeout values, in jiffies, approximating CFQ defaults. */
 const int bfq_timeout = HZ / 8;
 
+/*
+ * Time limit for merging (see comments in bfq_setup_cooperator). Set
+ * to the slowest value that, in our tests, proved to be effective in
+ * removing false positives, while not causing true positives to miss
+ * queue merging.
+ *
+ * As can be deduced from the low time limit below, queue merging, if
+ * successful, happens at the very beginning of the I/O of the involved
+ * cooperating processes, as a consequence of the arrival of the very
+ * first requests from each cooperator. After that, there is very
+ * little chance to find cooperators.
+ */
+static const unsigned long bfq_merge_time_limit = HZ/10;
+
 static struct kmem_cache *bfq_pool;
 
 /* Below this threshold (in ns), we consider thinktime immediate. */
@@ -444,6 +458,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
 	return bfqq;
 }
 
+static bool bfq_too_late_for_merging(struct bfq_queue *bfqq)
+{
+	return bfqq->service_from_backlogged > 0 &&
+		time_is_before_jiffies(bfqq->first_IO_time +
+				       bfq_merge_time_limit);
+}
+
 void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	struct rb_node **p, *parent;
@@ -454,6 +475,14 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 		bfqq->pos_root = NULL;
 	}
 
+	/*
+	 * bfqq cannot be merged any longer (see comments in
+	 * bfq_setup_cooperator): no point in adding bfqq into the
+	 * position tree.
+	 */
+	if (bfq_too_late_for_merging(bfqq))
+		return;
+
 	if (bfq_class_idle(bfqq))
 		return;
 	if (!bfqq->next_rq)
@@ -1935,6 +1964,9 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
 static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
 					struct bfq_queue *new_bfqq)
 {
+	if (bfq_too_late_for_merging(new_bfqq))
+		return false;
+
 	if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
 	    (bfqq->ioprio_class != new_bfqq->ioprio_class))
 		return false;
@@ -2003,6 +2035,20 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	struct bfq_queue *in_service_bfqq, *new_bfqq;
 
+	/*
+	 * Prevent bfqq from being merged if it has been created too
+	 * long ago. The idea is that true cooperating processes, and
+	 * thus their associated bfq_queues, are supposed to be
+	 * created shortly after each other. This is the case, e.g.,
+	 * for KVM/QEMU and dump I/O threads. Based on this
+	 * assumption, the following filtering greatly reduces the
+	 * probability that two non-cooperating processes, which just
+	 * happen to do close I/O for some short time interval, have
+	 * their queues merged by mistake.
+	 */
+	if (bfq_too_late_for_merging(bfqq))
+		return NULL;
+
 	if (bfqq->new_bfqq)
 		return bfqq->new_bfqq;
 
@@ -3003,17 +3049,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta);
 
 	/*
-	 * Increase service_from_backlogged before next statement,
-	 * because the possible next invocation of
-	 * bfq_bfqq_charge_time would likely inflate
-	 * entity->service. In contrast, service_from_backlogged must
-	 * contain real service, to enable the soft real-time
-	 * heuristic to correctly compute the bandwidth consumed by
-	 * bfqq.
-	 */
-	bfqq->service_from_backlogged += entity->service;
-
-	/*
 	 * As above explained, charge slow (typically seeky) and
 	 * timed-out queues with the time and not the service
 	 * received, to favor sequential workloads.
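
A side note on the helper used in bfq_too_late_for_merging() above: time_is_before_jiffies(a) is a <linux/jiffies.h> macro that evaluates to true when a lies in the past, using a signed-difference comparison so the result stays correct across jiffies wraparound. The userspace sketch below illustrates that comparison style; tick_after() and the 32-bit counter width are illustrative assumptions, not the kernel's implementation.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "a is later than b" for tick counters, in the spirit of
 * the kernel's time_after(): compute the difference and test its sign. */
static bool tick_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

int main(void)
{
	uint32_t first_io = 0xFFFFFFF0u; /* first I/O shortly before the counter wraps */
	uint32_t limit    = 10;          /* merge window, in ticks */
	uint32_t now      = 0x00000010u; /* 32 ticks later, after wraparound */

	/* Prints 1: despite the wrap, "now" is correctly judged to be
	 * past the end of the merge window (first_io + limit). */
	printf("too late for merging: %d\n", tick_after(now, first_io + limit));
	return 0;
}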