summaryrefslogtreecommitdiffstats
path: root/block/bfq-iosched.c
diff options
context:
space:
mode:
authorPaolo Valente <paolo.valente@linaro.org>2017-04-12 12:23:09 -0400
committerJens Axboe <axboe@fb.com>2017-04-19 10:30:26 -0400
commit54b604567fbfa1a35a44c2ac4a35c959d277adc2 (patch)
treeb92e963307d3eed5ba5de45e4563ddac24c9514e /block/bfq-iosched.c
parente21b7a0b988772e82e7147e1c659a5afe2ae003c (diff)
block, bfq: improve throughput boosting
The feedback-loop algorithm used by BFQ to compute queue (process) budgets is basically a set of three update rules, one for each of the main reasons why a queue may be expired. If many processes suddenly switch from sporadic I/O to greedy and sequential I/O, then these rules are quite slow to assign large budgets to these processes, and hence to achieve a high throughput. On the opposite side, BFQ assigns the maximum possible budget B_max to a just-created queue. This allows a high throughput to be achieved immediately if the associated process is I/O-bound and performs sequential I/O from the beginning. But it also increases the worst-case latency experienced by the first requests issued by the process, because the larger the budget of a queue waiting for service is, the later the queue will be served by B-WF2Q+ (Subsec 3.3 in [1]). This is detrimental for an interactive or soft real-time application. To tackle these throughput and latency problems, on one hand this patch changes the initial budget value to B_max/2. On the other hand, it re-tunes the three rules, adopting a more aggressive, multiplicative increase/linear decrease scheme. This scheme trades latency for throughput more than before, and tends to assign large budgets quickly to processes that are or become I/O-bound. For two of the expiration reasons, the new version of the rules also contains some more little improvements, briefly described below. *No more backlog.* In this case, the budget was larger than the number of sectors actually read/written by the process before it stopped doing I/O. Hence, to reduce latency for the possible future I/O requests of the process, the old rule simply set the next budget to the number of sectors actually consumed by the process. However, if there are still outstanding requests, then the process may have not yet issued its next request just because it is still waiting for the completion of some of the still outstanding ones. If this sub-case holds true, then the new rule, instead of decreasing the budget, doubles it, proactively, in the hope that: 1) a larger budget will fit the actual needs of the process, and 2) the process is sequential and hence a higher throughput will be achieved by serving the process longer after granting it access to the device. *Budget timeout*. The original rule set the new budget to the maximum value B_max, to maximize throughput and let all processes experiencing budget timeouts receive the same share of the device time. In our experiments we verified that this sudden jump to B_max did not provide sensible benefits; rather it increased the latency of processes performing sporadic and short I/O. The new rule only doubles the budget. [1] P. Valente and M. Andreolini, "Improving Application Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of the 5th Annual International Systems and Storage Conference (SYSTOR '12), June 2012. Slightly extended version: http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- results.pdf Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--block/bfq-iosched.c87
1 files changed, 41 insertions, 46 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index af1740a1d453..1edac72ab51d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -752,9 +752,6 @@ static struct kmem_cache *bfq_pool;
752#define BFQQ_CLOSE_THR (sector_t)(8 * 1024) 752#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
753#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) 753#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
754 754
755/* Budget feedback step. */
756#define BFQ_BUDGET_STEP 128
757
758/* Min samples used for peak rate estimation (for autotuning). */ 755/* Min samples used for peak rate estimation (for autotuning). */
759#define BFQ_PEAK_RATE_SAMPLES 32 756#define BFQ_PEAK_RATE_SAMPLES 32
760 757
@@ -4074,40 +4071,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
4074 return bfqq; 4071 return bfqq;
4075} 4072}
4076 4073
4077/*
4078 * bfq_default_budget - return the default budget for @bfqq on @bfqd.
4079 * @bfqd: the device descriptor.
4080 * @bfqq: the queue to consider.
4081 *
4082 * We use 3/4 of the @bfqd maximum budget as the default value
4083 * for the max_budget field of the queues. This lets the feedback
4084 * mechanism to start from some middle ground, then the behavior
4085 * of the process will drive the heuristics towards high values, if
4086 * it behaves as a greedy sequential reader, or towards small values
4087 * if it shows a more intermittent behavior.
4088 */
4089static unsigned long bfq_default_budget(struct bfq_data *bfqd,
4090 struct bfq_queue *bfqq)
4091{
4092 unsigned long budget;
4093
4094 /*
4095 * When we need an estimate of the peak rate we need to avoid
4096 * to give budgets that are too short due to previous
4097 * measurements. So, in the first 10 assignments use a
4098 * ``safe'' budget value. For such first assignment the value
4099 * of bfqd->budgets_assigned happens to be lower than 194.
4100 * See __bfq_set_in_service_queue for the formula by which
4101 * this field is computed.
4102 */
4103 if (bfqd->budgets_assigned < 194 && bfqd->bfq_user_max_budget == 0)
4104 budget = bfq_default_max_budget;
4105 else
4106 budget = bfqd->bfq_max_budget;
4107
4108 return budget - budget / 4;
4109}
4110
4111static void bfq_arm_slice_timer(struct bfq_data *bfqd) 4074static void bfq_arm_slice_timer(struct bfq_data *bfqd)
4112{ 4075{
4113 struct bfq_queue *bfqq = bfqd->in_service_queue; 4076 struct bfq_queue *bfqq = bfqd->in_service_queue;
@@ -4232,13 +4195,47 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
4232 * for throughput. 4195 * for throughput.
4233 */ 4196 */
4234 case BFQQE_TOO_IDLE: 4197 case BFQQE_TOO_IDLE:
4235 if (budget > min_budget + BFQ_BUDGET_STEP) 4198 /*
4236 budget -= BFQ_BUDGET_STEP; 4199 * This is the only case where we may reduce
4237 else 4200 * the budget: if there is no request of the
4238 budget = min_budget; 4201 * process still waiting for completion, then
4202 * we assume (tentatively) that the timer has
4203 * expired because the batch of requests of
4204 * the process could have been served with a
4205 * smaller budget. Hence, betting that
4206 * process will behave in the same way when it
4207 * becomes backlogged again, we reduce its
4208 * next budget. As long as we guess right,
4209 * this budget cut reduces the latency
4210 * experienced by the process.
4211 *
4212 * However, if there are still outstanding
4213 * requests, then the process may have not yet
4214 * issued its next request just because it is
4215 * still waiting for the completion of some of
4216 * the still outstanding ones. So in this
4217 * subcase we do not reduce its budget, on the
4218 * contrary we increase it to possibly boost
4219 * the throughput, as discussed in the
4220 * comments to the BUDGET_TIMEOUT case.
4221 */
4222 if (bfqq->dispatched > 0) /* still outstanding reqs */
4223 budget = min(budget * 2, bfqd->bfq_max_budget);
4224 else {
4225 if (budget > 5 * min_budget)
4226 budget -= 4 * min_budget;
4227 else
4228 budget = min_budget;
4229 }
4239 break; 4230 break;
4240 case BFQQE_BUDGET_TIMEOUT: 4231 case BFQQE_BUDGET_TIMEOUT:
4241 budget = bfq_default_budget(bfqd, bfqq); 4232 /*
4233 * We double the budget here because it gives
4234 * the chance to boost the throughput if this
4235 * is not a seeky process (and has bumped into
4236 * this timeout because of, e.g., ZBR).
4237 */
4238 budget = min(budget * 2, bfqd->bfq_max_budget);
4242 break; 4239 break;
4243 case BFQQE_BUDGET_EXHAUSTED: 4240 case BFQQE_BUDGET_EXHAUSTED:
4244 /* 4241 /*
@@ -4250,8 +4247,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
4250 * definitely increase the budget of this good 4247 * definitely increase the budget of this good
4251 * candidate to boost the disk throughput. 4248 * candidate to boost the disk throughput.
4252 */ 4249 */
4253 budget = min(budget + 8 * BFQ_BUDGET_STEP, 4250 budget = min(budget * 4, bfqd->bfq_max_budget);
4254 bfqd->bfq_max_budget);
4255 break; 4251 break;
4256 case BFQQE_NO_MORE_REQUESTS: 4252 case BFQQE_NO_MORE_REQUESTS:
4257 /* 4253 /*
@@ -5025,9 +5021,8 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
5025 bfqq->pid = pid; 5021 bfqq->pid = pid;
5026 5022
5027 /* Tentative initial value to trade off between thr and lat */ 5023 /* Tentative initial value to trade off between thr and lat */
5028 bfqq->max_budget = bfq_default_budget(bfqd, bfqq); 5024 bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
5029 bfqq->budget_timeout = bfq_smallest_from_now(); 5025 bfqq->budget_timeout = bfq_smallest_from_now();
5030 bfqq->pid = pid;
5031 5026
5032 /* first request is almost certainly seeky */ 5027 /* first request is almost certainly seeky */
5033 bfqq->seek_history = 1; 5028 bfqq->seek_history = 1;