path: root/block/bfq-iosched.c
author    Arianna Avanzini <avanzini.arianna@gmail.com>	2017-04-12 12:23:20 -0400
committer Jens Axboe <axboe@fb.com>	2017-04-19 10:30:26 -0400
commit    e1b2324dd065880a3200098fe3637ac171c296e6
tree      df9b51dbb7a94babd3179f721b38f62ea3f66f0e /block/bfq-iosched.c
parent    e01eff01d5c81f4dbba186299b16b08aa7316d5b
block, bfq: handle bursts of queue activations
Many popular I/O-intensive services or applications spawn or reactivate
many parallel threads/processes during short time intervals. Examples
are systemd during boot, or git grep. These services or applications
benefit mostly from a high throughput: the quicker the I/O generated by
their processes is cumulatively served, the sooner the target job of
these services or applications gets completed. As a consequence, it is
almost always counterproductive to weight-raise any of the queues
associated with the processes of these services or applications: in
most cases it would just lower the throughput, mainly because
weight-raising also implies device idling.

To address this issue, an I/O scheduler needs, first, to detect which
queues are associated with these services or applications. From the
I/O-scheduler standpoint, these services or applications cause bursts
of activations, i.e., activations of different queues occurring shortly
after each other. However, a shorter burst of activations may also be
caused by the start of an application that does not consist of a lot
of parallel I/O-bound threads (see the comments on the function
bfq_handle_burst for details).

In view of these facts, this commit introduces:
1) a heuristic to detect (only) bursts of queue activations caused by
   services or applications consisting of many parallel I/O-bound
   threads;
2) the prevention of device idling and weight-raising for the queues
   belonging to these bursts.

Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
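Before the diff, it may help to isolate the timing logic the patch implements: a queue activation joins the current burst if it occurs within a reference interval of the previous activation, and the burst is deemed large once enough activations have accumulated. The following user-space sketch is a minimal model of that logic, assuming hypothetical names (burst_state, handle_activation) and millisecond timestamps in place of jiffies; it is not the kernel code, which follows in the diff, and the thresholds simply mirror the defaults set in bfq_init_queue (8 queues, 180 ms).

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative constants; the kernel stores these per device. */
#define LARGE_BURST_THRESH 8
#define BURST_INTERVAL_MS  180UL

struct burst_state {
	unsigned long last_ins_ms; /* time of the last activation in the burst */
	int size;                  /* activations in the current burst */
	bool large;                /* has the burst been deemed large? */
};

/*
 * Record a queue activation at @now_ms; return true if the activated
 * queue must be treated as part of a large burst (i.e., per the commit
 * message, get neither weight-raising nor device idling).
 */
static bool handle_activation(struct burst_state *bs, unsigned long now_ms)
{
	if (now_ms - bs->last_ins_ms > BURST_INTERVAL_MS) {
		/* Activation came too late: start a possible new burst. */
		bs->size = 1;
		bs->large = false;
	} else if (!bs->large && ++bs->size >= LARGE_BURST_THRESH) {
		/* Enough closely spaced activations: burst is now large. */
		bs->large = true;
	}
	bs->last_ins_ms = now_ms;
	return bs->large;
}

int main(void)
{
	struct burst_state bs = { 0, 0, false };
	/* Ten activations 50 ms apart, then a late one 500 ms after. */
	unsigned long t[] = { 0, 50, 100, 150, 200, 250, 300, 350,
			      400, 450, 950 };

	for (size_t i = 0; i < sizeof(t) / sizeof(t[0]); i++)
		printf("activation at %4lu ms -> %s\n", t[i],
		       handle_activation(&bs, t[i]) ?
		       "large burst" : "no/small burst");
	return 0;
}

The kernel version additionally keeps the member queues in a burst list so that they can be marked in_large_burst retroactively once the threshold is crossed, and it requires the activated queues to share a common parent entity; the timing condition itself is the one implemented with jiffies and time_is_before_jiffies() in bfq_handle_burst.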
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--	block/bfq-iosched.c	404
1 file changed, 389 insertions(+), 15 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 549f030509ef..b7e3c8653414 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -360,6 +360,10 @@ struct bfq_queue {
 
 	/* bit vector: a 1 for each seeky requests in history */
 	u32 seek_history;
+
+	/* node for the device's burst list */
+	struct hlist_node burst_list_node;
+
 	/* position of the last request enqueued */
 	sector_t last_request_pos;
 
@@ -443,6 +447,17 @@ struct bfq_io_cq {
 	bool saved_IO_bound;
 
 	/*
+	 * Same purpose as the previous fields for the value of the
+	 * field keeping the queue's belonging to a large burst
+	 */
+	bool saved_in_large_burst;
+	/*
+	 * True if the queue belonged to a burst list before its merge
+	 * with another cooperating queue.
+	 */
+	bool was_in_burst_list;
+
+	/*
 	 * Similar to previous fields: save wr information.
 	 */
 	unsigned long saved_wr_coeff;
@@ -609,6 +624,36 @@ struct bfq_data {
 	 */
 	bool strict_guarantees;
 
+	/*
+	 * Last time at which a queue entered the current burst of
+	 * queues being activated shortly after each other; for more
+	 * details about this and the following parameters related to
+	 * a burst of activations, see the comments on the function
+	 * bfq_handle_burst.
+	 */
+	unsigned long last_ins_in_burst;
+	/*
+	 * Reference time interval used to decide whether a queue has
+	 * been activated shortly after @last_ins_in_burst.
+	 */
+	unsigned long bfq_burst_interval;
+	/* number of queues in the current burst of queue activations */
+	int burst_size;
+
+	/* common parent entity for the queues in the burst */
+	struct bfq_entity *burst_parent_entity;
+	/* Maximum burst size above which the current queue-activation
+	 * burst is deemed as 'large'.
+	 */
+	unsigned long bfq_large_burst_thresh;
+	/* true if a large queue-activation burst is in progress */
+	bool large_burst;
+	/*
+	 * Head of the burst list (as for the above fields, more
+	 * details in the comments on the function bfq_handle_burst).
+	 */
+	struct hlist_head burst_list;
+
 	/* if set to true, low-latency heuristics are enabled */
 	bool low_latency;
 	/*
@@ -671,7 +716,8 @@ struct bfq_data {
 };
 
 enum bfqq_state_flags {
-	BFQQF_busy = 0,		/* has requests or is in service */
+	BFQQF_just_created = 0,	/* queue just allocated */
+	BFQQF_busy,		/* has requests or is in service */
 	BFQQF_wait_request,	/* waiting for a request */
 	BFQQF_non_blocking_wait_rq, /*
 				     * waiting for a request
@@ -685,6 +731,10 @@ enum bfqq_state_flags {
 				     * having consumed at most 2/10 of
 				     * its budget
 				     */
+	BFQQF_in_large_burst,	     /*
+				      * bfqq activated in a large burst,
+				      * see comments to bfq_handle_burst.
+				      */
 	BFQQF_softrt_update,	     /*
 				      * may need softrt-next-start
 				      * update
@@ -707,6 +757,7 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
 	return test_bit(BFQQF_##name, &(bfqq)->flags);		\
 }
 
+BFQ_BFQQ_FNS(just_created);
 BFQ_BFQQ_FNS(busy);
 BFQ_BFQQ_FNS(wait_request);
 BFQ_BFQQ_FNS(non_blocking_wait_rq);
@@ -714,6 +765,7 @@ BFQ_BFQQ_FNS(fifo_expire);
 BFQ_BFQQ_FNS(idle_window);
 BFQ_BFQQ_FNS(sync);
 BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
@@ -4303,9 +4355,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
 	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
 
-	if (bfqq->wr_coeff > 1 &&
-	    time_is_before_jiffies(bfqq->last_wr_start_finish +
-				   bfqq->wr_cur_max_time)) {
+	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
+	    time_is_before_jiffies(bfqq->last_wr_start_finish +
+				   bfqq->wr_cur_max_time))) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
 			     "resume state: switching off wr");
 
@@ -4321,6 +4373,232 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
 	return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
 }
 
+/* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_queue *item;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
+		hlist_del_init(&item->burst_list_node);
+	hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+	bfqd->burst_size = 1;
+	bfqd->burst_parent_entity = bfqq->entity.parent;
+}
+
+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/* Increment burst size to take into account also bfqq */
+	bfqd->burst_size++;
+
+	if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+		struct bfq_queue *pos, *bfqq_item;
+		struct hlist_node *n;
+
+		/*
+		 * Enough queues have been activated shortly after each
+		 * other to consider this burst as large.
+		 */
+		bfqd->large_burst = true;
+
+		/*
+		 * We can now mark all queues in the burst list as
+		 * belonging to a large burst.
+		 */
+		hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+				     burst_list_node)
+			bfq_mark_bfqq_in_large_burst(bfqq_item);
+		bfq_mark_bfqq_in_large_burst(bfqq);
+
+		/*
+		 * From now on, and until the current burst finishes, any
+		 * new queue being activated shortly after the last queue
+		 * was inserted in the burst can be immediately marked as
+		 * belonging to a large burst. So the burst list is not
+		 * needed any more. Remove it.
+		 */
+		hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+					  burst_list_node)
+			hlist_del_init(&pos->burst_list_node);
+	} else /*
+	       * Burst not yet large: add bfqq to the burst list. Do
+	       * not increment the ref counter for bfqq, because bfqq
+	       * is removed from the burst list before freeing bfqq
+	       * in put_queue.
+	       */
+		hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+}
+
+/*
+ * If many queues belonging to the same group happen to be created
+ * shortly after each other, then the processes associated with these
+ * queues typically have a common goal. In particular, bursts of queue
+ * creations are usually caused by services or applications that spawn
+ * many parallel threads/processes. Examples are systemd during boot,
+ * or git grep. To help these processes get their job done as soon as
+ * possible, it is usually better to not grant either weight-raising
+ * or device idling to their queues.
+ *
+ * In this comment we describe, firstly, the reasons why this fact
+ * holds, and, secondly, the next function, which implements the main
+ * steps needed to properly mark these queues so that they can then be
+ * treated in a different way.
+ *
+ * The above services or applications benefit mostly from a high
+ * throughput: the quicker the requests of the activated queues are
+ * cumulatively served, the sooner the target job of these queues gets
+ * completed. As a consequence, weight-raising any of these queues,
+ * which also implies idling the device for it, is almost always
+ * counterproductive. In most cases it just lowers throughput.
+ *
+ * On the other hand, a burst of queue creations may also be caused by
+ * the start of an application that does not consist of a lot of
+ * parallel I/O-bound threads. In fact, with a complex application,
+ * several short processes may need to be executed to start up the
+ * application. In this respect, to start an application as quickly as
+ * possible, the best thing to do is in any case to privilege the I/O
+ * related to the application with respect to all other
+ * I/O. Therefore, the best strategy to start as quickly as possible
+ * an application that causes a burst of queue creations is to
+ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the
+ * two types of bursts need to be distinguished. Fortunately, this
+ * seems relatively easy, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that only bursts with a
+ * larger size than that threshold are apparently caused by
+ * services or commands such as systemd or git grep. For brevity,
+ * hereafter we simply call these bursts 'large'. BFQ *does not*
+ * weight-raise queues whose creation occurs in a large burst. In
+ * addition, for each of these queues BFQ performs or does not perform
+ * idling depending on which choice boosts the throughput more. The
+ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Unfortunately, false positives may occur while an interactive task
+ * is starting (e.g., an application is being started). The
+ * consequence is that the queues associated with the task do not
+ * enjoy weight raising as expected. Fortunately these false positives
+ * are very rare. They typically occur if some service happens to
+ * start doing I/O exactly when the interactive task starts.
+ *
+ * Turning back to the next function, it implements all the steps
+ * needed to detect the occurrence of a large burst and to properly
+ * mark all the queues belonging to it (so that they can then be
+ * treated in a different way). This goal is achieved by maintaining a
+ * "burst list" that holds, temporarily, the queues that belong to the
+ * burst in progress. The list is then used to mark these queues as
+ * belonging to a large burst if the burst does become large. The main
+ * steps are the following.
+ *
+ * . when the very first queue is created, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *   . all the queues in the burst list are marked as belonging to a
+ *     large burst
+ *
+ *   . the burst list is deleted; in fact, the burst list already served
+ *     its purpose (temporarily keeping track of the queues in a burst,
+ *     so as to be able to mark them as belonging to a large burst in the
+ *     previous sub-step), and now is not needed any more
+ *
+ *   . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is created while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is created a while
+ *   later, i.e., not shortly after the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed as finished and:
+ *
+ *   . the large-burst mode is reset if set
+ *
+ *   . the burst list is emptied
+ *
+ *   . Q is inserted in the burst list, as Q may be the first queue
+ *     in a possible new burst (then the burst list contains just Q
+ *     after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq is already in the burst list or is part of a large
+	 * burst, or finally has just been split, then there is
+	 * nothing else to do.
+	 */
+	if (!hlist_unhashed(&bfqq->burst_list_node) ||
+	    bfq_bfqq_in_large_burst(bfqq) ||
+	    time_is_after_eq_jiffies(bfqq->split_time +
+				     msecs_to_jiffies(10)))
+		return;
+
+	/*
+	 * If bfqq's creation happens late enough, or bfqq belongs to
+	 * a different group than the burst group, then the current
+	 * burst is finished, and related data structures must be
+	 * reset.
+	 *
+	 * In this respect, consider the special case where bfqq is
+	 * the very first queue created after BFQ is selected for this
+	 * device. In this case, last_ins_in_burst and
+	 * burst_parent_entity are not yet significant when we get
+	 * here. But it is easy to verify that, whether or not the
+	 * following condition is true, bfqq will end up being
+	 * inserted into the burst list. In particular the list will
+	 * happen to contain only bfqq. And this is exactly what has
+	 * to happen, as bfqq may be the first queue of the first
+	 * burst.
+	 */
+	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+	    bfqd->bfq_burst_interval) ||
+	    bfqq->entity.parent != bfqd->burst_parent_entity) {
+		bfqd->large_burst = false;
+		bfq_reset_burst_list(bfqd, bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then bfqq is being activated shortly after the
+	 * last queue. So, if the current burst is also large, we can mark
+	 * bfqq as belonging to this large burst immediately.
+	 */
+	if (bfqd->large_burst) {
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then a large-burst state has not yet been
+	 * reached, but bfqq is being activated shortly after the last
+	 * queue. Then we add bfqq to the burst.
+	 */
+	bfq_add_to_burst(bfqd, bfqq);
+end:
+	/*
+	 * At this point, bfqq either has been added to the current
+	 * burst or has caused the current burst to terminate and a
+	 * possible new burst to start. In particular, in the second
+	 * case, bfqq has become the first queue in the possible new
+	 * burst. In both cases last_ins_in_burst needs to be moved
+	 * forward.
+	 */
+	bfqd->last_ins_in_burst = jiffies;
+}
+
 static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
 {
 	struct bfq_entity *entity = &bfqq->entity;
@@ -4534,6 +4812,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
 					     unsigned int old_wr_coeff,
 					     bool wr_or_deserves_wr,
 					     bool interactive,
+					     bool in_burst,
 					     bool soft_rt)
 {
 	if (old_wr_coeff == 1 && wr_or_deserves_wr) {
@@ -4565,7 +4844,9 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
 		if (interactive) { /* update wr coeff and duration */
 			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
 			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-		} else if (soft_rt) {
+		} else if (in_burst)
+			bfqq->wr_coeff = 1;
+		else if (soft_rt) {
 			/*
 			 * The application is now or still meeting the
 			 * requirements for being deemed soft rt. We
@@ -4625,7 +4906,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 					     struct request *rq,
 					     bool *interactive)
 {
-	bool soft_rt, wr_or_deserves_wr, bfqq_wants_to_preempt,
+	bool soft_rt, in_burst, wr_or_deserves_wr,
+	     bfqq_wants_to_preempt,
 	     idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
 	     /*
 	      * See the comments on
@@ -4641,12 +4923,15 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	/*
 	 * bfqq deserves to be weight-raised if:
 	 * - it is sync,
+	 * - it does not belong to a large burst,
 	 * - it has been idle for enough time or is soft real-time,
 	 * - is linked to a bfq_io_cq (it is not shared in any sense).
 	 */
+	in_burst = bfq_bfqq_in_large_burst(bfqq);
 	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+		!in_burst &&
 		time_is_before_jiffies(bfqq->soft_rt_next_start);
-	*interactive = idle_for_long_time;
+	*interactive = !in_burst && idle_for_long_time;
 	wr_or_deserves_wr = bfqd->low_latency &&
 		(bfqq->wr_coeff > 1 ||
 		 (bfq_bfqq_sync(bfqq) &&
@@ -4661,6 +4946,31 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 						 arrived_in_time,
 						 wr_or_deserves_wr);
 
+	/*
+	 * If bfqq happened to be activated in a burst, but has been
+	 * idle for much more than an interactive queue, then we
+	 * assume that, in the overall I/O initiated in the burst, the
+	 * I/O associated with bfqq is finished. So bfqq does not need
+	 * to be treated as a queue belonging to a burst
+	 * anymore. Accordingly, we reset bfqq's in_large_burst flag
+	 * if set, and remove bfqq from the burst list if it's
+	 * there. We do not decrement burst_size, because the fact
+	 * that bfqq does not need to belong to the burst list any
+	 * more does not invalidate the fact that bfqq was created in
+	 * a burst.
+	 */
+	if (likely(!bfq_bfqq_just_created(bfqq)) &&
+	    idle_for_long_time &&
+	    time_is_before_jiffies(
+		    bfqq->budget_timeout +
+		    msecs_to_jiffies(10000))) {
+		hlist_del_init(&bfqq->burst_list_node);
+		bfq_clear_bfqq_in_large_burst(bfqq);
+	}
+
+	bfq_clear_bfqq_just_created(bfqq);
+
+
 	if (!bfq_bfqq_IO_bound(bfqq)) {
 		if (arrived_in_time) {
 			bfqq->requests_within_timer++;
@@ -4683,6 +4993,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 						 old_wr_coeff,
 						 wr_or_deserves_wr,
 						 *interactive,
+						 in_burst,
 						 soft_rt);
 
 	if (old_wr_coeff != bfqq->wr_coeff)
@@ -5310,6 +5621,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	bic->saved_ttime = bfqq->ttime;
 	bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
 	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
 	bic->saved_wr_coeff = bfqq->wr_coeff;
 	bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
 	bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
@@ -5345,7 +5658,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 	 * where bfqq has just been created, but has not yet made it
 	 * to be weight-raised (which may happen because EQM may merge
 	 * bfqq even before bfq_add_request is executed for the first
-	 * time for bfqq).
+	 * time for bfqq). Handling this case would however be very
+	 * easy, thanks to the flag just_created.
 	 */
 	if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
 		new_bfqq->wr_coeff = bfqq->wr_coeff;
@@ -6430,6 +6744,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 {
 	struct bfq_data *bfqd = bfqq->bfqd;
 	bool idling_boosts_thr, idling_boosts_thr_without_issues,
+	     idling_needed_for_service_guarantees,
 	     asymmetric_scenario;
 
 	if (bfqd->strict_guarantees)
@@ -6610,6 +6925,23 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 		!bfq_symmetric_scenario(bfqd);
 
 	/*
+	 * Finally, there is a case where maximizing throughput is the
+	 * best choice even if it may cause unfairness toward
+	 * bfqq. Such a case is when bfqq became active in a burst of
+	 * queue activations. Queues that became active during a large
+	 * burst benefit only from throughput, as discussed in the
+	 * comments on bfq_handle_burst. Thus, if bfqq became active
+	 * in a burst and not idling the device maximizes throughput,
+	 * then the device must not be idled, because not idling the
+	 * device provides bfqq and all other queues in the burst with
+	 * maximum benefit. Combining this and the above case, we can
+	 * now establish when idling is actually needed to preserve
+	 * service guarantees.
+	 */
+	idling_needed_for_service_guarantees =
+		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+	/*
 	 * We have now all the components we need to compute the return
 	 * value of the function, which is true only if both the following
 	 * conditions hold:
@@ -6618,7 +6950,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 	 * is necessary to preserve service guarantees.
 	 */
 	return bfq_bfqq_sync(bfqq) &&
-		(idling_boosts_thr_without_issues || asymmetric_scenario);
+		(idling_boosts_thr_without_issues ||
+		 idling_needed_for_service_guarantees);
 }
 
 /*
@@ -6757,14 +7090,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 		bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
 
 		/*
-		 * If too much time has elapsed from the beginning of
-		 * this weight-raising period, then end weight raising.
+		 * If the queue was activated in a burst, or too much
+		 * time has elapsed from the beginning of this
+		 * weight-raising period, then end weight raising.
 		 */
-		if (time_is_before_jiffies(bfqq->last_wr_start_finish +
-					   bfqq->wr_cur_max_time)) {
+		if (bfq_bfqq_in_large_burst(bfqq))
+			bfq_bfqq_end_wr(bfqq);
+		else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+						bfqq->wr_cur_max_time)) {
 			if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
 			    time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
 						   bfq_wr_duration(bfqd)))
 				bfq_bfqq_end_wr(bfqq);
 			else {
 				/* switch back to interactive wr */
@@ -6962,7 +7298,16 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
 	if (bfqq->ref)
 		return;
 
-	bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq);
+	if (bfq_bfqq_sync(bfqq))
+		/*
+		 * The fact that this queue is being destroyed does not
+		 * invalidate the fact that this queue may have been
+		 * activated during the current burst. As a consequence,
+		 * although the queue does not exist anymore, and hence
+		 * needs to be removed from the burst list if there,
+		 * the burst size must not be decremented.
+		 */
+		hlist_del_init(&bfqq->burst_list_node);
 
 	kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -7124,6 +7469,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	RB_CLEAR_NODE(&bfqq->entity.rb_node);
 	INIT_LIST_HEAD(&bfqq->fifo);
+	INIT_HLIST_NODE(&bfqq->burst_list_node);
 
 	bfqq->ref = 0;
 	bfqq->bfqd = bfqd;
@@ -7135,6 +7481,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		if (!bfq_class_idle(bfqq))
 			bfq_mark_bfqq_idle_window(bfqq);
 		bfq_mark_bfqq_sync(bfqq);
+		bfq_mark_bfqq_just_created(bfqq);
 	} else
 		bfq_clear_bfqq_sync(bfqq);
 
@@ -7400,6 +7747,7 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 		new_bfqq->allocated++;
 		bfqq->allocated--;
 		new_bfqq->ref++;
+		bfq_clear_bfqq_just_created(bfqq);
 		/*
 		 * If the bic associated with the process
 		 * issuing this request still points to bfqq
@@ -7680,8 +8028,18 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 		bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
 
 		bic_set_bfqq(bic, bfqq, is_sync);
-		if (split && is_sync)
+		if (split && is_sync) {
+			if ((bic->was_in_burst_list && bfqd->large_burst) ||
+			    bic->saved_in_large_burst)
+				bfq_mark_bfqq_in_large_burst(bfqq);
+			else {
+				bfq_clear_bfqq_in_large_burst(bfqq);
+				if (bic->was_in_burst_list)
+					hlist_add_head(&bfqq->burst_list_node,
+						       &bfqd->burst_list);
+			}
 			bfqq->split_time = jiffies;
+		}
 
 	return bfqq;
 }
@@ -7714,6 +8072,11 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		/* If the queue was seeky for too long, break it apart. */
 		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
 			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+
+			/* Update bic before losing reference to bfqq */
+			if (bfq_bfqq_in_large_burst(bfqq))
+				bic->saved_in_large_burst = true;
+
 			bfqq = bfq_split_bfqq(bic, bfqq);
 			/*
 			 * A reference to bic->icq.ioc needs to be
@@ -7757,6 +8120,9 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		}
 	}
 
+	if (unlikely(bfq_bfqq_just_created(bfqq)))
+		bfq_handle_burst(bfqd, bfqq);
+
 	bfq_unlock_put_ioc(bfqd);
 
 	return 0;
@@ -7936,6 +8302,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
 	bfqd->oom_bfqq.entity.new_weight =
 		bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+	/* oom_bfqq does not participate in bursts */
+	bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+
 	/*
 	 * Trigger weight initialization, according to ioprio, at the
 	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
@@ -7956,6 +8326,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 	INIT_LIST_HEAD(&bfqd->active_list);
 	INIT_LIST_HEAD(&bfqd->idle_list);
+	INIT_HLIST_HEAD(&bfqd->burst_list);
 
 	bfqd->hw_tag = -1;
 
@@ -7970,6 +8341,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 	bfqd->bfq_requests_within_timer = 120;
 
+	bfqd->bfq_large_burst_thresh = 8;
+	bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
 	bfqd->low_latency = true;
 
 	/*