author     Arianna Avanzini <avanzini.arianna@gmail.com>  2017-04-12 12:23:20 -0400
committer  Jens Axboe <axboe@fb.com>                      2017-04-19 10:30:26 -0400
commit     e1b2324dd065880a3200098fe3637ac171c296e6
tree       df9b51dbb7a94babd3179f721b38f62ea3f66f0e  /block/bfq-iosched.c
parent     e01eff01d5c81f4dbba186299b16b08aa7316d5b

block, bfq: handle bursts of queue activations

Many popular I/O-intensive services or applications spawn or
reactivate many parallel threads/processes during short time
intervals. Examples are systemd during boot, or git grep. These
services or applications benefit mostly from a high throughput: the
quicker the I/O generated by their processes is cumulatively served,
the sooner the target job of these services or applications gets
completed. As a consequence, it is almost always counterproductive to
weight-raise any of the queues associated with the processes of these
services or applications: in most cases it would just lower the
throughput, mainly because weight-raising also implies device idling.

To address this issue, an I/O scheduler needs, first, to detect which
queues are associated with these services or applications. From the
I/O-scheduler standpoint, these services or applications cause bursts
of activations, i.e., activations of different queues occurring
shortly after each other. However, a shorter burst of activations may
also be caused by the start of an application that does not consist
of many parallel I/O-bound threads (see the comments on the function
bfq_handle_burst for details).
In view of these facts, this commit introduces:
1) a heuristic to detect (only) bursts of queue activations caused by
   services or applications consisting of many parallel I/O-bound
   threads;
2) the prevention of device idling and weight-raising for the queues
   belonging to these bursts.
Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
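
The detection boils down to a small state machine over activation
timestamps. Below is a minimal userspace model of that idea (a sketch,
not the kernel code): the function and variable names are invented for
illustration, plain millisecond integers stand in for jiffies,
per-queue marking and the same-group check are omitted, and the two
constants mirror the defaults set at the end of this patch.

#include <stdbool.h>
#include <stdio.h>

#define BURST_INTERVAL_MS	180	/* same default the patch sets */
#define LARGE_THRESH		8	/* same default the patch sets */

static long last_activation_ms;		/* models bfqd->last_ins_in_burst */
static int burst_size;			/* models bfqd->burst_size */
static bool large_burst;		/* models bfqd->large_burst */

/* Called on each queue activation; returns whether the activating
 * queue should be treated as part of a large burst. */
static bool handle_activation(long now_ms)
{
	if (now_ms - last_activation_ms > BURST_INTERVAL_MS) {
		/* Too late: the previous burst is over, start a new one. */
		burst_size = 1;
		large_burst = false;
	} else if (++burst_size >= LARGE_THRESH) {
		large_burst = true;	/* the burst has just become large */
	}

	last_activation_ms = now_ms;
	return large_burst;
}

int main(void)
{
	/* Ten activations 10 ms apart: the burst turns large at the 8th. */
	for (int i = 0; i < 10; i++)
		printf("t=%3d ms  in large burst: %d\n", i * 10,
		       handle_activation(i * 10));
	return 0;
}

Running this shows the burst becoming large at the 8th activation
within the interval; every later activation in the same burst is then
classified as large immediately, which is exactly the shortcut the
patch takes once the burst list has served its purpose.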
Diffstat (limited to 'block/bfq-iosched.c')

 block/bfq-iosched.c | 404
 1 file changed, 389 insertions(+), 15 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 549f030509ef..b7e3c8653414 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -360,6 +360,10 @@ struct bfq_queue {
 
 	/* bit vector: a 1 for each seeky requests in history */
 	u32 seek_history;
+
+	/* node for the device's burst list */
+	struct hlist_node burst_list_node;
+
 	/* position of the last request enqueued */
 	sector_t last_request_pos;
 
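
The new burst_list_node is a standard Linux hlist node, and the patch
leans on one idiom throughout: after hlist_del_init(), hlist_unhashed()
returns true again, so list membership itself serves as a flag (see the
!hlist_unhashed() tests added further down). A compact userspace
re-implementation of just that idiom, with simplified names, no locking,
and the usual <linux/list.h> semantics assumed:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct hnode {
	struct hnode *next, **pprev;
};

static struct hnode *head;	/* models struct hlist_head */

static void init_node(struct hnode *n)      { n->next = NULL; n->pprev = NULL; }
static bool unhashed(const struct hnode *n) { return !n->pprev; }

static void add_head(struct hnode *n)
{
	n->next = head;
	if (head)
		head->pprev = &n->next;
	head = n;
	n->pprev = &head;
}

static void del_init(struct hnode *n)
{
	if (unhashed(n))
		return;
	*n->pprev = n->next;
	if (n->next)
		n->next->pprev = n->pprev;
	init_node(n);	/* back to "unhashed", as hlist_del_init() does */
}

int main(void)
{
	struct hnode q;

	init_node(&q);
	printf("in list? %d\n", !unhashed(&q));	/* 0 */
	add_head(&q);
	printf("in list? %d\n", !unhashed(&q));	/* 1 */
	del_init(&q);
	printf("in list? %d\n", !unhashed(&q));	/* 0 */
	return 0;
}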
@@ -443,6 +447,17 @@ struct bfq_io_cq {
 	bool saved_IO_bound;
 
 	/*
+	 * Same purpose as the previous fields for the value of the
+	 * field keeping the queue's belonging to a large burst
+	 */
+	bool saved_in_large_burst;
+	/*
+	 * True if the queue belonged to a burst list before its merge
+	 * with another cooperating queue.
+	 */
+	bool was_in_burst_list;
+
+	/*
 	 * Similar to previous fields: save wr information.
 	 */
 	unsigned long saved_wr_coeff;
@@ -609,6 +624,36 @@ struct bfq_data {
 	 */
 	bool strict_guarantees;
 
+	/*
+	 * Last time at which a queue entered the current burst of
+	 * queues being activated shortly after each other; for more
+	 * details about this and the following parameters related to
+	 * a burst of activations, see the comments on the function
+	 * bfq_handle_burst.
+	 */
+	unsigned long last_ins_in_burst;
+	/*
+	 * Reference time interval used to decide whether a queue has
+	 * been activated shortly after @last_ins_in_burst.
+	 */
+	unsigned long bfq_burst_interval;
+	/* number of queues in the current burst of queue activations */
+	int burst_size;
+
+	/* common parent entity for the queues in the burst */
+	struct bfq_entity *burst_parent_entity;
+	/* Maximum burst size above which the current queue-activation
+	 * burst is deemed as 'large'.
+	 */
+	unsigned long bfq_large_burst_thresh;
+	/* true if a large queue-activation burst is in progress */
+	bool large_burst;
+	/*
+	 * Head of the burst list (as for the above fields, more
+	 * details in the comments on the function bfq_handle_burst).
+	 */
+	struct hlist_head burst_list;
+
 	/* if set to true, low-latency heuristics are enabled */
 	bool low_latency;
 	/*
@@ -671,7 +716,8 @@ struct bfq_data {
 };
 
 enum bfqq_state_flags {
-	BFQQF_busy = 0,		/* has requests or is in service */
+	BFQQF_just_created = 0,	/* queue just allocated */
+	BFQQF_busy,		/* has requests or is in service */
 	BFQQF_wait_request,	/* waiting for a request */
 	BFQQF_non_blocking_wait_rq, /*
 				     * waiting for a request
@@ -685,6 +731,10 @@ enum bfqq_state_flags {
 				     * having consumed at most 2/10 of
 				     * its budget
 				     */
+	BFQQF_in_large_burst,	    /*
+				     * bfqq activated in a large burst,
+				     * see comments to bfq_handle_burst.
+				     */
 	BFQQF_softrt_update,	    /*
 				     * may need softrt-next-start
 				     * update
@@ -707,6 +757,7 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq)	\
 	return test_bit(BFQQF_##name, &(bfqq)->flags);		\
 }
 
+BFQ_BFQQ_FNS(just_created);
 BFQ_BFQQ_FNS(busy);
 BFQ_BFQQ_FNS(wait_request);
 BFQ_BFQQ_FNS(non_blocking_wait_rq);
@@ -714,6 +765,7 @@ BFQ_BFQQ_FNS(fifo_expire);
 BFQ_BFQQ_FNS(idle_window);
 BFQ_BFQQ_FNS(sync);
 BFQ_BFQQ_FNS(IO_bound);
+BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
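
For readers unfamiliar with the BFQ_BFQQ_FNS macro: each invocation
generates a mark/clear/test accessor triple for one flag. The test
accessor's shape is visible in the context above; the mark and clear
bodies below are an assumption based on the usual set_bit/clear_bit
pattern, recast as a runnable userspace model with plain (non-atomic)
bit operations:

#include <stdio.h>

enum { BFQQF_in_large_burst = 8 };	/* bit position chosen for illustration */

struct bfq_queue { unsigned long flags; };

/* Roughly what BFQ_BFQQ_FNS(in_large_burst) expands to; the kernel
 * versions operate on the same flags word via the bitops API. */
static void bfq_mark_bfqq_in_large_burst(struct bfq_queue *bfqq)
{
	bfqq->flags |= 1UL << BFQQF_in_large_burst;
}

static void bfq_clear_bfqq_in_large_burst(struct bfq_queue *bfqq)
{
	bfqq->flags &= ~(1UL << BFQQF_in_large_burst);
}

static int bfq_bfqq_in_large_burst(const struct bfq_queue *bfqq)
{
	return !!(bfqq->flags & (1UL << BFQQF_in_large_burst));
}

int main(void)
{
	struct bfq_queue q = { 0 };

	bfq_mark_bfqq_in_large_burst(&q);
	printf("%d\n", bfq_bfqq_in_large_burst(&q));	/* 1 */
	bfq_clear_bfqq_in_large_burst(&q);
	printf("%d\n", bfq_bfqq_in_large_burst(&q));	/* 0 */
	return 0;
}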
@@ -4303,9 +4355,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 	bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
 	bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
 
-	if (bfqq->wr_coeff > 1 &&
+	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
 	    time_is_before_jiffies(bfqq->last_wr_start_finish +
-				   bfqq->wr_cur_max_time)) {
+				   bfqq->wr_cur_max_time))) {
 		bfq_log_bfqq(bfqq->bfqd, bfqq,
 			     "resume state: switching off wr");
 
@@ -4321,6 +4373,232 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
 	return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
 }
 
+/* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_queue *item;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
+		hlist_del_init(&item->burst_list_node);
+	hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+	bfqd->burst_size = 1;
+	bfqd->burst_parent_entity = bfqq->entity.parent;
+}
+
+/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
+static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/* Increment burst size to take into account also bfqq */
+	bfqd->burst_size++;
+
+	if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
+		struct bfq_queue *pos, *bfqq_item;
+		struct hlist_node *n;
+
+		/*
+		 * Enough queues have been activated shortly after each
+		 * other to consider this burst as large.
+		 */
+		bfqd->large_burst = true;
+
+		/*
+		 * We can now mark all queues in the burst list as
+		 * belonging to a large burst.
+		 */
+		hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
+				     burst_list_node)
+			bfq_mark_bfqq_in_large_burst(bfqq_item);
+		bfq_mark_bfqq_in_large_burst(bfqq);
+
+		/*
+		 * From now on, and until the current burst finishes, any
+		 * new queue being activated shortly after the last queue
+		 * was inserted in the burst can be immediately marked as
+		 * belonging to a large burst. So the burst list is not
+		 * needed any more. Remove it.
+		 */
+		hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
+					  burst_list_node)
+			hlist_del_init(&pos->burst_list_node);
+	} else /*
+		* Burst not yet large: add bfqq to the burst list. Do
+		* not increment the ref counter for bfqq, because bfqq
+		* is removed from the burst list before freeing bfqq
+		* in put_queue.
+		*/
+		hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+}
+
+/*
+ * If many queues belonging to the same group happen to be created
+ * shortly after each other, then the processes associated with these
+ * queues have typically a common goal. In particular, bursts of queue
+ * creations are usually caused by services or applications that spawn
+ * many parallel threads/processes. Examples are systemd during boot,
+ * or git grep. To help these processes get their job done as soon as
+ * possible, it is usually better to not grant either weight-raising
+ * or device idling to their queues.
+ *
+ * In this comment we describe, firstly, the reasons why this fact
+ * holds, and, secondly, the next function, which implements the main
+ * steps needed to properly mark these queues so that they can then be
+ * treated in a different way.
+ *
+ * The above services or applications benefit mostly from a high
+ * throughput: the quicker the requests of the activated queues are
+ * cumulatively served, the sooner the target job of these queues gets
+ * completed. As a consequence, weight-raising any of these queues,
+ * which also implies idling the device for it, is almost always
+ * counterproductive. In most cases it just lowers throughput.
+ *
+ * On the other hand, a burst of queue creations may be caused also by
+ * the start of an application that does not consist of a lot of
+ * parallel I/O-bound threads. In fact, with a complex application,
+ * several short processes may need to be executed to start up the
+ * application. In this respect, to start an application as quickly as
+ * possible, the best thing to do is in any case to privilege the I/O
+ * related to the application with respect to all other
+ * I/O. Therefore, the best strategy to start as quickly as possible
+ * an application that causes a burst of queue creations is to
+ * weight-raise all the queues created during the burst. This is the
+ * exact opposite of the best strategy for the other type of bursts.
+ *
+ * In the end, to take the best action for each of the two cases, the
+ * two types of bursts need to be distinguished. Fortunately, this
+ * seems relatively easy, by looking at the sizes of the bursts. In
+ * particular, we found a threshold such that only bursts with a
+ * larger size than that threshold are apparently caused by
+ * services or commands such as systemd or git grep. For brevity,
+ * hereafter we call these bursts simply 'large'. BFQ *does not*
+ * weight-raise queues whose creation occurs in a large burst. In
+ * addition, for each of these queues BFQ performs or does not perform
+ * idling depending on which choice boosts the throughput more. The
+ * exact choice depends on the device and request pattern at
+ * hand.
+ *
+ * Unfortunately, false positives may occur while an interactive task
+ * is starting (e.g., an application is being started). The
+ * consequence is that the queues associated with the task do not
+ * enjoy weight raising as expected. Fortunately these false positives
+ * are very rare. They typically occur if some service happens to
+ * start doing I/O exactly when the interactive task starts.
+ *
+ * Turning back to the next function, it implements all the steps
+ * needed to detect the occurrence of a large burst and to properly
+ * mark all the queues belonging to it (so that they can then be
+ * treated in a different way). This goal is achieved by maintaining a
+ * "burst list" that holds, temporarily, the queues that belong to the
+ * burst in progress. The list is then used to mark these queues as
+ * belonging to a large burst if the burst does become large. The main
+ * steps are the following.
+ *
+ * . when the very first queue is created, the queue is inserted into the
+ *   list (as it could be the first queue in a possible burst)
+ *
+ * . if the current burst has not yet become large, and a queue Q that does
+ *   not yet belong to the burst is activated shortly after the last time
+ *   at which a new queue entered the burst list, then the function appends
+ *   Q to the burst list
+ *
+ * . if, as a consequence of the previous step, the burst size reaches
+ *   the large-burst threshold, then
+ *
+ *     . all the queues in the burst list are marked as belonging to a
+ *       large burst
+ *
+ *     . the burst list is deleted; in fact, the burst list already served
+ *       its purpose (keeping temporarily track of the queues in a burst,
+ *       so as to be able to mark them as belonging to a large burst in the
+ *       previous sub-step), and now is not needed any more
+ *
+ *     . the device enters a large-burst mode
+ *
+ * . if a queue Q that does not belong to the burst is created while
+ *   the device is in large-burst mode and shortly after the last time
+ *   at which a queue either entered the burst list or was marked as
+ *   belonging to the current large burst, then Q is immediately marked
+ *   as belonging to a large burst.
+ *
+ * . if a queue Q that does not belong to the burst is created a while
+ *   later, i.e., not shortly after the last time at which a queue
+ *   either entered the burst list or was marked as belonging to the
+ *   current large burst, then the current burst is deemed finished and:
+ *
+ *     . the large-burst mode is reset if set
+ *
+ *     . the burst list is emptied
+ *
+ *     . Q is inserted in the burst list, as Q may be the first queue
+ *       in a possible new burst (then the burst list contains just Q
+ *       after this step).
+ */
+static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * If bfqq is already in the burst list or is part of a large
+	 * burst, or finally has just been split, then there is
+	 * nothing else to do.
+	 */
+	if (!hlist_unhashed(&bfqq->burst_list_node) ||
+	    bfq_bfqq_in_large_burst(bfqq) ||
+	    time_is_after_eq_jiffies(bfqq->split_time +
+				     msecs_to_jiffies(10)))
+		return;
+
+	/*
+	 * If bfqq's creation happens late enough, or bfqq belongs to
+	 * a different group than the burst group, then the current
+	 * burst is finished, and related data structures must be
+	 * reset.
+	 *
+	 * In this respect, consider the special case where bfqq is
+	 * the very first queue created after BFQ is selected for this
+	 * device. In this case, last_ins_in_burst and
+	 * burst_parent_entity are not yet significant when we get
+	 * here. But it is easy to verify that, whether or not the
+	 * following condition is true, bfqq will end up being
+	 * inserted into the burst list. In particular the list will
+	 * happen to contain only bfqq. And this is exactly what has
+	 * to happen, as bfqq may be the first queue of the first
+	 * burst.
+	 */
+	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
+	    bfqd->bfq_burst_interval) ||
+	    bfqq->entity.parent != bfqd->burst_parent_entity) {
+		bfqd->large_burst = false;
+		bfq_reset_burst_list(bfqd, bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then bfqq is being activated shortly after the
+	 * last queue. So, if the current burst is also large, we can mark
+	 * bfqq as belonging to this large burst immediately.
+	 */
+	if (bfqd->large_burst) {
+		bfq_mark_bfqq_in_large_burst(bfqq);
+		goto end;
+	}
+
+	/*
+	 * If we get here, then a large-burst state has not yet been
+	 * reached, but bfqq is being activated shortly after the last
+	 * queue. Then we add bfqq to the burst.
+	 */
+	bfq_add_to_burst(bfqd, bfqq);
+end:
+	/*
+	 * At this point, bfqq either has been added to the current
+	 * burst or has caused the current burst to terminate and a
+	 * possible new burst to start. In particular, in the second
+	 * case, bfqq has become the first queue in the possible new
+	 * burst. In both cases last_ins_in_burst needs to be moved
+	 * forward.
+	 */
+	bfqd->last_ins_in_burst = jiffies;
+}
+
 static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
 {
 	struct bfq_entity *entity = &bfqq->entity;
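
bfq_handle_burst turns entirely on jiffies arithmetic:
time_is_before_jiffies(t) asks whether t already lies in the past (it
is defined as time_after(jiffies, t)), and time_is_after_eq_jiffies()
is the mirror test used for split_time above. The comparison is
wrap-safe because it goes through a signed subtraction. A small
self-contained demonstration, with a local stand-in for the kernel
macro:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long jiffies_t;

/* Userspace model of the kernel's time_after(): true if a is later
 * than b. Signed subtraction keeps the test correct across counter
 * wraparound. */
static bool time_after(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	jiffies_t now = (jiffies_t)-5;	/* just before wraparound */
	jiffies_t deadline = now + 10;	/* wraps past 0 */

	/* Despite the wrap, "deadline is still in the future" holds: */
	printf("expired? %d\n", time_after(now, deadline));	 /* 0 */
	printf("expired? %d\n", time_after(now + 20, deadline)); /* 1 */
	return 0;
}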
@@ -4534,6 +4812,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
 					     unsigned int old_wr_coeff,
 					     bool wr_or_deserves_wr,
 					     bool interactive,
+					     bool in_burst,
 					     bool soft_rt)
 {
 	if (old_wr_coeff == 1 && wr_or_deserves_wr) {
@@ -4565,7 +4844,9 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
 		if (interactive) { /* update wr coeff and duration */
 			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
 			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-		} else if (soft_rt) {
+		} else if (in_burst)
+			bfqq->wr_coeff = 1;
+		else if (soft_rt) {
 			/*
 			 * The application is now or still meeting the
 			 * requirements for being deemed soft rt. We
@@ -4625,7 +4906,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 					     struct request *rq,
 					     bool *interactive)
 {
-	bool soft_rt, wr_or_deserves_wr, bfqq_wants_to_preempt,
+	bool soft_rt, in_burst, wr_or_deserves_wr,
+		bfqq_wants_to_preempt,
 		idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
 		/*
 		 * See the comments on
@@ -4641,12 +4923,15 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	/*
 	 * bfqq deserves to be weight-raised if:
 	 * - it is sync,
+	 * - it does not belong to a large burst,
 	 * - it has been idle for enough time or is soft real-time,
 	 * - is linked to a bfq_io_cq (it is not shared in any sense).
 	 */
+	in_burst = bfq_bfqq_in_large_burst(bfqq);
 	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+		!in_burst &&
 		time_is_before_jiffies(bfqq->soft_rt_next_start);
-	*interactive = idle_for_long_time;
+	*interactive = !in_burst && idle_for_long_time;
 	wr_or_deserves_wr = bfqd->low_latency &&
 		(bfqq->wr_coeff > 1 ||
 		 (bfq_bfqq_sync(bfqq) &&
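
Condensed, the change above is a veto: membership in a large burst now
blocks both the soft-real-time and the interactive paths to
weight-raising. A minimal model of just that decision (the surrounding
low_latency, sync and bic conditions are left out):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the eligibility logic in this hunk: being in a large
 * burst vetoes both paths to weight-raising. */
static bool deserves_wr(bool in_burst, bool idle_long, bool soft_rt_rate_ok)
{
	bool soft_rt = soft_rt_rate_ok && !in_burst;
	bool interactive = !in_burst && idle_long;

	return interactive || soft_rt;
}

int main(void)
{
	printf("%d\n", deserves_wr(false, true, false)); /* 1: interactive */
	printf("%d\n", deserves_wr(true, true, true));	 /* 0: in a burst */
	return 0;
}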
@@ -4661,6 +4946,31 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 							 arrived_in_time,
 							 wr_or_deserves_wr);
 
+	/*
+	 * If bfqq happened to be activated in a burst, but has been
+	 * idle for much more than an interactive queue, then we
+	 * assume that, in the overall I/O initiated in the burst, the
+	 * I/O associated with bfqq is finished. So bfqq does not need
+	 * to be treated as a queue belonging to a burst
+	 * anymore. Accordingly, we reset bfqq's in_large_burst flag
+	 * if set, and remove bfqq from the burst list if it's
+	 * there. We do not decrement burst_size, because the fact
+	 * that bfqq does not need to belong to the burst list any
+	 * more does not invalidate the fact that bfqq was created in
+	 * a burst.
+	 */
+	if (likely(!bfq_bfqq_just_created(bfqq)) &&
+	    idle_for_long_time &&
+	    time_is_before_jiffies(
+		    bfqq->budget_timeout +
+		    msecs_to_jiffies(10000))) {
+		hlist_del_init(&bfqq->burst_list_node);
+		bfq_clear_bfqq_in_large_burst(bfqq);
+	}
+
+	bfq_clear_bfqq_just_created(bfqq);
+
+
 	if (!bfq_bfqq_IO_bound(bfqq)) {
 		if (arrived_in_time) {
 			bfqq->requests_within_timer++;
@@ -4683,6 +4993,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 						 old_wr_coeff,
 						 wr_or_deserves_wr,
 						 *interactive,
+						 in_burst,
 						 soft_rt);
 
 	if (old_wr_coeff != bfqq->wr_coeff)
@@ -5310,6 +5621,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	bic->saved_ttime = bfqq->ttime;
 	bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
 	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
+	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
+	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
 	bic->saved_wr_coeff = bfqq->wr_coeff;
 	bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
 	bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
@@ -5345,7 +5658,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 	 * where bfqq has just been created, but has not yet made it
 	 * to be weight-raised (which may happen because EQM may merge
 	 * bfqq even before bfq_add_request is executed for the first
-	 * time for bfqq).
+	 * time for bfqq). Handling this case would however be very
+	 * easy, thanks to the flag just_created.
 	 */
 	if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
 		new_bfqq->wr_coeff = bfqq->wr_coeff;
@@ -6430,6 +6744,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 {
 	struct bfq_data *bfqd = bfqq->bfqd;
 	bool idling_boosts_thr, idling_boosts_thr_without_issues,
+	     idling_needed_for_service_guarantees,
 	     asymmetric_scenario;
 
 	if (bfqd->strict_guarantees)
@@ -6610,6 +6925,23 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 		!bfq_symmetric_scenario(bfqd);
 
 	/*
+	 * Finally, there is a case where maximizing throughput is the
+	 * best choice even if it may cause unfairness toward
+	 * bfqq. Such a case is when bfqq became active in a burst of
+	 * queue activations. Queues that became active during a large
+	 * burst benefit only from throughput, as discussed in the
+	 * comments on bfq_handle_burst. Thus, if bfqq became active
+	 * in a burst and not idling the device maximizes throughput,
+	 * then the device must not be idled, because not idling the
+	 * device provides bfqq and all other queues in the burst with
+	 * maximum benefit. Combining this and the above case, we can
+	 * now establish when idling is actually needed to preserve
+	 * service guarantees.
+	 */
+	idling_needed_for_service_guarantees =
+		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+
+	/*
 	 * We have now all the components we need to compute the return
 	 * value of the function, which is true only if both the following
 	 * conditions hold:
@@ -6618,7 +6950,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 	 * is necessary to preserve service guarantees.
 	 */
 	return bfq_bfqq_sync(bfqq) &&
-		(idling_boosts_thr_without_issues || asymmetric_scenario);
+		(idling_boosts_thr_without_issues ||
+		 idling_needed_for_service_guarantees);
 }
 
 /*
@@ -6757,14 +7090,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
 
 		/*
-		 * If too much time has elapsed from the beginning of
-		 * this weight-raising period, then end weight raising.
+		 * If the queue was activated in a burst, or too much
+		 * time has elapsed from the beginning of this
+		 * weight-raising period, then end weight raising.
 		 */
-		if (time_is_before_jiffies(bfqq->last_wr_start_finish +
-					   bfqq->wr_cur_max_time)) {
+		if (bfq_bfqq_in_large_burst(bfqq))
+			bfq_bfqq_end_wr(bfqq);
+		else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+						bfqq->wr_cur_max_time)) {
 			if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
 			    time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
 						   bfq_wr_duration(bfqd)))
 				bfq_bfqq_end_wr(bfqq);
 			else {
 				/* switch back to interactive wr */
@@ -6962,7 +7298,16 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
 	if (bfqq->ref)
 		return;
 
-	bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq);
+	if (bfq_bfqq_sync(bfqq))
+		/*
+		 * The fact that this queue is being destroyed does not
+		 * invalidate the fact that this queue may have been
+		 * activated during the current burst. As a consequence,
+		 * although the queue does not exist anymore, and hence
+		 * needs to be removed from the burst list if there,
+		 * the burst size must not be decremented.
+		 */
+		hlist_del_init(&bfqq->burst_list_node);
 
 	kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -7124,6 +7469,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 {
 	RB_CLEAR_NODE(&bfqq->entity.rb_node);
 	INIT_LIST_HEAD(&bfqq->fifo);
+	INIT_HLIST_NODE(&bfqq->burst_list_node);
 
 	bfqq->ref = 0;
 	bfqq->bfqd = bfqd;
@@ -7135,6 +7481,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		if (!bfq_class_idle(bfqq))
 			bfq_mark_bfqq_idle_window(bfqq);
 		bfq_mark_bfqq_sync(bfqq);
+		bfq_mark_bfqq_just_created(bfqq);
 	} else
 		bfq_clear_bfqq_sync(bfqq);
 
@@ -7400,6 +7747,7 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 		new_bfqq->allocated++;
 		bfqq->allocated--;
 		new_bfqq->ref++;
+		bfq_clear_bfqq_just_created(bfqq);
 		/*
 		 * If the bic associated with the process
 		 * issuing this request still points to bfqq
@@ -7680,8 +8028,18 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 		bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
 
 		bic_set_bfqq(bic, bfqq, is_sync);
-		if (split && is_sync)
+		if (split && is_sync) {
+			if ((bic->was_in_burst_list && bfqd->large_burst) ||
+			    bic->saved_in_large_burst)
+				bfq_mark_bfqq_in_large_burst(bfqq);
+			else {
+				bfq_clear_bfqq_in_large_burst(bfqq);
+				if (bic->was_in_burst_list)
+					hlist_add_head(&bfqq->burst_list_node,
+						       &bfqd->burst_list);
+			}
 			bfqq->split_time = jiffies;
+		}
 
 	return bfqq;
 }
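
The restore rule coded above is worth restating: a queue recreated by
a split re-enters the burst machinery according to what was saved in
its bfq_io_cq, and a queue that merely sat in the burst list is
promoted straight to in_large_burst if the burst has meanwhile become
large. A userspace sketch of the same decision, with the two saved
fields mirroring the patch and everything else invented for
illustration:

#include <stdbool.h>
#include <stdio.h>

/* Burst state saved in the bfq_io_cq across a queue split. */
struct saved_state {
	bool was_in_burst_list;
	bool saved_in_large_burst;
};

static const char *restore(const struct saved_state *bic, bool large_burst)
{
	if ((bic->was_in_burst_list && large_burst) ||
	    bic->saved_in_large_burst)
		return "mark in_large_burst";
	if (bic->was_in_burst_list)
		return "re-add to burst list";
	return "no burst state";
}

int main(void)
{
	struct saved_state a = { .was_in_burst_list = true };

	printf("%s\n", restore(&a, true));	/* mark in_large_burst */
	printf("%s\n", restore(&a, false));	/* re-add to burst list */
	return 0;
}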
@@ -7714,6 +8072,11 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		/* If the queue was seeky for too long, break it apart. */
 		if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
 			bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
+
+			/* Update bic before losing reference to bfqq */
+			if (bfq_bfqq_in_large_burst(bfqq))
+				bic->saved_in_large_burst = true;
+
 			bfqq = bfq_split_bfqq(bic, bfqq);
 			/*
 			 * A reference to bic->icq.ioc needs to be
@@ -7757,6 +8120,9 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
 		}
 	}
 
+	if (unlikely(bfq_bfqq_just_created(bfqq)))
+		bfq_handle_burst(bfqd, bfqq);
+
 	bfq_unlock_put_ioc(bfqd);
 
 	return 0;
@@ -7936,6 +8302,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
 	bfqd->oom_bfqq.entity.new_weight =
 		bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
+
+	/* oom_bfqq does not participate in bursts */
+	bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
+
 	/*
 	 * Trigger weight initialization, according to ioprio, at the
 	 * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
@@ -7956,6 +8326,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 	INIT_LIST_HEAD(&bfqd->active_list);
 	INIT_LIST_HEAD(&bfqd->idle_list);
+	INIT_HLIST_HEAD(&bfqd->burst_list);
 
 	bfqd->hw_tag = -1;
 
@@ -7970,6 +8341,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 	bfqd->bfq_requests_within_timer = 120;
 
+	bfqd->bfq_large_burst_thresh = 8;
+	bfqd->bfq_burst_interval = msecs_to_jiffies(180);
+
 	bfqd->low_latency = true;
 
 	/*
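
So, by default, activations count toward the same burst while they
arrive within 180 ms of one another, and a burst is deemed large at 8
queues. msecs_to_jiffies() converts the interval at the running
kernel's HZ; a rough userspace model of that conversion (the kernel
rounds up, and its exact formula varies with HZ):

#include <stdio.h>

/* Round-up millisecond-to-tick conversion, modeled after what the
 * kernel's msecs_to_jiffies() computes for common HZ values. */
static unsigned long msecs_to_jiffies_model(unsigned int ms, unsigned int hz)
{
	return ((unsigned long)ms * hz + 999) / 1000;
}

int main(void)
{
	unsigned int hz[] = { 100, 250, 1000 };

	for (int i = 0; i < 3; i++)
		printf("HZ=%4u: 180 ms = %lu jiffies\n",
		       hz[i], msecs_to_jiffies_model(180, hz[i]));
	return 0;
}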