-rw-r--r--  Documentation/block/bfq-iosched.txt |   9
-rw-r--r--  block/bfq-iosched.c                 | 740
2 files changed, 675 insertions, 74 deletions
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 461b27fce979..1b87df6cd476 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -375,6 +375,11 @@ default, low latency mode is enabled. If enabled, interactive and soft | |||
375 | real-time applications are privileged and experience a lower latency, | 375 | real-time applications are privileged and experience a lower latency, |
376 | as explained in more detail in the description of how BFQ works. | 376 | as explained in more detail in the description of how BFQ works. |
377 | 377 | ||
378 | DO NOT enable this mode if you need full control over bandwidth | ||
379 | distribution. In fact, if it is enabled, then BFQ automatically | ||
380 | increases the bandwidth share of privileged applications, as the main | ||
381 | means to guarantee a lower latency to them. | ||
382 | |||
378 | timeout_sync | 383 | timeout_sync |
379 | ------------ | 384 | ------------ |
380 | 385 | ||
@@ -507,6 +512,10 @@ linear mapping between ioprio and weights, described at the beginning | |||
507 | of the tunable section, is still valid, but all weights higher than | 512 | of the tunable section, is still valid, but all weights higher than |
508 | IOPRIO_BE_NR*10 are mapped to ioprio 0. | 513 | IOPRIO_BE_NR*10 are mapped to ioprio 0. |
509 | 514 | ||
515 | Recall that, if low-latency is set, then BFQ automatically raises the | ||
516 | weight of the queues associated with interactive and soft real-time | ||
517 | applications. Unset this tunable if you need/want to control weights. | ||
518 | |||
510 | 519 | ||
511 | [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O | 520 | [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O |
512 | Scheduler", Proceedings of the First Workshop on Mobile System | 521 | Scheduler", Proceedings of the First Workshop on Mobile System |
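As a practical aside to the documentation changes above: low_latency is exposed as a per-device iosched attribute, so it can be cleared at run time when exact weight control is required. A minimal sketch, assuming the usual /sys/block/<dev>/queue/iosched/ layout; the helper name, buffer size and error handling are illustrative only and not part of this patch:

#include <stdio.h>

/* Hypothetical helper: write 0 or 1 to BFQ's low_latency tunable for @dev.
 * The sysfs path is assumed; adjust it for the device at hand. */
int bfq_set_low_latency(const char *dev, int enable)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/block/%s/queue/iosched/low_latency", dev);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", enable ? 1 : 0);
	return fclose(f);
}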
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index dce273b91015..1a32c8341ab0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -339,6 +339,17 @@ struct bfq_queue { | |||
339 | 339 | ||
340 | /* pid of the process owning the queue, used for logging purposes */ | 340 | /* pid of the process owning the queue, used for logging purposes */ |
341 | pid_t pid; | 341 | pid_t pid; |
342 | |||
343 | /* current maximum weight-raising time for this queue */ | ||
344 | unsigned long wr_cur_max_time; | ||
345 | /* | ||
346 | * Start time of the current weight-raising period if | ||
347 | * the @bfq-queue is being weight-raised, otherwise | ||
348 | * finish time of the last weight-raising period. | ||
349 | */ | ||
350 | unsigned long last_wr_start_finish; | ||
351 | /* factor by which the weight of this queue is multiplied */ | ||
352 | unsigned int wr_coeff; | ||
342 | }; | 353 | }; |
343 | 354 | ||
344 | /** | 355 | /** |
@@ -356,6 +367,11 @@ struct bfq_io_cq { | |||
356 | #endif | 367 | #endif |
357 | }; | 368 | }; |
358 | 369 | ||
370 | enum bfq_device_speed { | ||
371 | BFQ_BFQD_FAST, | ||
372 | BFQ_BFQD_SLOW, | ||
373 | }; | ||
374 | |||
359 | /** | 375 | /** |
360 | * struct bfq_data - per-device data structure. | 376 | * struct bfq_data - per-device data structure. |
361 | * | 377 | * |
@@ -487,6 +503,34 @@ struct bfq_data { | |||
487 | */ | 503 | */ |
488 | bool strict_guarantees; | 504 | bool strict_guarantees; |
489 | 505 | ||
506 | /* if set to true, low-latency heuristics are enabled */ | ||
507 | bool low_latency; | ||
508 | /* | ||
509 | * Maximum factor by which the weight of a weight-raised queue | ||
510 | * is multiplied. | ||
511 | */ | ||
512 | unsigned int bfq_wr_coeff; | ||
513 | /* maximum duration of a weight-raising period (jiffies) */ | ||
514 | unsigned int bfq_wr_max_time; | ||
515 | /* | ||
516 | * Minimum idle period after which weight-raising may be | ||
517 | * reactivated for a queue (in jiffies). | ||
518 | */ | ||
519 | unsigned int bfq_wr_min_idle_time; | ||
520 | /* | ||
521 | * Minimum period between request arrivals after which | ||
522 | * weight-raising may be reactivated for an already busy async | ||
523 | * queue (in jiffies). | ||
524 | */ | ||
525 | unsigned long bfq_wr_min_inter_arr_async; | ||
526 | /* | ||
527 | * Cached value of the product R*T, used for computing the | ||
528 | * maximum duration of weight raising automatically. | ||
529 | */ | ||
530 | u64 RT_prod; | ||
531 | /* device-speed class for the low-latency heuristic */ | ||
532 | enum bfq_device_speed device_speed; | ||
533 | |||
490 | /* fallback dummy bfqq for extreme OOM conditions */ | 534 | /* fallback dummy bfqq for extreme OOM conditions */ |
491 | struct bfq_queue oom_bfqq; | 535 | struct bfq_queue oom_bfqq; |
492 | 536 | ||
@@ -515,7 +559,6 @@ enum bfqq_state_flags { | |||
515 | BFQQF_fifo_expire, /* FIFO checked in this slice */ | 559 | BFQQF_fifo_expire, /* FIFO checked in this slice */ |
516 | BFQQF_idle_window, /* slice idling enabled */ | 560 | BFQQF_idle_window, /* slice idling enabled */ |
517 | BFQQF_sync, /* synchronous queue */ | 561 | BFQQF_sync, /* synchronous queue */ |
518 | BFQQF_budget_new, /* no completion with this budget */ | ||
519 | BFQQF_IO_bound, /* | 562 | BFQQF_IO_bound, /* |
520 | * bfqq has timed-out at least once | 563 | * bfqq has timed-out at least once |
521 | * having consumed at most 2/10 of | 564 | * having consumed at most 2/10 of |
@@ -543,7 +586,6 @@ BFQ_BFQQ_FNS(non_blocking_wait_rq); | |||
543 | BFQ_BFQQ_FNS(fifo_expire); | 586 | BFQ_BFQQ_FNS(fifo_expire); |
544 | BFQ_BFQQ_FNS(idle_window); | 587 | BFQ_BFQQ_FNS(idle_window); |
545 | BFQ_BFQQ_FNS(sync); | 588 | BFQ_BFQQ_FNS(sync); |
546 | BFQ_BFQQ_FNS(budget_new); | ||
547 | BFQ_BFQQ_FNS(IO_bound); | 589 | BFQ_BFQQ_FNS(IO_bound); |
548 | #undef BFQ_BFQQ_FNS | 590 | #undef BFQ_BFQQ_FNS |
549 | 591 | ||
@@ -637,7 +679,7 @@ struct bfq_group_data { | |||
637 | /* must be the first member */ | 679 | /* must be the first member */ |
638 | struct blkcg_policy_data pd; | 680 | struct blkcg_policy_data pd; |
639 | 681 | ||
640 | unsigned short weight; | 682 | unsigned int weight; |
641 | }; | 683 | }; |
642 | 684 | ||
643 | /** | 685 | /** |
@@ -732,6 +774,8 @@ static void bfq_put_queue(struct bfq_queue *bfqq); | |||
732 | static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, | 774 | static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, |
733 | struct bio *bio, bool is_sync, | 775 | struct bio *bio, bool is_sync, |
734 | struct bfq_io_cq *bic); | 776 | struct bfq_io_cq *bic); |
777 | static void bfq_end_wr_async_queues(struct bfq_data *bfqd, | ||
778 | struct bfq_group *bfqg); | ||
735 | static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); | 779 | static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); |
736 | static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); | 780 | static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); |
737 | 781 | ||
@@ -787,6 +831,56 @@ static struct kmem_cache *bfq_pool; | |||
787 | /* Shift used for peak rate fixed precision calculations. */ | 831 | /* Shift used for peak rate fixed precision calculations. */ |
788 | #define BFQ_RATE_SHIFT 16 | 832 | #define BFQ_RATE_SHIFT 16 |
789 | 833 | ||
834 | /* | ||
835 | * By default, BFQ computes the duration of the weight raising for | ||
836 | * interactive applications automatically, using the following formula: | ||
837 | * duration = (R / r) * T, where r is the peak rate of the device, and | ||
838 | * R and T are two reference parameters. | ||
839 | * In particular, R is the peak rate of the reference device (see below), | ||
840 | * and T is a reference time: given the systems that are likely to be | ||
841 | * installed on the reference device according to its speed class, T is | ||
842 | * about the maximum time needed, under BFQ and while reading two files in | ||
843 | * parallel, to load typical large applications on these systems. | ||
844 | * In practice, the slower/faster the device at hand is, the more/less it | ||
845 | * takes to load applications with respect to the reference device. | ||
846 | * Accordingly, the longer/shorter BFQ grants weight raising to interactive | ||
847 | * applications. | ||
848 | * | ||
849 | * BFQ uses four different reference pairs (R, T), depending on: | ||
850 | * . whether the device is rotational or non-rotational; | ||
851 | * . whether the device is slow, such as old or portable HDDs, as well as | ||
852 | * SD cards, or fast, such as newer HDDs and SSDs. | ||
853 | * | ||
854 | * The device's speed class is dynamically (re)detected in | ||
855 | * bfq_update_peak_rate() every time the estimated peak rate is updated. | ||
856 | * | ||
857 | * In the following definitions, R_slow[0]/R_fast[0] and | ||
858 | * T_slow[0]/T_fast[0] are the reference values for a slow/fast | ||
859 | * rotational device, whereas R_slow[1]/R_fast[1] and | ||
860 | * T_slow[1]/T_fast[1] are the reference values for a slow/fast | ||
861 | * non-rotational device. Finally, device_speed_thresh are the | ||
862 | * thresholds used to switch between speed classes. The reference | ||
863 | * rates are not the actual peak rates of the devices used as a | ||
864 | * reference, but slightly lower values. The reason for using these | ||
865 | * slightly lower values is that the peak-rate estimator tends to | ||
866 | * yield slightly lower values than the actual peak rate (it can yield | ||
867 | * the actual peak rate only if there is only one process doing I/O, | ||
868 | * and the process does sequential I/O). | ||
869 | * | ||
870 | * Both the reference peak rates and the thresholds are measured in | ||
871 | * sectors/usec, left-shifted by BFQ_RATE_SHIFT. | ||
872 | */ | ||
873 | static int R_slow[2] = {1000, 10700}; | ||
874 | static int R_fast[2] = {14000, 33000}; | ||
875 | /* | ||
876 | * To improve readability, a conversion function is used to initialize the | ||
877 | * following arrays, which entails that they can be initialized only in a | ||
878 | * function. | ||
879 | */ | ||
880 | static int T_slow[2]; | ||
881 | static int T_fast[2]; | ||
882 | static int device_speed_thresh[2]; | ||
883 | |||
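To make the formula in the comment above concrete, the standalone sketch below evaluates duration = (R / r) * T with made-up numbers for a fast rotational device; the clamp mirrors the 3-13 second window applied later in this patch by bfq_wr_duration(), but the values and the program itself are purely illustrative:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical illustration of duration = (R / r) * T, in milliseconds. */
int main(void)
{
	uint64_t R = 14000;	/* reference rate, e.g. R_fast[0] */
	uint64_t T = 8000;	/* reference load time, ~8 s for that class */
	uint64_t r = 28000;	/* estimated peak rate of the device at hand */
	uint64_t dur = R * T / r;	/* i.e. RT_prod / peak_rate -> 4000 */

	/* clamp to the [3000, 13000] ms window used for weight raising */
	if (dur > 13000)
		dur = 13000;
	else if (dur < 3000)
		dur = 3000;

	printf("weight-raising duration: %llu ms\n", (unsigned long long)dur);
	return 0;
}

A device estimated at twice the reference rate thus gets half the reference duration, as described above.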
790 | #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ | 884 | #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ |
791 | { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) | 885 | { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) |
792 | 886 | ||
@@ -1486,7 +1580,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, | |||
1486 | 1580 | ||
1487 | if (entity->prio_changed) { | 1581 | if (entity->prio_changed) { |
1488 | struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); | 1582 | struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
1489 | unsigned short prev_weight, new_weight; | 1583 | unsigned int prev_weight, new_weight; |
1490 | struct bfq_data *bfqd = NULL; | 1584 | struct bfq_data *bfqd = NULL; |
1491 | #ifdef CONFIG_BFQ_GROUP_IOSCHED | 1585 | #ifdef CONFIG_BFQ_GROUP_IOSCHED |
1492 | struct bfq_sched_data *sd; | 1586 | struct bfq_sched_data *sd; |
@@ -1535,7 +1629,8 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, | |||
1535 | new_st = bfq_entity_service_tree(entity); | 1629 | new_st = bfq_entity_service_tree(entity); |
1536 | 1630 | ||
1537 | prev_weight = entity->weight; | 1631 | prev_weight = entity->weight; |
1538 | new_weight = entity->orig_weight; | 1632 | new_weight = entity->orig_weight * |
1633 | (bfqq ? bfqq->wr_coeff : 1); | ||
1539 | entity->weight = new_weight; | 1634 | entity->weight = new_weight; |
1540 | 1635 | ||
1541 | new_st->wsum += entity->weight; | 1636 | new_st->wsum += entity->weight; |
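With the default bfq_wr_coeff of 30 set later in this patch, the multiplication above turns an original weight of, say, 100 into an effective weight of 3000 while raising is in effect, so against a single non-raised competitor of equal original weight the raised queue gets roughly 3000/3100, about 97% of the throughput. A hypothetical back-of-the-envelope sketch, not code from the patch:

/* Hypothetical sketch: throughput share of a weight-raised queue that
 * competes with one non-raised queue of the same original weight. */
double raised_share(unsigned int orig_weight, unsigned int wr_coeff)
{
	unsigned int raised = orig_weight * wr_coeff;	/* e.g. 100 * 30 */

	return (double)raised / (raised + orig_weight);	/* ~0.968 */
}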
@@ -1630,6 +1725,8 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, | |||
1630 | struct bfq_service_tree *st, | 1725 | struct bfq_service_tree *st, |
1631 | bool backshifted) | 1726 | bool backshifted) |
1632 | { | 1727 | { |
1728 | struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); | ||
1729 | |||
1633 | st = __bfq_entity_update_weight_prio(st, entity); | 1730 | st = __bfq_entity_update_weight_prio(st, entity); |
1634 | bfq_calc_finish(entity, entity->budget); | 1731 | bfq_calc_finish(entity, entity->budget); |
1635 | 1732 | ||
@@ -1659,10 +1756,19 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, | |||
1659 | * time. This may introduce a little unfairness among queues | 1756 | * time. This may introduce a little unfairness among queues |
1660 | * with backshifted timestamps, but it does not break | 1757 | * with backshifted timestamps, but it does not break |
1661 | * worst-case fairness guarantees. | 1758 | * worst-case fairness guarantees. |
1759 | * | ||
1760 | * As a special case, if bfqq is weight-raised, push up | ||
1761 | * timestamps much less, to keep very low the probability that | ||
1762 | * this push up causes the backshifted finish timestamps of | ||
1763 | * weight-raised queues to become higher than the backshifted | ||
1764 | * finish timestamps of non weight-raised queues. | ||
1662 | */ | 1765 | */ |
1663 | if (backshifted && bfq_gt(st->vtime, entity->finish)) { | 1766 | if (backshifted && bfq_gt(st->vtime, entity->finish)) { |
1664 | unsigned long delta = st->vtime - entity->finish; | 1767 | unsigned long delta = st->vtime - entity->finish; |
1665 | 1768 | ||
1769 | if (bfqq) | ||
1770 | delta /= bfqq->wr_coeff; | ||
1771 | |||
1666 | entity->start += delta; | 1772 | entity->start += delta; |
1667 | entity->finish += delta; | 1773 | entity->finish += delta; |
1668 | } | 1774 | } |
@@ -3070,6 +3176,18 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) | |||
3070 | bfqg_stats_xfer_dead(bfqg); | 3176 | bfqg_stats_xfer_dead(bfqg); |
3071 | } | 3177 | } |
3072 | 3178 | ||
3179 | static void bfq_end_wr_async(struct bfq_data *bfqd) | ||
3180 | { | ||
3181 | struct blkcg_gq *blkg; | ||
3182 | |||
3183 | list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { | ||
3184 | struct bfq_group *bfqg = blkg_to_bfqg(blkg); | ||
3185 | |||
3186 | bfq_end_wr_async_queues(bfqd, bfqg); | ||
3187 | } | ||
3188 | bfq_end_wr_async_queues(bfqd, bfqd->root_group); | ||
3189 | } | ||
3190 | |||
3073 | static int bfq_io_show_weight(struct seq_file *sf, void *v) | 3191 | static int bfq_io_show_weight(struct seq_file *sf, void *v) |
3074 | { | 3192 | { |
3075 | struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); | 3193 | struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); |
@@ -3433,6 +3551,11 @@ static void bfq_init_entity(struct bfq_entity *entity, | |||
3433 | 3551 | ||
3434 | static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} | 3552 | static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} |
3435 | 3553 | ||
3554 | static void bfq_end_wr_async(struct bfq_data *bfqd) | ||
3555 | { | ||
3556 | bfq_end_wr_async_queues(bfqd, bfqd->root_group); | ||
3557 | } | ||
3558 | |||
3436 | static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, | 3559 | static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, |
3437 | struct blkcg *blkcg) | 3560 | struct blkcg *blkcg) |
3438 | { | 3561 | { |
@@ -3613,7 +3736,7 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, | |||
3613 | static unsigned long bfq_serv_to_charge(struct request *rq, | 3736 | static unsigned long bfq_serv_to_charge(struct request *rq, |
3614 | struct bfq_queue *bfqq) | 3737 | struct bfq_queue *bfqq) |
3615 | { | 3738 | { |
3616 | if (bfq_bfqq_sync(bfqq)) | 3739 | if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1) |
3617 | return blk_rq_sectors(rq); | 3740 | return blk_rq_sectors(rq); |
3618 | 3741 | ||
3619 | return blk_rq_sectors(rq) * bfq_async_charge_factor; | 3742 | return blk_rq_sectors(rq) * bfq_async_charge_factor; |
@@ -3700,12 +3823,12 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
3700 | * whether the in-service queue should be expired, by returning | 3823 | * whether the in-service queue should be expired, by returning |
3701 | * true. The purpose of expiring the in-service queue is to give bfqq | 3824 | * true. The purpose of expiring the in-service queue is to give bfqq |
3702 | * the chance to possibly preempt the in-service queue, and the reason | 3825 | * the chance to possibly preempt the in-service queue, and the reason |
3703 | * for preempting the in-service queue is to achieve the following | 3826 | * for preempting the in-service queue is to achieve one of the two |
3704 | * goal: guarantee to bfqq its reserved bandwidth even if bfqq has | 3827 | * goals below. |
3705 | * expired because it has remained idle. | ||
3706 | * | 3828 | * |
3707 | * In particular, bfqq may have expired for one of the following two | 3829 | * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has |
3708 | * reasons: | 3830 | * expired because it has remained idle. In particular, bfqq may have |
3831 | * expired for one of the following two reasons: | ||
3709 | * | 3832 | * |
3710 | * - BFQQE_NO_MORE_REQUESTS bfqq did not enjoy any device idling | 3833 | * - BFQQE_NO_MORE_REQUESTS bfqq did not enjoy any device idling |
3711 | * and did not make it to issue a new request before its last | 3834 | * and did not make it to issue a new request before its last |
@@ -3769,10 +3892,36 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
3769 | * above-described special way, and signals that the in-service queue | 3892 | * above-described special way, and signals that the in-service queue |
3770 | * should be expired. Timestamp back-shifting is done later in | 3893 | * should be expired. Timestamp back-shifting is done later in |
3771 | * __bfq_activate_entity. | 3894 | * __bfq_activate_entity. |
3895 | * | ||
3896 | * 2. Reduce latency. Even if timestamps are not backshifted to let | ||
3897 | * the process associated with bfqq recover a service hole, bfqq may | ||
3898 | * however happen to have, after being (re)activated, a lower finish | ||
3899 | * timestamp than the in-service queue. That is, the next budget of | ||
3900 | * bfqq may have to be completed before the one of the in-service | ||
3901 | * queue. If this is the case, then preempting the in-service queue | ||
3902 | * allows this goal to be achieved, apart from the unpreemptible, | ||
3903 | * outstanding requests mentioned above. | ||
3904 | * | ||
3905 | * Unfortunately, regardless of which of the above two goals one wants | ||
3906 | * to achieve, service trees need first to be updated to know whether | ||
3907 | * the in-service queue must be preempted. To have service trees | ||
3908 | * correctly updated, the in-service queue must be expired and | ||
3909 | * rescheduled, and bfqq must be scheduled too. This is one of the | ||
3910 | * most costly operations (in future versions, the scheduling | ||
3911 | * mechanism may be re-designed in such a way to make it possible to | ||
3912 | * know whether preemption is needed without needing to update service | ||
3913 | * trees). In addition, queue preemptions almost always cause random | ||
3914 | * I/O, and thus loss of throughput. Because of these facts, the next | ||
3915 | * function adopts the following simple scheme to avoid both costly | ||
3916 | * operations and too frequent preemptions: it requests the expiration | ||
3917 | * of the in-service queue (unconditionally) only for queues that need | ||
3918 | * to recover a hole, or that either are weight-raised or deserve to | ||
3919 | * be weight-raised. | ||
3772 | */ | 3920 | */ |
3773 | static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, | 3921 | static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, |
3774 | struct bfq_queue *bfqq, | 3922 | struct bfq_queue *bfqq, |
3775 | bool arrived_in_time) | 3923 | bool arrived_in_time, |
3924 | bool wr_or_deserves_wr) | ||
3776 | { | 3925 | { |
3777 | struct bfq_entity *entity = &bfqq->entity; | 3926 | struct bfq_entity *entity = &bfqq->entity; |
3778 | 3927 | ||
@@ -3807,14 +3956,85 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, | |||
3807 | entity->budget = max_t(unsigned long, bfqq->max_budget, | 3956 | entity->budget = max_t(unsigned long, bfqq->max_budget, |
3808 | bfq_serv_to_charge(bfqq->next_rq, bfqq)); | 3957 | bfq_serv_to_charge(bfqq->next_rq, bfqq)); |
3809 | bfq_clear_bfqq_non_blocking_wait_rq(bfqq); | 3958 | bfq_clear_bfqq_non_blocking_wait_rq(bfqq); |
3810 | return false; | 3959 | return wr_or_deserves_wr; |
3960 | } | ||
3961 | |||
3962 | static unsigned int bfq_wr_duration(struct bfq_data *bfqd) | ||
3963 | { | ||
3964 | u64 dur; | ||
3965 | |||
3966 | if (bfqd->bfq_wr_max_time > 0) | ||
3967 | return bfqd->bfq_wr_max_time; | ||
3968 | |||
3969 | dur = bfqd->RT_prod; | ||
3970 | do_div(dur, bfqd->peak_rate); | ||
3971 | |||
3972 | /* | ||
3973 | * Limit duration between 3 and 13 seconds. Tests show that | ||
3974 | * higher values than 13 seconds often yield the opposite of | ||
3975 | * the desired result, i.e., worsen responsiveness by letting | ||
3976 | * non-interactive and non-soft-real-time applications | ||
3977 | * preserve weight raising for too long a time interval. | ||
3978 | * | ||
3979 | * On the other end, lower values than 3 seconds make it | ||
3980 | * difficult for most interactive tasks to complete their jobs | ||
3981 | * before weight-raising finishes. | ||
3982 | */ | ||
3983 | if (dur > msecs_to_jiffies(13000)) | ||
3984 | dur = msecs_to_jiffies(13000); | ||
3985 | else if (dur < msecs_to_jiffies(3000)) | ||
3986 | dur = msecs_to_jiffies(3000); | ||
3987 | |||
3988 | return dur; | ||
3989 | } | ||
3990 | |||
3991 | static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, | ||
3992 | struct bfq_queue *bfqq, | ||
3993 | unsigned int old_wr_coeff, | ||
3994 | bool wr_or_deserves_wr, | ||
3995 | bool interactive) | ||
3996 | { | ||
3997 | if (old_wr_coeff == 1 && wr_or_deserves_wr) { | ||
3998 | /* start a weight-raising period */ | ||
3999 | bfqq->wr_coeff = bfqd->bfq_wr_coeff; | ||
4000 | /* update wr duration */ | ||
4001 | bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); | ||
4002 | |||
4003 | /* | ||
4004 | * If needed, further reduce budget to make sure it is | ||
4005 | * close to bfqq's backlog, so as to reduce the | ||
4006 | * scheduling-error component due to a too large | ||
4007 | * budget. Do not care about throughput consequences, | ||
4008 | * but only about latency. Finally, do not assign a | ||
4009 | * too small budget either, to avoid increasing | ||
4010 | * latency by causing too frequent expirations. | ||
4011 | */ | ||
4012 | bfqq->entity.budget = min_t(unsigned long, | ||
4013 | bfqq->entity.budget, | ||
4014 | 2 * bfq_min_budget(bfqd)); | ||
4015 | } else if (old_wr_coeff > 1) { | ||
4016 | /* update wr duration */ | ||
4017 | bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); | ||
4018 | } | ||
4019 | } | ||
4020 | |||
4021 | static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd, | ||
4022 | struct bfq_queue *bfqq) | ||
4023 | { | ||
4024 | return bfqq->dispatched == 0 && | ||
4025 | time_is_before_jiffies( | ||
4026 | bfqq->budget_timeout + | ||
4027 | bfqd->bfq_wr_min_idle_time); | ||
3811 | } | 4028 | } |
3812 | 4029 | ||
3813 | static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, | 4030 | static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, |
3814 | struct bfq_queue *bfqq, | 4031 | struct bfq_queue *bfqq, |
3815 | struct request *rq) | 4032 | int old_wr_coeff, |
4033 | struct request *rq, | ||
4034 | bool *interactive) | ||
3816 | { | 4035 | { |
3817 | bool bfqq_wants_to_preempt, | 4036 | bool wr_or_deserves_wr, bfqq_wants_to_preempt, |
4037 | idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq), | ||
3818 | /* | 4038 | /* |
3819 | * See the comments on | 4039 | * See the comments on |
3820 | * bfq_bfqq_update_budg_for_activation for | 4040 | * bfq_bfqq_update_budg_for_activation for |
@@ -3827,12 +4047,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, | |||
3827 | bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags); | 4047 | bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags); |
3828 | 4048 | ||
3829 | /* | 4049 | /* |
3830 | * Update budget and check whether bfqq may want to preempt | 4050 | * bfqq deserves to be weight-raised if: |
3831 | * the in-service queue. | 4051 | * - it is sync, |
4052 | * - it has been idle for enough time. | ||
4053 | */ | ||
4054 | *interactive = idle_for_long_time; | ||
4055 | wr_or_deserves_wr = bfqd->low_latency && | ||
4056 | (bfqq->wr_coeff > 1 || | ||
4057 | (bfq_bfqq_sync(bfqq) && *interactive)); | ||
4058 | |||
4059 | /* | ||
4060 | * Using the last flag, update budget and check whether bfqq | ||
4061 | * may want to preempt the in-service queue. | ||
3832 | */ | 4062 | */ |
3833 | bfqq_wants_to_preempt = | 4063 | bfqq_wants_to_preempt = |
3834 | bfq_bfqq_update_budg_for_activation(bfqd, bfqq, | 4064 | bfq_bfqq_update_budg_for_activation(bfqd, bfqq, |
3835 | arrived_in_time); | 4065 | arrived_in_time, |
4066 | wr_or_deserves_wr); | ||
3836 | 4067 | ||
3837 | if (!bfq_bfqq_IO_bound(bfqq)) { | 4068 | if (!bfq_bfqq_IO_bound(bfqq)) { |
3838 | if (arrived_in_time) { | 4069 | if (arrived_in_time) { |
@@ -3844,6 +4075,16 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, | |||
3844 | bfqq->requests_within_timer = 0; | 4075 | bfqq->requests_within_timer = 0; |
3845 | } | 4076 | } |
3846 | 4077 | ||
4078 | if (bfqd->low_latency) { | ||
4079 | bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq, | ||
4080 | old_wr_coeff, | ||
4081 | wr_or_deserves_wr, | ||
4082 | *interactive); | ||
4083 | |||
4084 | if (old_wr_coeff != bfqq->wr_coeff) | ||
4085 | bfqq->entity.prio_changed = 1; | ||
4086 | } | ||
4087 | |||
3847 | bfq_add_bfqq_busy(bfqd, bfqq); | 4088 | bfq_add_bfqq_busy(bfqd, bfqq); |
3848 | 4089 | ||
3849 | /* | 4090 | /* |
@@ -3857,6 +4098,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, | |||
3857 | * function bfq_bfqq_update_budg_for_activation). | 4098 | * function bfq_bfqq_update_budg_for_activation). |
3858 | */ | 4099 | */ |
3859 | if (bfqd->in_service_queue && bfqq_wants_to_preempt && | 4100 | if (bfqd->in_service_queue && bfqq_wants_to_preempt && |
4101 | bfqd->in_service_queue->wr_coeff == 1 && | ||
3860 | next_queue_may_preempt(bfqd)) | 4102 | next_queue_may_preempt(bfqd)) |
3861 | bfq_bfqq_expire(bfqd, bfqd->in_service_queue, | 4103 | bfq_bfqq_expire(bfqd, bfqd->in_service_queue, |
3862 | false, BFQQE_PREEMPTED); | 4104 | false, BFQQE_PREEMPTED); |
@@ -3867,6 +4109,8 @@ static void bfq_add_request(struct request *rq) | |||
3867 | struct bfq_queue *bfqq = RQ_BFQQ(rq); | 4109 | struct bfq_queue *bfqq = RQ_BFQQ(rq); |
3868 | struct bfq_data *bfqd = bfqq->bfqd; | 4110 | struct bfq_data *bfqd = bfqq->bfqd; |
3869 | struct request *next_rq, *prev; | 4111 | struct request *next_rq, *prev; |
4112 | unsigned int old_wr_coeff = bfqq->wr_coeff; | ||
4113 | bool interactive = false; | ||
3870 | 4114 | ||
3871 | bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); | 4115 | bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); |
3872 | bfqq->queued[rq_is_sync(rq)]++; | 4116 | bfqq->queued[rq_is_sync(rq)]++; |
@@ -3882,9 +4126,45 @@ static void bfq_add_request(struct request *rq) | |||
3882 | bfqq->next_rq = next_rq; | 4126 | bfqq->next_rq = next_rq; |
3883 | 4127 | ||
3884 | if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ | 4128 | if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */ |
3885 | bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, rq); | 4129 | bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff, |
3886 | else if (prev != bfqq->next_rq) | 4130 | rq, &interactive); |
3887 | bfq_updated_next_req(bfqd, bfqq); | 4131 | else { |
4132 | if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && | ||
4133 | time_is_before_jiffies( | ||
4134 | bfqq->last_wr_start_finish + | ||
4135 | bfqd->bfq_wr_min_inter_arr_async)) { | ||
4136 | bfqq->wr_coeff = bfqd->bfq_wr_coeff; | ||
4137 | bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); | ||
4138 | |||
4139 | bfqq->entity.prio_changed = 1; | ||
4140 | } | ||
4141 | if (prev != bfqq->next_rq) | ||
4142 | bfq_updated_next_req(bfqd, bfqq); | ||
4143 | } | ||
4144 | |||
4145 | /* | ||
4146 | * Assign jiffies to last_wr_start_finish in the following | ||
4147 | * cases: | ||
4148 | * | ||
4149 | * . if bfqq is not going to be weight-raised, because, for | ||
4150 | * non weight-raised queues, last_wr_start_finish stores the | ||
4151 | * arrival time of the last request; as of now, this piece | ||
4152 | * of information is used only for deciding whether to | ||
4153 | * weight-raise async queues | ||
4154 | * | ||
4155 | * . if bfqq is not weight-raised, because, if bfqq is now | ||
4156 | * switching to weight-raised, then last_wr_start_finish | ||
4157 | * stores the time when weight-raising starts | ||
4158 | * | ||
4159 | * . if bfqq is interactive, because, regardless of whether | ||
4160 | * bfqq is currently weight-raised, the weight-raising | ||
4161 | * period must start or restart (this case is considered | ||
4162 | * separately because it is not detected by the above | ||
4163 | * conditions, if bfqq is already weight-raised) | ||
4164 | */ | ||
4165 | if (bfqd->low_latency && | ||
4166 | (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) | ||
4167 | bfqq->last_wr_start_finish = jiffies; | ||
3888 | } | 4168 | } |
3889 | 4169 | ||
3890 | static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, | 4170 | static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, |
@@ -4087,6 +4367,46 @@ end: | |||
4087 | bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); | 4367 | bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); |
4088 | } | 4368 | } |
4089 | 4369 | ||
4370 | /* Must be called with bfqq != NULL */ | ||
4371 | static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) | ||
4372 | { | ||
4373 | bfqq->wr_coeff = 1; | ||
4374 | bfqq->wr_cur_max_time = 0; | ||
4375 | /* | ||
4376 | * Trigger a weight change on the next invocation of | ||
4377 | * __bfq_entity_update_weight_prio. | ||
4378 | */ | ||
4379 | bfqq->entity.prio_changed = 1; | ||
4380 | } | ||
4381 | |||
4382 | static void bfq_end_wr_async_queues(struct bfq_data *bfqd, | ||
4383 | struct bfq_group *bfqg) | ||
4384 | { | ||
4385 | int i, j; | ||
4386 | |||
4387 | for (i = 0; i < 2; i++) | ||
4388 | for (j = 0; j < IOPRIO_BE_NR; j++) | ||
4389 | if (bfqg->async_bfqq[i][j]) | ||
4390 | bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); | ||
4391 | if (bfqg->async_idle_bfqq) | ||
4392 | bfq_bfqq_end_wr(bfqg->async_idle_bfqq); | ||
4393 | } | ||
4394 | |||
4395 | static void bfq_end_wr(struct bfq_data *bfqd) | ||
4396 | { | ||
4397 | struct bfq_queue *bfqq; | ||
4398 | |||
4399 | spin_lock_irq(&bfqd->lock); | ||
4400 | |||
4401 | list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) | ||
4402 | bfq_bfqq_end_wr(bfqq); | ||
4403 | list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) | ||
4404 | bfq_bfqq_end_wr(bfqq); | ||
4405 | bfq_end_wr_async(bfqd); | ||
4406 | |||
4407 | spin_unlock_irq(&bfqd->lock); | ||
4408 | } | ||
4409 | |||
4090 | static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, | 4410 | static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, |
4091 | struct bio *bio) | 4411 | struct bio *bio) |
4092 | { | 4412 | { |
@@ -4110,16 +4430,32 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, | |||
4110 | return bfqq == RQ_BFQQ(rq); | 4430 | return bfqq == RQ_BFQQ(rq); |
4111 | } | 4431 | } |
4112 | 4432 | ||
4433 | /* | ||
4434 | * Set the maximum time for the in-service queue to consume its | ||
4435 | * budget. This prevents seeky processes from lowering the throughput. | ||
4436 | * In practice, a time-slice service scheme is used with seeky | ||
4437 | * processes. | ||
4438 | */ | ||
4439 | static void bfq_set_budget_timeout(struct bfq_data *bfqd, | ||
4440 | struct bfq_queue *bfqq) | ||
4441 | { | ||
4442 | bfqd->last_budget_start = ktime_get(); | ||
4443 | |||
4444 | bfqq->budget_timeout = jiffies + | ||
4445 | bfqd->bfq_timeout * | ||
4446 | (bfqq->entity.weight / bfqq->entity.orig_weight); | ||
4447 | } | ||
4448 | |||
4113 | static void __bfq_set_in_service_queue(struct bfq_data *bfqd, | 4449 | static void __bfq_set_in_service_queue(struct bfq_data *bfqd, |
4114 | struct bfq_queue *bfqq) | 4450 | struct bfq_queue *bfqq) |
4115 | { | 4451 | { |
4116 | if (bfqq) { | 4452 | if (bfqq) { |
4117 | bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); | 4453 | bfqg_stats_update_avg_queue_size(bfqq_group(bfqq)); |
4118 | bfq_mark_bfqq_budget_new(bfqq); | ||
4119 | bfq_clear_bfqq_fifo_expire(bfqq); | 4454 | bfq_clear_bfqq_fifo_expire(bfqq); |
4120 | 4455 | ||
4121 | bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; | 4456 | bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; |
4122 | 4457 | ||
4458 | bfq_set_budget_timeout(bfqd, bfqq); | ||
4123 | bfq_log_bfqq(bfqd, bfqq, | 4459 | bfq_log_bfqq(bfqd, bfqq, |
4124 | "set_in_service_queue, cur-budget = %d", | 4460 | "set_in_service_queue, cur-budget = %d", |
4125 | bfqq->entity.budget); | 4461 | bfqq->entity.budget); |
@@ -4159,9 +4495,13 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) | |||
4159 | */ | 4495 | */ |
4160 | sl = bfqd->bfq_slice_idle; | 4496 | sl = bfqd->bfq_slice_idle; |
4161 | /* | 4497 | /* |
4162 | * Grant only minimum idle time if the queue is seeky. | 4498 | * Unless the queue is being weight-raised, grant only minimum |
4499 | * idle time if the queue is seeky. A long idling is preserved | ||
4500 | * for a weight-raised queue, because it is needed for | ||
4501 | * guaranteeing to the queue its reserved share of the | ||
4502 | * throughput. | ||
4163 | */ | 4503 | */ |
4164 | if (BFQQ_SEEKY(bfqq)) | 4504 | if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1) |
4165 | sl = min_t(u64, sl, BFQ_MIN_TT); | 4505 | sl = min_t(u64, sl, BFQ_MIN_TT); |
4166 | 4506 | ||
4167 | bfqd->last_idling_start = ktime_get(); | 4507 | bfqd->last_idling_start = ktime_get(); |
@@ -4171,27 +4511,6 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) | |||
4171 | } | 4511 | } |
4172 | 4512 | ||
4173 | /* | 4513 | /* |
4174 | * Set the maximum time for the in-service queue to consume its | ||
4175 | * budget. This prevents seeky processes from lowering the disk | ||
4176 | * throughput (always guaranteed with a time slice scheme as in CFQ). | ||
4177 | */ | ||
4178 | static void bfq_set_budget_timeout(struct bfq_data *bfqd) | ||
4179 | { | ||
4180 | struct bfq_queue *bfqq = bfqd->in_service_queue; | ||
4181 | unsigned int timeout_coeff = bfqq->entity.weight / | ||
4182 | bfqq->entity.orig_weight; | ||
4183 | |||
4184 | bfqd->last_budget_start = ktime_get(); | ||
4185 | |||
4186 | bfq_clear_bfqq_budget_new(bfqq); | ||
4187 | bfqq->budget_timeout = jiffies + | ||
4188 | bfqd->bfq_timeout * timeout_coeff; | ||
4189 | |||
4190 | bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", | ||
4191 | jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); | ||
4192 | } | ||
4193 | |||
4194 | /* | ||
4195 | * In autotuning mode, max_budget is dynamically recomputed as the | 4514 | * In autotuning mode, max_budget is dynamically recomputed as the |
4196 | * amount of sectors transferred in timeout at the estimated peak | 4515 | * amount of sectors transferred in timeout at the estimated peak |
4197 | * rate. This enables BFQ to utilize a full timeslice with a full | 4516 | * rate. This enables BFQ to utilize a full timeslice with a full |
@@ -4204,6 +4523,42 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd) | |||
4204 | jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT; | 4523 | jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT; |
4205 | } | 4524 | } |
4206 | 4525 | ||
4526 | /* | ||
4527 | * Update parameters related to throughput and responsiveness, as a | ||
4528 | * function of the estimated peak rate. See comments on | ||
4529 | * bfq_calc_max_budget(), and on T_slow and T_fast arrays. | ||
4530 | */ | ||
4531 | static void update_thr_responsiveness_params(struct bfq_data *bfqd) | ||
4532 | { | ||
4533 | int dev_type = blk_queue_nonrot(bfqd->queue); | ||
4534 | |||
4535 | if (bfqd->bfq_user_max_budget == 0) | ||
4536 | bfqd->bfq_max_budget = | ||
4537 | bfq_calc_max_budget(bfqd); | ||
4538 | |||
4539 | if (bfqd->device_speed == BFQ_BFQD_FAST && | ||
4540 | bfqd->peak_rate < device_speed_thresh[dev_type]) { | ||
4541 | bfqd->device_speed = BFQ_BFQD_SLOW; | ||
4542 | bfqd->RT_prod = R_slow[dev_type] * | ||
4543 | T_slow[dev_type]; | ||
4544 | } else if (bfqd->device_speed == BFQ_BFQD_SLOW && | ||
4545 | bfqd->peak_rate > device_speed_thresh[dev_type]) { | ||
4546 | bfqd->device_speed = BFQ_BFQD_FAST; | ||
4547 | bfqd->RT_prod = R_fast[dev_type] * | ||
4548 | T_fast[dev_type]; | ||
4549 | } | ||
4550 | |||
4551 | bfq_log(bfqd, | ||
4552 | "dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec", | ||
4553 | dev_type == 0 ? "ROT" : "NONROT", | ||
4554 | bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW", | ||
4555 | bfqd->device_speed == BFQ_BFQD_FAST ? | ||
4556 | (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT : | ||
4557 | (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT, | ||
4558 | (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>> | ||
4559 | BFQ_RATE_SHIFT); | ||
4560 | } | ||
4561 | |||
4207 | static void bfq_reset_rate_computation(struct bfq_data *bfqd, | 4562 | static void bfq_reset_rate_computation(struct bfq_data *bfqd, |
4208 | struct request *rq) | 4563 | struct request *rq) |
4209 | { | 4564 | { |
@@ -4315,9 +4670,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) | |||
4315 | rate /= divisor; /* smoothing constant alpha = 1/divisor */ | 4670 | rate /= divisor; /* smoothing constant alpha = 1/divisor */ |
4316 | 4671 | ||
4317 | bfqd->peak_rate += rate; | 4672 | bfqd->peak_rate += rate; |
4318 | if (bfqd->bfq_user_max_budget == 0) | 4673 | update_thr_responsiveness_params(bfqd); |
4319 | bfqd->bfq_max_budget = | ||
4320 | bfq_calc_max_budget(bfqd); | ||
4321 | 4674 | ||
4322 | reset_computation: | 4675 | reset_computation: |
4323 | bfq_reset_rate_computation(bfqd, rq); | 4676 | bfq_reset_rate_computation(bfqd, rq); |
@@ -4439,9 +4792,18 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) | |||
4439 | 4792 | ||
4440 | static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) | 4793 | static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) |
4441 | { | 4794 | { |
4442 | if (RB_EMPTY_ROOT(&bfqq->sort_list)) | 4795 | if (RB_EMPTY_ROOT(&bfqq->sort_list)) { |
4796 | if (bfqq->dispatched == 0) | ||
4797 | /* | ||
4798 | * Overloading budget_timeout field to store | ||
4799 | * the time at which the queue remains with no | ||
4800 | * backlog and no outstanding request; used by | ||
4801 | * the weight-raising mechanism. | ||
4802 | */ | ||
4803 | bfqq->budget_timeout = jiffies; | ||
4804 | |||
4443 | bfq_del_bfqq_busy(bfqd, bfqq, true); | 4805 | bfq_del_bfqq_busy(bfqd, bfqq, true); |
4444 | else | 4806 | } else |
4445 | bfq_requeue_bfqq(bfqd, bfqq); | 4807 | bfq_requeue_bfqq(bfqd, bfqq); |
4446 | 4808 | ||
4447 | /* | 4809 | /* |
@@ -4468,9 +4830,18 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, | |||
4468 | struct request *next_rq; | 4830 | struct request *next_rq; |
4469 | int budget, min_budget; | 4831 | int budget, min_budget; |
4470 | 4832 | ||
4471 | budget = bfqq->max_budget; | ||
4472 | min_budget = bfq_min_budget(bfqd); | 4833 | min_budget = bfq_min_budget(bfqd); |
4473 | 4834 | ||
4835 | if (bfqq->wr_coeff == 1) | ||
4836 | budget = bfqq->max_budget; | ||
4837 | else /* | ||
4838 | * Use a constant, low budget for weight-raised queues, | ||
4839 | * to help achieve a low latency. Keep it slightly higher | ||
4840 | * than the minimum possible budget, to cause a little | ||
4841 | * bit fewer expirations. | ||
4842 | */ | ||
4843 | budget = 2 * min_budget; | ||
4844 | |||
4474 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", | 4845 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", |
4475 | bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); | 4846 | bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); |
4476 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", | 4847 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", |
@@ -4478,7 +4849,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, | |||
4478 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", | 4849 | bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", |
4479 | bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); | 4850 | bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); |
4480 | 4851 | ||
4481 | if (bfq_bfqq_sync(bfqq)) { | 4852 | if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { |
4482 | switch (reason) { | 4853 | switch (reason) { |
4483 | /* | 4854 | /* |
4484 | * Caveat: in all the following cases we trade latency | 4855 | * Caveat: in all the following cases we trade latency |
@@ -4577,7 +4948,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, | |||
4577 | default: | 4948 | default: |
4578 | return; | 4949 | return; |
4579 | } | 4950 | } |
4580 | } else { | 4951 | } else if (!bfq_bfqq_sync(bfqq)) { |
4581 | /* | 4952 | /* |
4582 | * Async queues get always the maximum possible | 4953 | * Async queues get always the maximum possible |
4583 | * budget, as for them we do not care about latency | 4954 | * budget, as for them we do not care about latency |
@@ -4766,15 +5137,19 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
4766 | * bandwidth, and not time, distribution with little unlucky | 5137 | * bandwidth, and not time, distribution with little unlucky |
4767 | * or quasi-sequential processes. | 5138 | * or quasi-sequential processes. |
4768 | */ | 5139 | */ |
4769 | if (slow || | 5140 | if (bfqq->wr_coeff == 1 && |
4770 | (reason == BFQQE_BUDGET_TIMEOUT && | 5141 | (slow || |
4771 | bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)) | 5142 | (reason == BFQQE_BUDGET_TIMEOUT && |
5143 | bfq_bfqq_budget_left(bfqq) >= entity->budget / 3))) | ||
4772 | bfq_bfqq_charge_time(bfqd, bfqq, delta); | 5144 | bfq_bfqq_charge_time(bfqd, bfqq, delta); |
4773 | 5145 | ||
4774 | if (reason == BFQQE_TOO_IDLE && | 5146 | if (reason == BFQQE_TOO_IDLE && |
4775 | entity->service <= 2 * entity->budget / 10) | 5147 | entity->service <= 2 * entity->budget / 10) |
4776 | bfq_clear_bfqq_IO_bound(bfqq); | 5148 | bfq_clear_bfqq_IO_bound(bfqq); |
4777 | 5149 | ||
5150 | if (bfqd->low_latency && bfqq->wr_coeff == 1) | ||
5151 | bfqq->last_wr_start_finish = jiffies; | ||
5152 | |||
4778 | bfq_log_bfqq(bfqd, bfqq, | 5153 | bfq_log_bfqq(bfqd, bfqq, |
4779 | "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, | 5154 | "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, |
4780 | slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); | 5155 | slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); |
@@ -4801,10 +5176,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, | |||
4801 | */ | 5176 | */ |
4802 | static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) | 5177 | static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) |
4803 | { | 5178 | { |
4804 | if (bfq_bfqq_budget_new(bfqq) || | 5179 | return time_is_before_eq_jiffies(bfqq->budget_timeout); |
4805 | time_is_after_jiffies(bfqq->budget_timeout)) | ||
4806 | return false; | ||
4807 | return true; | ||
4808 | } | 5180 | } |
4809 | 5181 | ||
4810 | /* | 5182 | /* |
@@ -4831,19 +5203,40 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) | |||
4831 | 5203 | ||
4832 | /* | 5204 | /* |
4833 | * For a queue that becomes empty, device idling is allowed only if | 5205 | * For a queue that becomes empty, device idling is allowed only if |
4834 | * this function returns true for the queue. And this function returns | 5206 | * this function returns true for the queue. As a consequence, since |
4835 | * true only if idling is beneficial for throughput. | 5207 | * device idling plays a critical role in both throughput boosting and |
5208 | * service guarantees, the return value of this function plays a | ||
5209 | * critical role in both these aspects as well. | ||
5210 | * | ||
5211 | * In a nutshell, this function returns true only if idling is | ||
5212 | * beneficial for throughput or, even if detrimental for throughput, | ||
5213 | * idling is however necessary to preserve service guarantees (low | ||
5214 | * latency, desired throughput distribution, ...). In particular, on | ||
5215 | * NCQ-capable devices, this function tries to return false, so as to | ||
5216 | * help keep the drives' internal queues full, whenever this helps the | ||
5217 | * device boost the throughput without causing any service-guarantee | ||
5218 | * issue. | ||
5219 | * | ||
5220 | * In more detail, the return value of this function is obtained by, | ||
5221 | * first, computing a number of boolean variables that take into | ||
5222 | * account throughput and service-guarantee issues, and, then, | ||
5223 | * combining these variables in a logical expression. Most of the | ||
5224 | * issues taken into account are not trivial. We discuss these issues | ||
5225 | * individually while introducing the variables. | ||
4836 | */ | 5226 | */ |
4837 | static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) | 5227 | static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) |
4838 | { | 5228 | { |
4839 | struct bfq_data *bfqd = bfqq->bfqd; | 5229 | struct bfq_data *bfqd = bfqq->bfqd; |
4840 | bool idling_boosts_thr; | 5230 | bool idling_boosts_thr, asymmetric_scenario; |
4841 | 5231 | ||
4842 | if (bfqd->strict_guarantees) | 5232 | if (bfqd->strict_guarantees) |
4843 | return true; | 5233 | return true; |
4844 | 5234 | ||
4845 | /* | 5235 | /* |
4846 | * The value of the next variable is computed considering that | 5236 | * The next variable takes into account the cases where idling |
5237 | * boosts the throughput. | ||
5238 | * | ||
5239 | * The value of the variable is computed considering that | ||
4847 | * idling is usually beneficial for the throughput if: | 5240 | * idling is usually beneficial for the throughput if: |
4848 | * (a) the device is not NCQ-capable, or | 5241 | * (a) the device is not NCQ-capable, or |
4849 | * (b) regardless of the presence of NCQ, the request pattern | 5242 | * (b) regardless of the presence of NCQ, the request pattern |
@@ -4857,13 +5250,80 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) | |||
4857 | idling_boosts_thr = !bfqd->hw_tag || bfq_bfqq_IO_bound(bfqq); | 5250 | idling_boosts_thr = !bfqd->hw_tag || bfq_bfqq_IO_bound(bfqq); |
4858 | 5251 | ||
4859 | /* | 5252 | /* |
4860 | * We have now the components we need to compute the return | 5253 | * There is then a case where idling must be performed not for |
4861 | * value of the function, which is true only if both the | 5254 | * throughput concerns, but to preserve service guarantees. To |
4862 | * following conditions hold: | 5255 | * introduce it, we can note that allowing the drive to |
5256 | * enqueue more than one request at a time, and hence | ||
5257 | * delegating de facto final scheduling decisions to the | ||
5258 | * drive's internal scheduler, causes loss of control on the | ||
5259 | * actual request service order. In particular, the critical | ||
5260 | * situation is when requests from different processes happen | ||
5261 | * to be present, at the same time, in the internal queue(s) | ||
5262 | * of the drive. In such a situation, the drive, by deciding | ||
5263 | * the service order of the internally-queued requests, does | ||
5264 | * determine also the actual throughput distribution among | ||
5265 | * these processes. But the drive typically has no notion or | ||
5266 | * concern about per-process throughput distribution, and | ||
5267 | * makes its decisions only on a per-request basis. Therefore, | ||
5268 | * the service distribution enforced by the drive's internal | ||
5269 | * scheduler is likely to coincide with the desired | ||
5270 | * device-throughput distribution only in a completely | ||
5271 | * symmetric scenario where: (i) each of these processes must | ||
5272 | * get the same throughput as the others; (ii) all these | ||
5273 | * processes have the same I/O pattern (either sequential or | ||
5274 | * random). In fact, in such a scenario, the drive will tend | ||
5275 | * to treat the requests of each of these processes in about | ||
5276 | * the same way as the requests of the others, and thus to | ||
5277 | * provide each of these processes with about the same | ||
5278 | * throughput (which is exactly the desired throughput | ||
5279 | * distribution). In contrast, in any asymmetric scenario, | ||
5280 | * device idling is certainly needed to guarantee that bfqq | ||
5281 | * receives its assigned fraction of the device throughput | ||
5282 | * (see [1] for details). | ||
5283 | * | ||
5284 | * As for sub-condition (i), actually we check only whether | ||
5285 | * bfqq is being weight-raised. In fact, if bfqq is not being | ||
5286 | * weight-raised, we have that: | ||
5287 | * - if the process associated with bfqq is not I/O-bound, then | ||
5288 | * it is neither latency- nor throughput-critical; therefore | ||
5289 | * idling is not needed for bfqq; | ||
5290 | * - if the process associated with bfqq is I/O-bound, then | ||
5291 | * idling is already granted with bfqq (see the comments on | ||
5292 | * idling_boosts_thr). | ||
5293 | * | ||
5294 | * We do not check sub-condition (ii) at all, i.e., the next | ||
5295 | * variable is true if and only if bfqq is being | ||
5296 | * weight-raised. We do not need to control sub-condition (ii) | ||
5297 | * for the following reason: | ||
5298 | * - if bfqq is being weight-raised, then idling is already | ||
5299 | * guaranteed to bfqq by sub-condition (i); | ||
5300 | * - if bfqq is not being weight-raised, then idling is | ||
5301 | * already guaranteed to bfqq (only) if it matters, i.e., if | ||
5302 | * bfqq is associated to a currently I/O-bound process (see | ||
5303 | * the above comment on sub-condition (i)). | ||
5304 | * | ||
5305 | * As a side note, it is worth considering that the above | ||
5306 | * device-idling countermeasures may however fail in the | ||
5307 | * following unlucky scenario: if idling is (correctly) | ||
5308 | * disabled in a time period during which the symmetry | ||
5309 | * sub-condition holds, and hence the device is allowed to | ||
5310 | * enqueue many requests, but at some later point in time some | ||
5311 | * sub-condition stops to hold, then it may become impossible | ||
5312 | * to let requests be served in the desired order until all | ||
5313 | * the requests already queued in the device have been served. | ||
5314 | */ | ||
5315 | asymmetric_scenario = bfqq->wr_coeff > 1; | ||
5316 | |||
5317 | /* | ||
5318 | * We have now all the components we need to compute the return | ||
5319 | * value of the function, which is true only if both the following | ||
5320 | * conditions hold: | ||
4863 | * 1) bfqq is sync, because idling make sense only for sync queues; | 5321 | * 1) bfqq is sync, because idling make sense only for sync queues; |
4864 | * 2) idling boosts the throughput. | 5322 | * 2) idling either boosts the throughput (without issues), or |
5323 | * is necessary to preserve service guarantees. | ||
4865 | */ | 5324 | */ |
4866 | return bfq_bfqq_sync(bfqq) && idling_boosts_thr; | 5325 | return bfq_bfqq_sync(bfqq) && |
5326 | (idling_boosts_thr || asymmetric_scenario); | ||
4867 | } | 5327 | } |
4868 | 5328 | ||
4869 | /* | 5329 | /* |
@@ -4986,6 +5446,43 @@ keep_queue: | |||
4986 | return bfqq; | 5446 | return bfqq; |
4987 | } | 5447 | } |
4988 | 5448 | ||
5449 | static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) | ||
5450 | { | ||
5451 | struct bfq_entity *entity = &bfqq->entity; | ||
5452 | |||
5453 | if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ | ||
5454 | bfq_log_bfqq(bfqd, bfqq, | ||
5455 | "raising period dur %u/%u msec, old coeff %u, w %d(%d)", | ||
5456 | jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), | ||
5457 | jiffies_to_msecs(bfqq->wr_cur_max_time), | ||
5458 | bfqq->wr_coeff, | ||
5459 | bfqq->entity.weight, bfqq->entity.orig_weight); | ||
5460 | |||
5461 | if (entity->prio_changed) | ||
5462 | bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); | ||
5463 | |||
5464 | /* | ||
5465 | * If too much time has elapsed from the beginning of | ||
5466 | * this weight-raising period, then end weight | ||
5467 | * raising. | ||
5468 | */ | ||
5469 | if (time_is_before_jiffies(bfqq->last_wr_start_finish + | ||
5470 | bfqq->wr_cur_max_time)) { | ||
5471 | bfqq->last_wr_start_finish = jiffies; | ||
5472 | bfq_log_bfqq(bfqd, bfqq, | ||
5473 | "wrais ending at %lu, rais_max_time %u", | ||
5474 | bfqq->last_wr_start_finish, | ||
5475 | jiffies_to_msecs(bfqq->wr_cur_max_time)); | ||
5476 | bfq_bfqq_end_wr(bfqq); | ||
5477 | } | ||
5478 | } | ||
5479 | /* Update weight both if it must be raised and if it must be lowered */ | ||
5480 | if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) | ||
5481 | __bfq_entity_update_weight_prio( | ||
5482 | bfq_entity_service_tree(entity), | ||
5483 | entity); | ||
5484 | } | ||
5485 | |||
4989 | /* | 5486 | /* |
4990 | * Dispatch next request from bfqq. | 5487 | * Dispatch next request from bfqq. |
4991 | */ | 5488 | */ |
@@ -5001,6 +5498,19 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, | |||
5001 | 5498 | ||
5002 | bfq_dispatch_remove(bfqd->queue, rq); | 5499 | bfq_dispatch_remove(bfqd->queue, rq); |
5003 | 5500 | ||
5501 | /* | ||
5502 | * If weight raising has to terminate for bfqq, then next | ||
5503 | * function causes an immediate update of bfqq's weight, | ||
5504 | * without waiting for next activation. As a consequence, on | ||
5505 | * expiration, bfqq will be timestamped as if it has never been | ||
5506 | * weight-raised during this service slot, even if it has | ||
5507 | * received part or even most of the service as a | ||
5508 | * weight-raised queue. This inflates bfqq's timestamps, which | ||
5509 | * is beneficial, as bfqq is then more willing to leave the | ||
5510 | * device immediately to possible other weight-raised queues. | ||
5511 | */ | ||
5512 | bfq_update_wr_data(bfqd, bfqq); | ||
5513 | |||
5004 | if (!bfqd->in_service_bic) { | 5514 | if (!bfqd->in_service_bic) { |
5005 | atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); | 5515 | atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); |
5006 | bfqd->in_service_bic = RQ_BIC(rq); | 5516 | bfqd->in_service_bic = RQ_BIC(rq); |
@@ -5306,6 +5816,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, | |||
5306 | bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; | 5816 | bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; |
5307 | bfqq->budget_timeout = bfq_smallest_from_now(); | 5817 | bfqq->budget_timeout = bfq_smallest_from_now(); |
5308 | 5818 | ||
5819 | bfqq->wr_coeff = 1; | ||
5820 | bfqq->last_wr_start_finish = bfq_smallest_from_now(); | ||
5821 | |||
5309 | /* first request is almost certainly seeky */ | 5822 | /* first request is almost certainly seeky */ |
5310 | bfqq->seek_history = 1; | 5823 | bfqq->seek_history = 1; |
5311 | } | 5824 | } |
@@ -5440,7 +5953,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, | |||
5440 | (bfqd->hw_tag && BFQQ_SEEKY(bfqq))) | 5953 | (bfqd->hw_tag && BFQQ_SEEKY(bfqq))) |
5441 | enable_idle = 0; | 5954 | enable_idle = 0; |
5442 | else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) { | 5955 | else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) { |
5443 | if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle) | 5956 | if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle && |
5957 | bfqq->wr_coeff == 1) | ||
5444 | enable_idle = 0; | 5958 | enable_idle = 0; |
5445 | else | 5959 | else |
5446 | enable_idle = 1; | 5960 | enable_idle = 1; |
@@ -5618,6 +6132,16 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) | |||
5618 | bfqd->rq_in_driver--; | 6132 | bfqd->rq_in_driver--; |
5619 | bfqq->dispatched--; | 6133 | bfqq->dispatched--; |
5620 | 6134 | ||
6135 | if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { | ||
6136 | /* | ||
6137 | * Set budget_timeout (which we overload to store the | ||
6138 | * time at which the queue remains with no backlog and | ||
6139 | * no outstanding request; used by the weight-raising | ||
6140 | * mechanism). | ||
6141 | */ | ||
6142 | bfqq->budget_timeout = jiffies; | ||
6143 | } | ||
6144 | |||
5621 | now_ns = ktime_get_ns(); | 6145 | now_ns = ktime_get_ns(); |
5622 | 6146 | ||
5623 | bfqq->ttime.last_end_request = now_ns; | 6147 | bfqq->ttime.last_end_request = now_ns; |
@@ -5655,10 +6179,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) | |||
5655 | * or if we want to idle in case it has no pending requests. | 6179 | * or if we want to idle in case it has no pending requests. |
5656 | */ | 6180 | */ |
5657 | if (bfqd->in_service_queue == bfqq) { | 6181 | if (bfqd->in_service_queue == bfqq) { |
5658 | if (bfq_bfqq_budget_new(bfqq)) | 6182 | if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) { |
5659 | bfq_set_budget_timeout(bfqd); | ||
5660 | |||
5661 | if (bfq_bfqq_must_idle(bfqq)) { | ||
5662 | bfq_arm_slice_timer(bfqd); | 6183 | bfq_arm_slice_timer(bfqd); |
5663 | return; | 6184 | return; |
5664 | } else if (bfq_may_expire_for_budg_timeout(bfqq)) | 6185 | } else if (bfq_may_expire_for_budg_timeout(bfqq)) |
@@ -5966,6 +6487,26 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) | |||
5966 | 6487 | ||
5967 | bfqd->bfq_requests_within_timer = 120; | 6488 | bfqd->bfq_requests_within_timer = 120; |
5968 | 6489 | ||
6490 | bfqd->low_latency = true; | ||
6491 | |||
6492 | /* | ||
6493 | * Trade-off between responsiveness and fairness. | ||
6494 | */ | ||
6495 | bfqd->bfq_wr_coeff = 30; | ||
6496 | bfqd->bfq_wr_max_time = 0; | ||
6497 | bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); | ||
6498 | bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); | ||
6499 | |||
6500 | /* | ||
6501 | * Begin by assuming, optimistically, that the device is a | ||
6502 | * high-speed one, and that its peak rate is equal to 2/3 of | ||
6503 | * the highest reference rate. | ||
6504 | */ | ||
6505 | bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * | ||
6506 | T_fast[blk_queue_nonrot(bfqd->queue)]; | ||
6507 | bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3; | ||
6508 | bfqd->device_speed = BFQ_BFQD_FAST; | ||
6509 | |||
5969 | spin_lock_init(&bfqd->lock); | 6510 | spin_lock_init(&bfqd->lock); |
5970 | 6511 | ||
5971 | /* | 6512 | /* |
@@ -6047,6 +6588,7 @@ SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2); | |||
6047 | SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); | 6588 | SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); |
6048 | SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1); | 6589 | SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1); |
6049 | SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0); | 6590 | SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0); |
6591 | SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); | ||
6050 | #undef SHOW_FUNCTION | 6592 | #undef SHOW_FUNCTION |
6051 | 6593 | ||
6052 | #define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ | 6594 | #define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ |
@@ -6167,6 +6709,22 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e, | |||
6167 | return ret; | 6709 | return ret; |
6168 | } | 6710 | } |
6169 | 6711 | ||
6712 | static ssize_t bfq_low_latency_store(struct elevator_queue *e, | ||
6713 | const char *page, size_t count) | ||
6714 | { | ||
6715 | struct bfq_data *bfqd = e->elevator_data; | ||
6716 | unsigned long uninitialized_var(__data); | ||
6717 | int ret = bfq_var_store(&__data, (page), count); | ||
6718 | |||
6719 | if (__data > 1) | ||
6720 | __data = 1; | ||
6721 | if (__data == 0 && bfqd->low_latency != 0) | ||
6722 | bfq_end_wr(bfqd); | ||
6723 | bfqd->low_latency = __data; | ||
6724 | |||
6725 | return ret; | ||
6726 | } | ||
6727 | |||
6170 | #define BFQ_ATTR(name) \ | 6728 | #define BFQ_ATTR(name) \ |
6171 | __ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store) | 6729 | __ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store) |
6172 | 6730 | ||
@@ -6180,6 +6738,7 @@ static struct elv_fs_entry bfq_attrs[] = { | |||
6180 | BFQ_ATTR(max_budget), | 6738 | BFQ_ATTR(max_budget), |
6181 | BFQ_ATTR(timeout_sync), | 6739 | BFQ_ATTR(timeout_sync), |
6182 | BFQ_ATTR(strict_guarantees), | 6740 | BFQ_ATTR(strict_guarantees), |
6741 | BFQ_ATTR(low_latency), | ||
6183 | __ATTR_NULL | 6742 | __ATTR_NULL |
6184 | }; | 6743 | }; |
6185 | 6744 | ||
@@ -6242,6 +6801,39 @@ static int __init bfq_init(void) | |||
6242 | if (bfq_slab_setup()) | 6801 | if (bfq_slab_setup()) |
6243 | goto err_pol_unreg; | 6802 | goto err_pol_unreg; |
6244 | 6803 | ||
6804 | /* | ||
6805 | * Times to load large popular applications for the typical | ||
6806 | * systems installed on the reference devices (see the | ||
6807 | * comments before the definitions of the next two | ||
6808 | * arrays). Actually, we use slightly slower values, as the | ||
6809 | * estimated peak rate tends to be smaller than the actual | ||
6810 | * peak rate. The reason for this last fact is that estimates | ||
6811 | * are computed over much shorter time intervals than the long | ||
6812 | * intervals typically used for benchmarking. Why? First, to | ||
6813 | * adapt more quickly to variations. Second, because an I/O | ||
6814 | * scheduler cannot rely on a peak-rate-evaluation workload to | ||
6815 | * be run for a long time. | ||
6816 | */ | ||
6817 | T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */ | ||
6818 | T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */ | ||
6819 | T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */ | ||
6820 | T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */ | ||
6821 | |||
6822 | /* | ||
6823 | * Thresholds that determine the switch between speed classes | ||
6824 | * (see the comments before the definition of the array | ||
6825 | * device_speed_thresh). These thresholds are biased towards | ||
6826 | * transitions to the fast class. This is safer than the | ||
6827 | * opposite bias. In fact, a wrong transition to the slow | ||
6828 | * class results in short weight-raising periods, because the | ||
6829 | * speed of the device then tends to be higher than the | ||
6830 | * reference peak rate. On the opposite end, a wrong | ||
6831 | * transition to the fast class tends to increase | ||
6832 | * weight-raising periods, because of the opposite reason. | ||
6833 | */ | ||
6834 | device_speed_thresh[0] = (4 * R_slow[0]) / 3; | ||
6835 | device_speed_thresh[1] = (4 * R_slow[1]) / 3; | ||
6836 | |||
6245 | ret = elv_register(&iosched_bfq_mq); | 6837 | ret = elv_register(&iosched_bfq_mq); |
6246 | if (ret) | 6838 | if (ret) |
6247 | goto err_pol_unreg; | 6839 | goto err_pol_unreg; |