 -rw-r--r--  Documentation/block/bfq-iosched.txt |   9
 -rw-r--r--  block/bfq-iosched.c                 | 740
 2 files changed, 675 insertions, 74 deletions
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 461b27fce979..1b87df6cd476 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -375,6 +375,11 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
+DO NOT enable this mode if you need full control on bandwidth
+distribution. In fact, if it is enabled, then BFQ automatically
+increases the bandwidth share of privileged applications, as the main
+means to guarantee a lower latency to them.
+
 timeout_sync
 ------------
 
@@ -507,6 +512,10 @@ linear mapping between ioprio and weights, described at the beginning
 of the tunable section, is still valid, but all weights higher than
 IOPRIO_BE_NR*10 are mapped to ioprio 0.
 
+Recall that, if low-latency is set, then BFQ automatically raises the
+weight of the queues associated with interactive and soft real-time
+applications. Unset this tunable if you need/want to control weights.
+
 
 [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O
     Scheduler", Proceedings of the First Workshop on Mobile System
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index dce273b91015..1a32c8341ab0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -339,6 +339,17 @@ struct bfq_queue {
 
 	/* pid of the process owning the queue, used for logging purposes */
 	pid_t pid;
+
+	/* current maximum weight-raising time for this queue */
+	unsigned long wr_cur_max_time;
+	/*
+	 * Start time of the current weight-raising period if
+	 * the @bfq-queue is being weight-raised, otherwise
+	 * finish time of the last weight-raising period.
+	 */
+	unsigned long last_wr_start_finish;
+	/* factor by which the weight of this queue is multiplied */
+	unsigned int wr_coeff;
 };
 
 /**
@@ -356,6 +367,11 @@ struct bfq_io_cq {
 #endif
 };
 
+enum bfq_device_speed {
+	BFQ_BFQD_FAST,
+	BFQ_BFQD_SLOW,
+};
+
 /**
  * struct bfq_data - per-device data structure.
  *
@@ -487,6 +503,34 @@ struct bfq_data {
 	 */
 	bool strict_guarantees;
 
+	/* if set to true, low-latency heuristics are enabled */
+	bool low_latency;
+	/*
+	 * Maximum factor by which the weight of a weight-raised queue
+	 * is multiplied.
+	 */
+	unsigned int bfq_wr_coeff;
+	/* maximum duration of a weight-raising period (jiffies) */
+	unsigned int bfq_wr_max_time;
+	/*
+	 * Minimum idle period after which weight-raising may be
+	 * reactivated for a queue (in jiffies).
+	 */
+	unsigned int bfq_wr_min_idle_time;
+	/*
+	 * Minimum period between request arrivals after which
+	 * weight-raising may be reactivated for an already busy async
+	 * queue (in jiffies).
+	 */
+	unsigned long bfq_wr_min_inter_arr_async;
+	/*
+	 * Cached value of the product R*T, used for computing the
+	 * maximum duration of weight raising automatically.
+	 */
+	u64 RT_prod;
+	/* device-speed class for the low-latency heuristic */
+	enum bfq_device_speed device_speed;
+
 	/* fallback dummy bfqq for extreme OOM conditions */
 	struct bfq_queue oom_bfqq;
 
@@ -515,7 +559,6 @@ enum bfqq_state_flags {
 	BFQQF_fifo_expire,	/* FIFO checked in this slice */
 	BFQQF_idle_window,	/* slice idling enabled */
 	BFQQF_sync,		/* synchronous queue */
-	BFQQF_budget_new,	/* no completion with this budget */
 	BFQQF_IO_bound,		/*
 				 * bfqq has timed-out at least once
 				 * having consumed at most 2/10 of
@@ -543,7 +586,6 @@ BFQ_BFQQ_FNS(non_blocking_wait_rq);
 BFQ_BFQQ_FNS(fifo_expire);
 BFQ_BFQQ_FNS(idle_window);
 BFQ_BFQQ_FNS(sync);
-BFQ_BFQQ_FNS(budget_new);
 BFQ_BFQQ_FNS(IO_bound);
 #undef BFQ_BFQQ_FNS
 
@@ -637,7 +679,7 @@ struct bfq_group_data {
 	/* must be the first member */
 	struct blkcg_policy_data pd;
 
-	unsigned short weight;
+	unsigned int weight;
 };
 
 /**
@@ -732,6 +774,8 @@ static void bfq_put_queue(struct bfq_queue *bfqq);
 static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 				       struct bio *bio, bool is_sync,
 				       struct bfq_io_cq *bic);
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg);
 static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
 static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
 
@@ -787,6 +831,56 @@ static struct kmem_cache *bfq_pool;
 /* Shift used for peak rate fixed precision calculations. */
 #define BFQ_RATE_SHIFT		16
 
+/*
+ * By default, BFQ computes the duration of the weight raising for
+ * interactive applications automatically, using the following formula:
+ * duration = (R / r) * T, where r is the peak rate of the device, and
+ * R and T are two reference parameters.
+ * In particular, R is the peak rate of the reference device (see below),
+ * and T is a reference time: given the systems that are likely to be
+ * installed on the reference device according to its speed class, T is
+ * about the maximum time needed, under BFQ and while reading two files in
+ * parallel, to load typical large applications on these systems.
+ * In practice, the slower/faster the device at hand is, the more/less it
+ * takes to load applications with respect to the reference device.
+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive
+ * applications.
+ *
+ * BFQ uses four different reference pairs (R, T), depending on:
+ * . whether the device is rotational or non-rotational;
+ * . whether the device is slow, such as old or portable HDDs, as well as
+ *   SD cards, or fast, such as newer HDDs and SSDs.
+ *
+ * The device's speed class is dynamically (re)detected in
+ * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ *
+ * In the following definitions, R_slow[0]/R_fast[0] and
+ * T_slow[0]/T_fast[0] are the reference values for a slow/fast
+ * rotational device, whereas R_slow[1]/R_fast[1] and
+ * T_slow[1]/T_fast[1] are the reference values for a slow/fast
+ * non-rotational device. Finally, device_speed_thresh are the
+ * thresholds used to switch between speed classes. The reference
+ * rates are not the actual peak rates of the devices used as a
+ * reference, but slightly lower values. The reason for using these
+ * slightly lower values is that the peak-rate estimator tends to
+ * yield slightly lower values than the actual peak rate (it can yield
+ * the actual peak rate only if there is only one process doing I/O,
+ * and the process does sequential I/O).
+ *
+ * Both the reference peak rates and the thresholds are measured in
+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ */
+static int R_slow[2] = {1000, 10700};
+static int R_fast[2] = {14000, 33000};
+/*
+ * To improve readability, a conversion function is used to initialize the
+ * following arrays, which entails that they can be initialized only in a
+ * function.
+ */
+static int T_slow[2];
+static int T_fast[2];
+static int device_speed_thresh[2];
+
 #define BFQ_SERVICE_TREE_INIT	((struct bfq_service_tree)		\
 				{ RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
 
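
The formula in the comment above, duration = (R / r) * T, is what ties the reference pairs to the device at hand. A minimal user-space sketch of the same arithmetic, with an assumed peak rate r (illustrative only; the kernel path uses RT_prod and do_div() in bfq_wr_duration() further below):

    /* Sketch of duration = (R / r) * T with the fast non-rotational
     * reference pair from the arrays above; r is an assumed estimate.
     */
    #include <stdio.h>

    int main(void)
    {
            double R = 33000;      /* R_fast[1], scaled sectors/usec */
            double T_ms = 2500;    /* T_fast[1], about 2.5 s in msec */
            double r = 16500;      /* assumed peak rate: half of R   */

            /* A device measured at half the reference rate gets twice
             * the reference weight-raising duration: 5000 ms here.
             */
            printf("duration = %.0f ms\n", (R / r) * T_ms);
            return 0;
    }
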
@@ -1486,7 +1580,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 
 	if (entity->prio_changed) {
 		struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
-		unsigned short prev_weight, new_weight;
+		unsigned int prev_weight, new_weight;
 		struct bfq_data *bfqd = NULL;
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 		struct bfq_sched_data *sd;
@@ -1535,7 +1629,8 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 		new_st = bfq_entity_service_tree(entity);
 
 		prev_weight = entity->weight;
-		new_weight = entity->orig_weight;
+		new_weight = entity->orig_weight *
+			     (bfqq ? bfqq->wr_coeff : 1);
 		entity->weight = new_weight;
 
 		new_st->wsum += entity->weight;
@@ -1630,6 +1725,8 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 					    struct bfq_service_tree *st,
 					    bool backshifted)
 {
+	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
 	st = __bfq_entity_update_weight_prio(st, entity);
 	bfq_calc_finish(entity, entity->budget);
 
@@ -1659,10 +1756,19 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 	 * time. This may introduce a little unfairness among queues
 	 * with backshifted timestamps, but it does not break
 	 * worst-case fairness guarantees.
+	 *
+	 * As a special case, if bfqq is weight-raised, push up
+	 * timestamps much less, to keep very low the probability that
+	 * this push up causes the backshifted finish timestamps of
+	 * weight-raised queues to become higher than the backshifted
+	 * finish timestamps of non weight-raised queues.
 	 */
 	if (backshifted && bfq_gt(st->vtime, entity->finish)) {
 		unsigned long delta = st->vtime - entity->finish;
 
+		if (bfqq)
+			delta /= bfqq->wr_coeff;
+
 		entity->start += delta;
 		entity->finish += delta;
 	}
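
To see why dividing the push-up by wr_coeff keeps weight-raised queues ahead, it helps to plug in numbers. A tiny user-space sketch with assumed timestamps and the default coefficient of 30 (illustrative only, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long vtime = 10000, finish = 4000; /* assumed timestamps */
            unsigned long delta = vtime - finish;       /* raw push-up: 6000  */
            unsigned int wr_coeff = 30;                 /* default bfq_wr_coeff */

            /* A non weight-raised queue is pushed up by the full delta; a
             * weight-raised one only by delta / wr_coeff, so its backshifted
             * finish time stays well below that of non weight-raised queues.
             */
            printf("push-up, wr_coeff=1:  %lu\n", delta);
            printf("push-up, wr_coeff=30: %lu\n", delta / wr_coeff);
            return 0;
    }
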
@@ -3070,6 +3176,18 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	bfqg_stats_xfer_dead(bfqg);
 }
 
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	struct blkcg_gq *blkg;
+
+	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+
+		bfq_end_wr_async_queues(bfqd, bfqg);
+	}
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
 static int bfq_io_show_weight(struct seq_file *sf, void *v)
 {
 	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
@@ -3433,6 +3551,11 @@ static void bfq_init_entity(struct bfq_entity *entity,
 
 static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}
 
+static void bfq_end_wr_async(struct bfq_data *bfqd)
+{
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
+}
+
 static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
 					    struct blkcg *blkcg)
 {
@@ -3613,7 +3736,7 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
 static unsigned long bfq_serv_to_charge(struct request *rq,
 					struct bfq_queue *bfqq)
 {
-	if (bfq_bfqq_sync(bfqq))
+	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
 		return blk_rq_sectors(rq);
 
 	return blk_rq_sectors(rq) * bfq_async_charge_factor;
@@ -3700,12 +3823,12 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
  * whether the in-service queue should be expired, by returning
  * true. The purpose of expiring the in-service queue is to give bfqq
  * the chance to possibly preempt the in-service queue, and the reason
- * for preempting the in-service queue is to achieve the following
- * goal: guarantee to bfqq its reserved bandwidth even if bfqq has
- * expired because it has remained idle.
+ * for preempting the in-service queue is to achieve one of the two
+ * goals below.
  *
- * In particular, bfqq may have expired for one of the following two
- * reasons:
+ * 1. Guarantee to bfqq its reserved bandwidth even if bfqq has
+ * expired because it has remained idle. In particular, bfqq may have
+ * expired for one of the following two reasons:
  *
  * - BFQQE_NO_MORE_REQUESTS bfqq did not enjoy any device idling
  *   and did not make it to issue a new request before its last
@@ -3769,10 +3892,36 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
  * above-described special way, and signals that the in-service queue
  * should be expired. Timestamp back-shifting is done later in
  * __bfq_activate_entity.
+ *
+ * 2. Reduce latency. Even if timestamps are not backshifted to let
+ * the process associated with bfqq recover a service hole, bfqq may
+ * however happen to have, after being (re)activated, a lower finish
+ * timestamp than the in-service queue. That is, the next budget of
+ * bfqq may have to be completed before the one of the in-service
+ * queue. If this is the case, then preempting the in-service queue
+ * allows this goal to be achieved, apart from the unpreemptible,
+ * outstanding requests mentioned above.
+ *
+ * Unfortunately, regardless of which of the above two goals one wants
+ * to achieve, service trees need first to be updated to know whether
+ * the in-service queue must be preempted. To have service trees
+ * correctly updated, the in-service queue must be expired and
+ * rescheduled, and bfqq must be scheduled too. This is one of the
+ * most costly operations (in future versions, the scheduling
+ * mechanism may be re-designed in such a way to make it possible to
+ * know whether preemption is needed without needing to update service
+ * trees). In addition, queue preemptions almost always cause random
+ * I/O, and thus loss of throughput. Because of these facts, the next
+ * function adopts the following simple scheme to avoid both costly
+ * operations and too frequent preemptions: it requests the expiration
+ * of the in-service queue (unconditionally) only for queues that need
+ * to recover a hole, or that either are weight-raised or deserve to
+ * be weight-raised.
  */
 static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
 						struct bfq_queue *bfqq,
-						bool arrived_in_time)
+						bool arrived_in_time,
+						bool wr_or_deserves_wr)
 {
 	struct bfq_entity *entity = &bfqq->entity;
 
@@ -3807,14 +3956,85 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
 	entity->budget = max_t(unsigned long, bfqq->max_budget,
 			       bfq_serv_to_charge(bfqq->next_rq, bfqq));
 	bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
-	return false;
+	return wr_or_deserves_wr;
+}
+
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+	u64 dur;
+
+	if (bfqd->bfq_wr_max_time > 0)
+		return bfqd->bfq_wr_max_time;
+
+	dur = bfqd->RT_prod;
+	do_div(dur, bfqd->peak_rate);
+
+	/*
+	 * Limit duration between 3 and 13 seconds. Tests show that
+	 * higher values than 13 seconds often yield the opposite of
+	 * the desired result, i.e., worsen responsiveness by letting
+	 * non-interactive and non-soft-real-time applications
+	 * preserve weight raising for a too long time interval.
+	 *
+	 * On the other hand, lower values than 3 seconds make it
+	 * difficult for most interactive tasks to complete their jobs
+	 * before weight-raising finishes.
+	 */
+	if (dur > msecs_to_jiffies(13000))
+		dur = msecs_to_jiffies(13000);
+	else if (dur < msecs_to_jiffies(3000))
+		dur = msecs_to_jiffies(3000);
+
+	return dur;
+}
+
+static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
+					     struct bfq_queue *bfqq,
+					     unsigned int old_wr_coeff,
+					     bool wr_or_deserves_wr,
+					     bool interactive)
+{
+	if (old_wr_coeff == 1 && wr_or_deserves_wr) {
+		/* start a weight-raising period */
+		bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+		/* update wr duration */
+		bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+		/*
+		 * If needed, further reduce budget to make sure it is
+		 * close to bfqq's backlog, so as to reduce the
+		 * scheduling-error component due to a too large
+		 * budget. Do not care about throughput consequences,
+		 * but only about latency. Finally, do not assign a
+		 * too small budget either, to avoid increasing
+		 * latency by causing too frequent expirations.
+		 */
+		bfqq->entity.budget = min_t(unsigned long,
+					    bfqq->entity.budget,
+					    2 * bfq_min_budget(bfqd));
+	} else if (old_wr_coeff > 1) {
+		/* update wr duration */
+		bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+	}
+}
+
+static bool bfq_bfqq_idle_for_long_time(struct bfq_data *bfqd,
+					struct bfq_queue *bfqq)
+{
+	return bfqq->dispatched == 0 &&
+		time_is_before_jiffies(
+			bfqq->budget_timeout +
+			bfqd->bfq_wr_min_idle_time);
 }
 
 static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 					     struct bfq_queue *bfqq,
-					     struct request *rq)
+					     int old_wr_coeff,
+					     struct request *rq,
+					     bool *interactive)
 {
-	bool bfqq_wants_to_preempt,
+	bool wr_or_deserves_wr, bfqq_wants_to_preempt,
+	     idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
 	/*
 	 * See the comments on
 	 * bfq_bfqq_update_budg_for_activation for
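
The clamp in bfq_wr_duration() above can be checked in isolation. A small user-space rendition of the same computation, with jiffies replaced by milliseconds and peak rates assumed (a sketch, not the kernel code path):

    #include <stdio.h>

    /* User-space rendition of the bfq_wr_duration() clamp, in milliseconds. */
    static unsigned long wr_duration_ms(unsigned long long rt_prod,
                                        unsigned long long peak_rate)
    {
            unsigned long long dur = rt_prod / peak_rate;   /* (R * T) / r */

            if (dur > 13000)        /* upper bound: 13 s */
                    dur = 13000;
            else if (dur < 3000)    /* lower bound: 3 s */
                    dur = 3000;
            return (unsigned long)dur;
    }

    int main(void)
    {
            /* Assumed RT_prod for a fast SSD: R = 33000, T = 2500 ms. */
            unsigned long long rt_prod = 33000ULL * 2500ULL;

            printf("%lu ms\n", wr_duration_ms(rt_prod, 33000)); /* 3000: clamped up   */
            printf("%lu ms\n", wr_duration_ms(rt_prod, 8000));  /* ~10312: in range   */
            printf("%lu ms\n", wr_duration_ms(rt_prod, 4000));  /* 13000: clamped down */
            return 0;
    }
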
@@ -3827,12 +4047,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, rq->cmd_flags);
 
 	/*
-	 * Update budget and check whether bfqq may want to preempt
-	 * the in-service queue.
+	 * bfqq deserves to be weight-raised if:
+	 * - it is sync,
+	 * - it has been idle for enough time.
+	 */
+	*interactive = idle_for_long_time;
+	wr_or_deserves_wr = bfqd->low_latency &&
+		(bfqq->wr_coeff > 1 ||
+		 (bfq_bfqq_sync(bfqq) && *interactive));
+
+	/*
+	 * Using the last flag, update budget and check whether bfqq
+	 * may want to preempt the in-service queue.
 	 */
 	bfqq_wants_to_preempt =
 		bfq_bfqq_update_budg_for_activation(bfqd, bfqq,
-						    arrived_in_time);
+						    arrived_in_time,
+						    wr_or_deserves_wr);
 
 	if (!bfq_bfqq_IO_bound(bfqq)) {
 		if (arrived_in_time) {
@@ -3844,6 +4075,16 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 		bfqq->requests_within_timer = 0;
 	}
 
+	if (bfqd->low_latency) {
+		bfq_update_bfqq_wr_on_rq_arrival(bfqd, bfqq,
+						 old_wr_coeff,
+						 wr_or_deserves_wr,
+						 *interactive);
+
+		if (old_wr_coeff != bfqq->wr_coeff)
+			bfqq->entity.prio_changed = 1;
+	}
+
 	bfq_add_bfqq_busy(bfqd, bfqq);
 
 	/*
@@ -3857,6 +4098,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 	 * function bfq_bfqq_update_budg_for_activation).
 	 */
 	if (bfqd->in_service_queue && bfqq_wants_to_preempt &&
+	    bfqd->in_service_queue->wr_coeff == 1 &&
 	    next_queue_may_preempt(bfqd))
 		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
 				false, BFQQE_PREEMPTED);
@@ -3867,6 +4109,8 @@ static void bfq_add_request(struct request *rq)
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd = bfqq->bfqd;
 	struct request *next_rq, *prev;
+	unsigned int old_wr_coeff = bfqq->wr_coeff;
+	bool interactive = false;
 
 	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
 	bfqq->queued[rq_is_sync(rq)]++;
@@ -3882,9 +4126,45 @@ static void bfq_add_request(struct request *rq)
 	bfqq->next_rq = next_rq;
 
 	if (!bfq_bfqq_busy(bfqq)) /* switching to busy ... */
-		bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, rq);
-	else if (prev != bfqq->next_rq)
-		bfq_updated_next_req(bfqd, bfqq);
+		bfq_bfqq_handle_idle_busy_switch(bfqd, bfqq, old_wr_coeff,
+						 rq, &interactive);
+	else {
+		if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) &&
+		    time_is_before_jiffies(
+				bfqq->last_wr_start_finish +
+				bfqd->bfq_wr_min_inter_arr_async)) {
+			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+
+			bfqq->entity.prio_changed = 1;
+		}
+		if (prev != bfqq->next_rq)
+			bfq_updated_next_req(bfqd, bfqq);
+	}
+
+	/*
+	 * Assign jiffies to last_wr_start_finish in the following
+	 * cases:
+	 *
+	 * . if bfqq is not going to be weight-raised, because, for
+	 *   non weight-raised queues, last_wr_start_finish stores the
+	 *   arrival time of the last request; as of now, this piece
+	 *   of information is used only for deciding whether to
+	 *   weight-raise async queues
+	 *
+	 * . if bfqq is not weight-raised, because, if bfqq is now
+	 *   switching to weight-raised, then last_wr_start_finish
+	 *   stores the time when weight-raising starts
+	 *
+	 * . if bfqq is interactive, because, regardless of whether
+	 *   bfqq is currently weight-raised, the weight-raising
+	 *   period must start or restart (this case is considered
+	 *   separately because it is not detected by the above
+	 *   conditions, if bfqq is already weight-raised)
+	 */
+	if (bfqd->low_latency &&
+	    (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive))
+		bfqq->last_wr_start_finish = jiffies;
 }
 
 static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
@@ -4087,6 +4367,46 @@ end:
 	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
 }
 
+/* Must be called with bfqq != NULL */
+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+{
+	bfqq->wr_coeff = 1;
+	bfqq->wr_cur_max_time = 0;
+	/*
+	 * Trigger a weight change on the next invocation of
+	 * __bfq_entity_update_weight_prio.
+	 */
+	bfqq->entity.prio_changed = 1;
+}
+
+static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
+				    struct bfq_group *bfqg)
+{
+	int i, j;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < IOPRIO_BE_NR; j++)
+			if (bfqg->async_bfqq[i][j])
+				bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
+	if (bfqg->async_idle_bfqq)
+		bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
+}
+
+static void bfq_end_wr(struct bfq_data *bfqd)
+{
+	struct bfq_queue *bfqq;
+
+	spin_lock_irq(&bfqd->lock);
+
+	list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list)
+		bfq_bfqq_end_wr(bfqq);
+	bfq_end_wr_async(bfqd);
+
+	spin_unlock_irq(&bfqd->lock);
+}
+
 static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
 				struct bio *bio)
 {
@@ -4110,16 +4430,32 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
 	return bfqq == RQ_BFQQ(rq);
 }
 
+/*
+ * Set the maximum time for the in-service queue to consume its
+ * budget. This prevents seeky processes from lowering the throughput.
+ * In practice, a time-slice service scheme is used with seeky
+ * processes.
+ */
+static void bfq_set_budget_timeout(struct bfq_data *bfqd,
+				   struct bfq_queue *bfqq)
+{
+	bfqd->last_budget_start = ktime_get();
+
+	bfqq->budget_timeout = jiffies +
+		bfqd->bfq_timeout *
+		(bfqq->entity.weight / bfqq->entity.orig_weight);
+}
+
 static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
 				       struct bfq_queue *bfqq)
 {
 	if (bfqq) {
 		bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
-		bfq_mark_bfqq_budget_new(bfqq);
 		bfq_clear_bfqq_fifo_expire(bfqq);
 
 		bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8;
 
+		bfq_set_budget_timeout(bfqd, bfqq);
 		bfq_log_bfqq(bfqd, bfqq,
 			     "set_in_service_queue, cur-budget = %d",
 			     bfqq->entity.budget);
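
The new bfq_set_budget_timeout() scales the budget timeout by entity.weight / entity.orig_weight, which for a weight-raised queue equals its wr_coeff. A small user-space illustration with an assumed base timeout (the timeout value and weights are assumptions, not taken from the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned int bfq_timeout_ms = 125;   /* assumed base timeout */
            unsigned int orig_weight = 100;

            unsigned int weight_normal = orig_weight;        /* wr_coeff == 1  */
            unsigned int weight_raised = orig_weight * 30;   /* wr_coeff == 30 */

            /* budget_timeout = now + bfq_timeout * (weight / orig_weight) */
            printf("normal queue:        +%u ms\n",
                   bfq_timeout_ms * (weight_normal / orig_weight));   /* 125 ms  */
            printf("weight-raised queue: +%u ms\n",
                   bfq_timeout_ms * (weight_raised / orig_weight));   /* 3750 ms */
            return 0;
    }
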
@@ -4159,9 +4495,13 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
 	 */
 	sl = bfqd->bfq_slice_idle;
 	/*
-	 * Grant only minimum idle time if the queue is seeky.
+	 * Unless the queue is being weight-raised, grant only minimum
+	 * idle time if the queue is seeky. A long idling is preserved
+	 * for a weight-raised queue, because it is needed for
+	 * guaranteeing to the queue its reserved share of the
+	 * throughput.
 	 */
-	if (BFQQ_SEEKY(bfqq))
+	if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1)
 		sl = min_t(u64, sl, BFQ_MIN_TT);
 
 	bfqd->last_idling_start = ktime_get();
@@ -4171,27 +4511,6 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
 }
 
 /*
- * Set the maximum time for the in-service queue to consume its
- * budget. This prevents seeky processes from lowering the disk
- * throughput (always guaranteed with a time slice scheme as in CFQ).
- */
-static void bfq_set_budget_timeout(struct bfq_data *bfqd)
-{
-	struct bfq_queue *bfqq = bfqd->in_service_queue;
-	unsigned int timeout_coeff = bfqq->entity.weight /
-		bfqq->entity.orig_weight;
-
-	bfqd->last_budget_start = ktime_get();
-
-	bfq_clear_bfqq_budget_new(bfqq);
-	bfqq->budget_timeout = jiffies +
-		bfqd->bfq_timeout * timeout_coeff;
-
-	bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u",
-		jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff));
-}
-
-/*
  * In autotuning mode, max_budget is dynamically recomputed as the
  * amount of sectors transferred in timeout at the estimated peak
  * rate. This enables BFQ to utilize a full timeslice with a full
@@ -4204,6 +4523,42 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
 		jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT;
 }
 
+/*
+ * Update parameters related to throughput and responsiveness, as a
+ * function of the estimated peak rate. See comments on
+ * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ */
+static void update_thr_responsiveness_params(struct bfq_data *bfqd)
+{
+	int dev_type = blk_queue_nonrot(bfqd->queue);
+
+	if (bfqd->bfq_user_max_budget == 0)
+		bfqd->bfq_max_budget =
+			bfq_calc_max_budget(bfqd);
+
+	if (bfqd->device_speed == BFQ_BFQD_FAST &&
+	    bfqd->peak_rate < device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_SLOW;
+		bfqd->RT_prod = R_slow[dev_type] *
+			T_slow[dev_type];
+	} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
+		   bfqd->peak_rate > device_speed_thresh[dev_type]) {
+		bfqd->device_speed = BFQ_BFQD_FAST;
+		bfqd->RT_prod = R_fast[dev_type] *
+			T_fast[dev_type];
+	}
+
+	bfq_log(bfqd,
+"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu sects/sec",
+		dev_type == 0 ? "ROT" : "NONROT",
+		bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
+		bfqd->device_speed == BFQ_BFQD_FAST ?
+		(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
+		(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
+		(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
+			BFQ_RATE_SHIFT);
+}
+
 static void bfq_reset_rate_computation(struct bfq_data *bfqd,
 				       struct request *rq)
 {
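
The log statement above converts the left-shifted rates back to sectors/sec, i.e. (USEC_PER_SEC * R) >> BFQ_RATE_SHIFT. Applying the same conversion to the reference values gives a feel for the units; a user-space sketch (512-byte sectors assumed; integer division truncates):

    #include <stdio.h>

    #define BFQ_RATE_SHIFT  16
    #define USEC_PER_SEC    1000000ULL

    int main(void)
    {
            /* Reference rates from the patch, in sectors/usec << BFQ_RATE_SHIFT. */
            unsigned long long R_slow_rot = 1000, R_fast_nonrot = 33000;

            unsigned long long slow_sects = (USEC_PER_SEC * R_slow_rot) >> BFQ_RATE_SHIFT;
            unsigned long long fast_sects = (USEC_PER_SEC * R_fast_nonrot) >> BFQ_RATE_SHIFT;

            /* Roughly 7-8 MB/s for the slow rotational reference and
             * ~258 MB/s for the fast non-rotational one.
             */
            printf("slow rotational ref:     %llu sectors/sec (~%llu MB/s)\n",
                   slow_sects, slow_sects * 512 / 1000000);
            printf("fast non-rotational ref: %llu sectors/sec (~%llu MB/s)\n",
                   fast_sects, fast_sects * 512 / 1000000);
            return 0;
    }
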
@@ -4315,9 +4670,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
 	rate /= divisor;	/* smoothing constant alpha = 1/divisor */
 
 	bfqd->peak_rate += rate;
-	if (bfqd->bfq_user_max_budget == 0)
-		bfqd->bfq_max_budget =
-			bfq_calc_max_budget(bfqd);
+	update_thr_responsiveness_params(bfqd);
 
 reset_computation:
 	bfq_reset_rate_computation(bfqd, rq);
@@ -4439,9 +4792,18 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
 
 static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
-	if (RB_EMPTY_ROOT(&bfqq->sort_list))
+	if (RB_EMPTY_ROOT(&bfqq->sort_list)) {
+		if (bfqq->dispatched == 0)
+			/*
+			 * Overloading budget_timeout field to store
+			 * the time at which the queue remains with no
+			 * backlog and no outstanding request; used by
+			 * the weight-raising mechanism.
+			 */
+			bfqq->budget_timeout = jiffies;
+
 		bfq_del_bfqq_busy(bfqd, bfqq, true);
-	else
+	} else
 		bfq_requeue_bfqq(bfqd, bfqq);
 
 	/*
@@ -4468,9 +4830,18 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
 	struct request *next_rq;
 	int budget, min_budget;
 
-	budget = bfqq->max_budget;
 	min_budget = bfq_min_budget(bfqd);
 
+	if (bfqq->wr_coeff == 1)
+		budget = bfqq->max_budget;
+	else /*
+	      * Use a constant, low budget for weight-raised queues,
+	      * to help achieve a low latency. Keep it slightly higher
+	      * than the minimum possible budget, to cause a little
+	      * bit fewer expirations.
+	      */
+		budget = 2 * min_budget;
+
 	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
 		bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
 	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d",
@@ -4478,7 +4849,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
 	bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
 		bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
 
-	if (bfq_bfqq_sync(bfqq)) {
+	if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) {
 		switch (reason) {
 		/*
 		 * Caveat: in all the following cases we trade latency
@@ -4577,7 +4948,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
 		default:
 			return;
 		}
-	} else {
+	} else if (!bfq_bfqq_sync(bfqq)) {
 		/*
 		 * Async queues get always the maximum possible
 		 * budget, as for them we do not care about latency
@@ -4766,15 +5137,19 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
 	 * bandwidth, and not time, distribution with little unlucky
 	 * or quasi-sequential processes.
 	 */
-	if (slow ||
-	    (reason == BFQQE_BUDGET_TIMEOUT &&
-	     bfq_bfqq_budget_left(bfqq) >= entity->budget / 3))
+	if (bfqq->wr_coeff == 1 &&
+	    (slow ||
+	     (reason == BFQQE_BUDGET_TIMEOUT &&
+	      bfq_bfqq_budget_left(bfqq) >= entity->budget / 3)))
 		bfq_bfqq_charge_time(bfqd, bfqq, delta);
 
 	if (reason == BFQQE_TOO_IDLE &&
 	    entity->service <= 2 * entity->budget / 10)
 		bfq_clear_bfqq_IO_bound(bfqq);
 
+	if (bfqd->low_latency && bfqq->wr_coeff == 1)
+		bfqq->last_wr_start_finish = jiffies;
+
 	bfq_log_bfqq(bfqd, bfqq,
 		"expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
 		slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
@@ -4801,10 +5176,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
  */
 static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
 {
-	if (bfq_bfqq_budget_new(bfqq) ||
-	    time_is_after_jiffies(bfqq->budget_timeout))
-		return false;
-	return true;
+	return time_is_before_eq_jiffies(bfqq->budget_timeout);
 }
 
 /*
@@ -4831,19 +5203,40 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
 
 /*
  * For a queue that becomes empty, device idling is allowed only if
- * this function returns true for the queue. And this function returns
- * true only if idling is beneficial for throughput.
+ * this function returns true for the queue. As a consequence, since
+ * device idling plays a critical role in both throughput boosting and
+ * service guarantees, the return value of this function plays a
+ * critical role in both these aspects as well.
+ *
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
+ *
+ * In more detail, the return value of this function is obtained by,
+ * first, computing a number of boolean variables that take into
+ * account throughput and service-guarantee issues, and, then,
+ * combining these variables in a logical expression. Most of the
+ * issues taken into account are not trivial. We discuss these issues
+ * individually while introducing the variables.
  */
 static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 {
 	struct bfq_data *bfqd = bfqq->bfqd;
-	bool idling_boosts_thr;
+	bool idling_boosts_thr, asymmetric_scenario;
 
 	if (bfqd->strict_guarantees)
 		return true;
 
 	/*
-	 * The value of the next variable is computed considering that
+	 * The next variable takes into account the cases where idling
+	 * boosts the throughput.
+	 *
+	 * The value of the variable is computed considering that
 	 * idling is usually beneficial for the throughput if:
 	 * (a) the device is not NCQ-capable, or
 	 * (b) regardless of the presence of NCQ, the request pattern
@@ -4857,13 +5250,80 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
 	idling_boosts_thr = !bfqd->hw_tag || bfq_bfqq_IO_bound(bfqq);
 
 	/*
-	 * We have now the components we need to compute the return
-	 * value of the function, which is true only if both the
-	 * following conditions hold:
+	 * There is then a case where idling must be performed not for
+	 * throughput concerns, but to preserve service guarantees. To
+	 * introduce it, we can note that allowing the drive to
+	 * enqueue more than one request at a time, and hence
+	 * delegating de facto final scheduling decisions to the
+	 * drive's internal scheduler, causes loss of control on the
+	 * actual request service order. In particular, the critical
+	 * situation is when requests from different processes happen
+	 * to be present, at the same time, in the internal queue(s)
+	 * of the drive. In such a situation, the drive, by deciding
+	 * the service order of the internally-queued requests, does
+	 * determine also the actual throughput distribution among
+	 * these processes. But the drive typically has no notion or
+	 * concern about per-process throughput distribution, and
+	 * makes its decisions only on a per-request basis. Therefore,
+	 * the service distribution enforced by the drive's internal
+	 * scheduler is likely to coincide with the desired
+	 * device-throughput distribution only in a completely
+	 * symmetric scenario where: (i) each of these processes must
+	 * get the same throughput as the others; (ii) all these
+	 * processes have the same I/O pattern (either sequential or
+	 * random). In fact, in such a scenario, the drive will tend
+	 * to treat the requests of each of these processes in about
+	 * the same way as the requests of the others, and thus to
+	 * provide each of these processes with about the same
+	 * throughput (which is exactly the desired throughput
+	 * distribution). In contrast, in any asymmetric scenario,
+	 * device idling is certainly needed to guarantee that bfqq
+	 * receives its assigned fraction of the device throughput
+	 * (see [1] for details).
+	 *
+	 * As for sub-condition (i), actually we check only whether
+	 * bfqq is being weight-raised. In fact, if bfqq is not being
+	 * weight-raised, we have that:
+	 * - if the process associated with bfqq is not I/O-bound, then
+	 *   it is not either latency- or throughput-critical; therefore
+	 *   idling is not needed for bfqq;
+	 * - if the process associated with bfqq is I/O-bound, then
+	 *   idling is already granted with bfqq (see the comments on
+	 *   idling_boosts_thr).
+	 *
+	 * We do not check sub-condition (ii) at all, i.e., the next
+	 * variable is true if and only if bfqq is being
+	 * weight-raised. We do not need to control sub-condition (ii)
+	 * for the following reason:
+	 * - if bfqq is being weight-raised, then idling is already
+	 *   guaranteed to bfqq by sub-condition (i);
+	 * - if bfqq is not being weight-raised, then idling is
+	 *   already guaranteed to bfqq (only) if it matters, i.e., if
+	 *   bfqq is associated to a currently I/O-bound process (see
+	 *   the above comment on sub-condition (i)).
+	 *
+	 * As a side note, it is worth considering that the above
+	 * device-idling countermeasures may however fail in the
+	 * following unlucky scenario: if idling is (correctly)
+	 * disabled in a time period during which the symmetry
+	 * sub-condition holds, and hence the device is allowed to
+	 * enqueue many requests, but at some later point in time some
+	 * sub-condition stops to hold, then it may become impossible
+	 * to let requests be served in the desired order until all
+	 * the requests already queued in the device have been served.
+	 */
+	asymmetric_scenario = bfqq->wr_coeff > 1;
+
+	/*
+	 * We have now all the components we need to compute the return
+	 * value of the function, which is true only if both the following
+	 * conditions hold:
 	 * 1) bfqq is sync, because idling make sense only for sync queues;
-	 * 2) idling boosts the throughput.
+	 * 2) idling either boosts the throughput (without issues), or
+	 *    is necessary to preserve service guarantees.
 	 */
-	return bfq_bfqq_sync(bfqq) && idling_boosts_thr;
+	return bfq_bfqq_sync(bfqq) &&
+		(idling_boosts_thr || asymmetric_scenario);
 }
 
 /*
@@ -4986,6 +5446,43 @@ keep_queue:
 	return bfqq;
 }
 
+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	struct bfq_entity *entity = &bfqq->entity;
+
+	if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
+		bfq_log_bfqq(bfqd, bfqq,
+			"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
+			jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
+			jiffies_to_msecs(bfqq->wr_cur_max_time),
+			bfqq->wr_coeff,
+			bfqq->entity.weight, bfqq->entity.orig_weight);
+
+		if (entity->prio_changed)
+			bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
+
+		/*
+		 * If too much time has elapsed from the beginning of
+		 * this weight-raising period, then end weight
+		 * raising.
+		 */
+		if (time_is_before_jiffies(bfqq->last_wr_start_finish +
+					   bfqq->wr_cur_max_time)) {
+			bfqq->last_wr_start_finish = jiffies;
+			bfq_log_bfqq(bfqd, bfqq,
+				     "wrais ending at %lu, rais_max_time %u",
+				     bfqq->last_wr_start_finish,
+				     jiffies_to_msecs(bfqq->wr_cur_max_time));
+			bfq_bfqq_end_wr(bfqq);
+		}
+	}
+	/* Update weight both if it must be raised and if it must be lowered */
+	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
+		__bfq_entity_update_weight_prio(
+			bfq_entity_service_tree(entity),
+			entity);
+}
+
 /*
  * Dispatch next request from bfqq.
  */
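
The final condition in bfq_update_wr_data(), (entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1), fires only when the weight stored in the service tree disagrees with the current weight-raising state, in either direction. A small user-space truth-table sketch with assumed weights (illustrative only):

    #include <stdio.h>
    #include <stdbool.h>

    int main(void)
    {
            /* Four representative states of a queue. */
            struct { unsigned int weight, orig_weight, wr_coeff; } q[] = {
                    {  100, 100,  1 },  /* consistent: nothing to do     */
                    {  100, 100, 30 },  /* raising just started: update  */
                    { 3000, 100, 30 },  /* already raised: nothing to do */
                    { 3000, 100,  1 },  /* raising just ended: update    */
            };

            for (int i = 0; i < 4; i++) {
                    bool update = (q[i].weight > q[i].orig_weight) !=
                                  (q[i].wr_coeff > 1);
                    printf("case %d: update weight? %s\n",
                           i, update ? "yes" : "no");
            }
            return 0;
    }
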
@@ -5001,6 +5498,19 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
 
 	bfq_dispatch_remove(bfqd->queue, rq);
 
+	/*
+	 * If weight raising has to terminate for bfqq, then next
+	 * function causes an immediate update of bfqq's weight,
+	 * without waiting for next activation. As a consequence, on
+	 * expiration, bfqq will be timestamped as if it has never been
+	 * weight-raised during this service slot, even if it has
+	 * received part or even most of the service as a
+	 * weight-raised queue. This inflates bfqq's timestamps, which
+	 * is beneficial, as bfqq is then more willing to leave the
+	 * device immediately to possible other weight-raised queues.
+	 */
+	bfq_update_wr_data(bfqd, bfqq);
+
 	if (!bfqd->in_service_bic) {
 		atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
 		bfqd->in_service_bic = RQ_BIC(rq);
@@ -5306,6 +5816,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3;
 	bfqq->budget_timeout = bfq_smallest_from_now();
 
+	bfqq->wr_coeff = 1;
+	bfqq->last_wr_start_finish = bfq_smallest_from_now();
+
 	/* first request is almost certainly seeky */
 	bfqq->seek_history = 1;
 }
@@ -5440,7 +5953,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
 		 (bfqd->hw_tag && BFQQ_SEEKY(bfqq)))
 		enable_idle = 0;
 	else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) {
-		if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)
+		if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle &&
+		    bfqq->wr_coeff == 1)
 			enable_idle = 0;
 		else
 			enable_idle = 1;
@@ -5618,6 +6132,16 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 	bfqd->rq_in_driver--;
 	bfqq->dispatched--;
 
+	if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
+		/*
+		 * Set budget_timeout (which we overload to store the
+		 * time at which the queue remains with no backlog and
+		 * no outstanding request; used by the weight-raising
+		 * mechanism).
+		 */
+		bfqq->budget_timeout = jiffies;
+	}
+
 	now_ns = ktime_get_ns();
 
 	bfqq->ttime.last_end_request = now_ns;
@@ -5655,10 +6179,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 	 * or if we want to idle in case it has no pending requests.
 	 */
 	if (bfqd->in_service_queue == bfqq) {
-		if (bfq_bfqq_budget_new(bfqq))
-			bfq_set_budget_timeout(bfqd);
-
-		if (bfq_bfqq_must_idle(bfqq)) {
+		if (bfqq->dispatched == 0 && bfq_bfqq_must_idle(bfqq)) {
 			bfq_arm_slice_timer(bfqd);
 			return;
 		} else if (bfq_may_expire_for_budg_timeout(bfqq))
@@ -5966,6 +6487,26 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 	bfqd->bfq_requests_within_timer = 120;
 
+	bfqd->low_latency = true;
+
+	/*
+	 * Trade-off between responsiveness and fairness.
+	 */
+	bfqd->bfq_wr_coeff = 30;
+	bfqd->bfq_wr_max_time = 0;
+	bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000);
+	bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500);
+
+	/*
+	 * Begin by assuming, optimistically, that the device is a
+	 * high-speed one, and that its peak rate is equal to 2/3 of
+	 * the highest reference rate.
+	 */
+	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
+			T_fast[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
+	bfqd->device_speed = BFQ_BFQD_FAST;
+
 	spin_lock_init(&bfqd->lock);
 
 	/*
@@ -6047,6 +6588,7 @@ SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2);
 SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
 SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
 SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0);
 #undef SHOW_FUNCTION
 
 #define USEC_SHOW_FUNCTION(__FUNC, __VAR)				\
@@ -6167,6 +6709,22 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
 	return ret;
 }
 
+static ssize_t bfq_low_latency_store(struct elevator_queue *e,
+				     const char *page, size_t count)
+{
+	struct bfq_data *bfqd = e->elevator_data;
+	unsigned long uninitialized_var(__data);
+	int ret = bfq_var_store(&__data, (page), count);
+
+	if (__data > 1)
+		__data = 1;
+	if (__data == 0 && bfqd->low_latency != 0)
+		bfq_end_wr(bfqd);
+	bfqd->low_latency = __data;
+
+	return ret;
+}
+
 #define BFQ_ATTR(name) \
 	__ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store)
 
@@ -6180,6 +6738,7 @@ static struct elv_fs_entry bfq_attrs[] = {
 	BFQ_ATTR(max_budget),
 	BFQ_ATTR(timeout_sync),
 	BFQ_ATTR(strict_guarantees),
+	BFQ_ATTR(low_latency),
 	__ATTR_NULL
 };
 
@@ -6242,6 +6801,39 @@ static int __init bfq_init(void)
 	if (bfq_slab_setup())
 		goto err_pol_unreg;
 
+	/*
+	 * Times to load large popular applications for the typical
+	 * systems installed on the reference devices (see the
+	 * comments before the definitions of the next two
+	 * arrays). Actually, we use slightly slower values, as the
+	 * estimated peak rate tends to be smaller than the actual
+	 * peak rate. The reason for this last fact is that estimates
+	 * are computed over much shorter time intervals than the long
+	 * intervals typically used for benchmarking. Why? First, to
+	 * adapt more quickly to variations. Second, because an I/O
+	 * scheduler cannot rely on a peak-rate-evaluation workload to
+	 * be run for a long time.
+	 */
+	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
+	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
+	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
+
+	/*
+	 * Thresholds that determine the switch between speed classes
+	 * (see the comments before the definition of the array
+	 * device_speed_thresh). These thresholds are biased towards
+	 * transitions to the fast class. This is safer than the
+	 * opposite bias. In fact, a wrong transition to the slow
+	 * class results in short weight-raising periods, because the
+	 * speed of the device then tends to be higher than the
+	 * reference peak rate. On the opposite end, a wrong
+	 * transition to the fast class tends to increase
+	 * weight-raising periods, because of the opposite reason.
+	 */
+	device_speed_thresh[0] = (4 * R_slow[0]) / 3;
+	device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+
 	ret = elv_register(&iosched_bfq_mq);
 	if (ret)
 		goto err_pol_unreg;
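
The thresholds computed above sit at only 4/3 of the slow reference rates, well below the fast reference rates, which is what biases classification towards the fast class. A user-space sketch of the same computation (values copied from the patch; illustrative only):

    #include <stdio.h>

    int main(void)
    {
            /* Reference rates from the patch (scaled units). */
            int R_slow[2] = { 1000, 10700 };
            int R_fast[2] = { 14000, 33000 };
            int thresh[2];

            /* Only 4/3 of the slow reference rate, far below R_fast, so a
             * device has to look quite slow before it is (re)classified
             * as slow.
             */
            for (int i = 0; i < 2; i++) {
                    thresh[i] = (4 * R_slow[i]) / 3;
                    printf("%s: thresh %d vs R_fast %d\n",
                           i == 0 ? "rotational" : "non-rotational",
                           thresh[i], R_fast[i]);
            }
            return 0;
    }
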