Diffstat (limited to 'block/kyber-iosched.c')
-rw-r--r--  block/kyber-iosched.c  497
1 file changed, 279 insertions(+), 218 deletions(-)
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 08eb5295c18d..adc8e6393829 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -29,13 +29,16 @@
 #include "blk-mq-debugfs.h"
 #include "blk-mq-sched.h"
 #include "blk-mq-tag.h"
-#include "blk-stat.h"
 
-/* Scheduling domains. */
+/*
+ * Scheduling domains: the device is divided into multiple domains based on the
+ * request type.
+ */
 enum {
         KYBER_READ,
-        KYBER_SYNC_WRITE,
-        KYBER_OTHER, /* Async writes, discard, etc. */
+        KYBER_WRITE,
+        KYBER_DISCARD,
+        KYBER_OTHER,
         KYBER_NUM_DOMAINS,
 };
 
@@ -49,25 +52,82 @@ enum {
 };
 
 /*
- * Initial device-wide depths for each scheduling domain.
+ * Maximum device-wide depth for each scheduling domain.
  *
- * Even for fast devices with lots of tags like NVMe, you can saturate
- * the device with only a fraction of the maximum possible queue depth.
- * So, we cap these to a reasonable value.
+ * Even for fast devices with lots of tags like NVMe, you can saturate the
+ * device with only a fraction of the maximum possible queue depth. So, we cap
+ * these to a reasonable value.
  */
 static const unsigned int kyber_depth[] = {
         [KYBER_READ] = 256,
-        [KYBER_SYNC_WRITE] = 128,
-        [KYBER_OTHER] = 64,
+        [KYBER_WRITE] = 128,
+        [KYBER_DISCARD] = 64,
+        [KYBER_OTHER] = 16,
 };
 
 /*
- * Scheduling domain batch sizes. We favor reads.
+ * Default latency targets for each scheduling domain.
+ */
+static const u64 kyber_latency_targets[] = {
+        [KYBER_READ] = 2 * NSEC_PER_MSEC,
+        [KYBER_WRITE] = 10 * NSEC_PER_MSEC,
+        [KYBER_DISCARD] = 5 * NSEC_PER_SEC,
+};
+
+/*
+ * Batch size (number of requests we'll dispatch in a row) for each scheduling
+ * domain.
  */
 static const unsigned int kyber_batch_size[] = {
         [KYBER_READ] = 16,
-        [KYBER_SYNC_WRITE] = 8,
-        [KYBER_OTHER] = 8,
+        [KYBER_WRITE] = 8,
+        [KYBER_DISCARD] = 1,
+        [KYBER_OTHER] = 1,
+};
+
+/*
+ * Requests latencies are recorded in a histogram with buckets defined relative
+ * to the target latency:
+ *
+ * <= 1/4 * target latency
+ * <= 1/2 * target latency
+ * <= 3/4 * target latency
+ * <= target latency
+ * <= 1 1/4 * target latency
+ * <= 1 1/2 * target latency
+ * <= 1 3/4 * target latency
+ * > 1 3/4 * target latency
+ */
+enum {
+        /*
+         * The width of the latency histogram buckets is
+         * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
+         */
+        KYBER_LATENCY_SHIFT = 2,
+        /*
+         * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
+         * thus, "good".
+         */
+        KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
+        /* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
+        KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
+};
+
+/*
+ * We measure both the total latency and the I/O latency (i.e., latency after
+ * submitting to the device).
+ */
+enum {
+        KYBER_TOTAL_LATENCY,
+        KYBER_IO_LATENCY,
+};
+
+/*
+ * Per-cpu latency histograms: total latency and I/O latency for each scheduling
+ * domain except for KYBER_OTHER.
+ */
+struct kyber_cpu_latency {
+        atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
 };
 
 /*
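Note on the bucket layout added above: each domain's latency histogram has eight buckets of width target / (1 << KYBER_LATENCY_SHIFT), and the first four ("good") cover latencies at or below the target. A minimal userspace sketch of that mapping, assuming the same constants; latency_to_bucket() and the main() harness are illustrative and not part of the kernel source:

#include <stdint.h>
#include <stdio.h>

#define KYBER_LATENCY_SHIFT   2
#define KYBER_LATENCY_BUCKETS (2 << KYBER_LATENCY_SHIFT)

/* Bucket width is target / 4; bucket 3 means "<= target", bucket 7 catches everything beyond 1 3/4 * target. */
static unsigned int latency_to_bucket(uint64_t target, uint64_t latency)
{
        uint64_t divisor = target >> KYBER_LATENCY_SHIFT;
        uint64_t bucket;

        if (latency == 0)
                return 0;
        if (divisor == 0)
                divisor = 1;
        bucket = (latency - 1) / divisor;
        return bucket < KYBER_LATENCY_BUCKETS - 1 ? bucket : KYBER_LATENCY_BUCKETS - 1;
}

int main(void)
{
        uint64_t target = 2 * 1000 * 1000;      /* 2 ms read target, in nanoseconds */

        /* 1.5 ms lands in bucket 2 (<= 3/4 * target); 5 ms saturates in bucket 7. */
        printf("%u %u\n", latency_to_bucket(target, 1500000),
               latency_to_bucket(target, 5000000));
        return 0;
}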
@@ -84,14 +144,9 @@ struct kyber_ctx_queue {
 } ____cacheline_aligned_in_smp;
 
 struct kyber_queue_data {
-        struct request_queue *q;
-
-        struct blk_stat_callback *cb;
-
         /*
-         * The device is divided into multiple scheduling domains based on the
-         * request type. Each domain has a fixed number of in-flight requests of
-         * that type device-wide, limited by these tokens.
+         * Each scheduling domain has a limited number of in-flight requests
+         * device-wide, limited by these tokens.
          */
         struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
 
@@ -101,8 +156,19 @@ struct kyber_queue_data {
          */
         unsigned int async_depth;
 
+        struct kyber_cpu_latency __percpu *cpu_latency;
+
+        /* Timer for stats aggregation and adjusting domain tokens. */
+        struct timer_list timer;
+
+        unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
+
+        unsigned long latency_timeout[KYBER_OTHER];
+
+        int domain_p99[KYBER_OTHER];
+
         /* Target latencies in nanoseconds. */
-        u64 read_lat_nsec, write_lat_nsec;
+        u64 latency_targets[KYBER_OTHER];
 };
 
 struct kyber_hctx_data {
@@ -122,182 +188,165 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 
 static unsigned int kyber_sched_domain(unsigned int op)
 {
-        if ((op & REQ_OP_MASK) == REQ_OP_READ)
+        switch (op & REQ_OP_MASK) {
+        case REQ_OP_READ:
                 return KYBER_READ;
-        else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
-                return KYBER_SYNC_WRITE;
-        else
+        case REQ_OP_WRITE:
+                return KYBER_WRITE;
+        case REQ_OP_DISCARD:
+                return KYBER_DISCARD;
+        default:
                 return KYBER_OTHER;
+        }
 }
 
-enum {
-        NONE = 0,
-        GOOD = 1,
-        GREAT = 2,
-        BAD = -1,
-        AWFUL = -2,
-};
-
-#define IS_GOOD(status) ((status) > 0)
-#define IS_BAD(status) ((status) < 0)
-
-static int kyber_lat_status(struct blk_stat_callback *cb,
-                            unsigned int sched_domain, u64 target)
+static void flush_latency_buckets(struct kyber_queue_data *kqd,
+                                  struct kyber_cpu_latency *cpu_latency,
+                                  unsigned int sched_domain, unsigned int type)
 {
-        u64 latency;
-
-        if (!cb->stat[sched_domain].nr_samples)
-                return NONE;
+        unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
+        atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
+        unsigned int bucket;
 
-        latency = cb->stat[sched_domain].mean;
-        if (latency >= 2 * target)
-                return AWFUL;
-        else if (latency > target)
-                return BAD;
-        else if (latency <= target / 2)
-                return GREAT;
-        else /* (latency <= target) */
-                return GOOD;
+        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
+                buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
 }
 
 /*
- * Adjust the read or synchronous write depth given the status of reads and
- * writes. The goal is that the latencies of the two domains are fair (i.e., if
- * one is good, then the other is good).
+ * Calculate the histogram bucket with the given percentile rank, or -1 if there
+ * aren't enough samples yet.
  */
-static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
-                                  unsigned int sched_domain, int this_status,
-                                  int other_status)
+static int calculate_percentile(struct kyber_queue_data *kqd,
+                                unsigned int sched_domain, unsigned int type,
+                                unsigned int percentile)
 {
-        unsigned int orig_depth, depth;
+        unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
+        unsigned int bucket, samples = 0, percentile_samples;
+
+        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
+                samples += buckets[bucket];
+
+        if (!samples)
+                return -1;
 
         /*
-         * If this domain had no samples, or reads and writes are both good or
-         * both bad, don't adjust the depth.
+         * We do the calculation once we have 500 samples or one second passes
+         * since the first sample was recorded, whichever comes first.
          */
-        if (this_status == NONE ||
-            (IS_GOOD(this_status) && IS_GOOD(other_status)) ||
-            (IS_BAD(this_status) && IS_BAD(other_status)))
-                return;
-
-        orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
+        if (!kqd->latency_timeout[sched_domain])
+                kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
+        if (samples < 500 &&
+            time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
+                return -1;
+        }
+        kqd->latency_timeout[sched_domain] = 0;
 
-        if (other_status == NONE) {
-                depth++;
-        } else {
-                switch (this_status) {
-                case GOOD:
-                        if (other_status == AWFUL)
-                                depth -= max(depth / 4, 1U);
-                        else
-                                depth -= max(depth / 8, 1U);
-                        break;
-                case GREAT:
-                        if (other_status == AWFUL)
-                                depth /= 2;
-                        else
-                                depth -= max(depth / 4, 1U);
+        percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
+        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
+                if (buckets[bucket] >= percentile_samples)
                         break;
-                case BAD:
-                        depth++;
-                        break;
-                case AWFUL:
-                        if (other_status == GREAT)
-                                depth += 2;
-                        else
-                                depth++;
-                        break;
-                }
+                percentile_samples -= buckets[bucket];
         }
+        memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));
 
+        return bucket;
+}
+
+static void kyber_resize_domain(struct kyber_queue_data *kqd,
+                                unsigned int sched_domain, unsigned int depth)
+{
         depth = clamp(depth, 1U, kyber_depth[sched_domain]);
-        if (depth != orig_depth)
+        if (depth != kqd->domain_tokens[sched_domain].sb.depth)
                 sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
 }
 
-/*
- * Adjust the depth of other requests given the status of reads and synchronous
- * writes. As long as either domain is doing fine, we don't throttle, but if
- * both domains are doing badly, we throttle heavily.
- */
-static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
-                                     int read_status, int write_status,
-                                     bool have_samples)
-{
-        unsigned int orig_depth, depth;
-        int status;
-
-        orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
-
-        if (read_status == NONE && write_status == NONE) {
-                depth += 2;
-        } else if (have_samples) {
-                if (read_status == NONE)
-                        status = write_status;
-                else if (write_status == NONE)
-                        status = read_status;
-                else
-                        status = max(read_status, write_status);
-                switch (status) {
-                case GREAT:
-                        depth += 2;
-                        break;
-                case GOOD:
-                        depth++;
-                        break;
-                case BAD:
-                        depth -= max(depth / 4, 1U);
-                        break;
-                case AWFUL:
-                        depth /= 2;
-                        break;
+static void kyber_timer_fn(struct timer_list *t)
+{
+        struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
+        unsigned int sched_domain;
+        int cpu;
+        bool bad = false;
+
+        /* Sum all of the per-cpu latency histograms. */
+        for_each_online_cpu(cpu) {
+                struct kyber_cpu_latency *cpu_latency;
+
+                cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
+                for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
+                        flush_latency_buckets(kqd, cpu_latency, sched_domain,
+                                              KYBER_TOTAL_LATENCY);
+                        flush_latency_buckets(kqd, cpu_latency, sched_domain,
+                                              KYBER_IO_LATENCY);
                 }
         }
 
-        depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
-        if (depth != orig_depth)
-                sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
-}
-
-/*
- * Apply heuristics for limiting queue depths based on gathered latency
- * statistics.
- */
-static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
-{
-        struct kyber_queue_data *kqd = cb->data;
-        int read_status, write_status;
-
-        read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
-        write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
+        /*
+         * Check if any domains have a high I/O latency, which might indicate
+         * congestion in the device. Note that we use the p90; we don't want to
+         * be too sensitive to outliers here.
+         */
+        for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
+                int p90;
 
-        kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
-        kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
-        kyber_adjust_other_depth(kqd, read_status, write_status,
-                                 cb->stat[KYBER_OTHER].nr_samples != 0);
+                p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
+                                           90);
+                if (p90 >= KYBER_GOOD_BUCKETS)
+                        bad = true;
+        }
 
         /*
-         * Continue monitoring latencies if we aren't hitting the targets or
-         * we're still throttling other requests.
+         * Adjust the scheduling domain depths. If we determined that there was
+         * congestion, we throttle all domains with good latencies. Either way,
+         * we ease up on throttling domains with bad latencies.
          */
-        if (!blk_stat_is_active(kqd->cb) &&
-            ((IS_BAD(read_status) || IS_BAD(write_status) ||
-              kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
-                blk_stat_activate_msecs(kqd->cb, 100);
+        for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
+                unsigned int orig_depth, depth;
+                int p99;
+
+                p99 = calculate_percentile(kqd, sched_domain,
+                                           KYBER_TOTAL_LATENCY, 99);
+                /*
+                 * This is kind of subtle: different domains will not
+                 * necessarily have enough samples to calculate the latency
+                 * percentiles during the same window, so we have to remember
+                 * the p99 for the next time we observe congestion; once we do,
+                 * we don't want to throttle again until we get more data, so we
+                 * reset it to -1.
+                 */
+                if (bad) {
+                        if (p99 < 0)
+                                p99 = kqd->domain_p99[sched_domain];
+                        kqd->domain_p99[sched_domain] = -1;
+                } else if (p99 >= 0) {
+                        kqd->domain_p99[sched_domain] = p99;
+                }
+                if (p99 < 0)
+                        continue;
+
+                /*
+                 * If this domain has bad latency, throttle less. Otherwise,
+                 * throttle more iff we determined that there is congestion.
+                 *
+                 * The new depth is scaled linearly with the p99 latency vs the
+                 * latency target. E.g., if the p99 is 3/4 of the target, then
+                 * we throttle down to 3/4 of the current depth, and if the p99
+                 * is 2x the target, then we double the depth.
+                 */
+                if (bad || p99 >= KYBER_GOOD_BUCKETS) {
+                        orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
+                        depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
+                        kyber_resize_domain(kqd, sched_domain, depth);
+                }
+        }
 }
 
-static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
+static unsigned int kyber_sched_tags_shift(struct request_queue *q)
 {
         /*
          * All of the hardware queues have the same depth, so we can just grab
          * the shift of the first one.
          */
-        return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
-}
-
-static int kyber_bucket_fn(const struct request *rq)
-{
-        return kyber_sched_domain(rq->cmd_flags);
+        return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
 }
 
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
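Note on the heuristic in kyber_timer_fn() above: a p90 of any domain's I/O latency landing past the target (in a "bad" bucket) marks the device as congested, and each domain's token depth is then rescaled in proportion to its p99 of total latency, where bucket 3 corresponds to exactly the target. A self-contained sketch of that arithmetic with a made-up histogram; percentile_bucket() is illustrative, and the kernel additionally clamps the result via kyber_resize_domain():

#include <stdio.h>

#define KYBER_LATENCY_SHIFT   2
#define KYBER_GOOD_BUCKETS    (1 << KYBER_LATENCY_SHIFT)
#define KYBER_LATENCY_BUCKETS (2 << KYBER_LATENCY_SHIFT)

/* Walk the histogram until the requested rank falls inside a bucket (cf. calculate_percentile()). */
static int percentile_bucket(const unsigned int buckets[KYBER_LATENCY_BUCKETS],
                             unsigned int percentile)
{
        unsigned int samples = 0, rank, i;

        for (i = 0; i < KYBER_LATENCY_BUCKETS; i++)
                samples += buckets[i];
        if (!samples)
                return -1;

        rank = (samples * percentile + 99) / 100;       /* DIV_ROUND_UP */
        for (i = 0; i < KYBER_LATENCY_BUCKETS - 1; i++) {
                if (buckets[i] >= rank)
                        break;
                rank -= buckets[i];
        }
        return i;
}

int main(void)
{
        /* 600 samples with the p99 in bucket 2, i.e. <= 3/4 of the target latency. */
        unsigned int hist[KYBER_LATENCY_BUCKETS] = { 300, 250, 44, 3, 2, 1, 0, 0 };
        unsigned int depth = 256;
        int bad = 1;    /* some domain's p90 I/O latency exceeded its target */
        int p99 = percentile_bucket(hist, 99);

        /* Depth scales linearly with p99 vs. the target: bucket 2 -> 3/4 of the depth. */
        if (p99 >= 0 && (bad || p99 >= KYBER_GOOD_BUCKETS))
                depth = (depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
        printf("p99 bucket %d, new depth %u\n", p99, depth);    /* p99 bucket 2, new depth 192 */
        return 0;
}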
@@ -307,16 +356,17 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
         int ret = -ENOMEM;
         int i;
 
-        kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
+        kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
         if (!kqd)
                 goto err;
-        kqd->q = q;
 
-        kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
-                                          KYBER_NUM_DOMAINS, kqd);
-        if (!kqd->cb)
+        kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
+                                            GFP_KERNEL | __GFP_ZERO);
+        if (!kqd->cpu_latency)
                 goto err_kqd;
 
+        timer_setup(&kqd->timer, kyber_timer_fn, 0);
+
         for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                 WARN_ON(!kyber_depth[i]);
                 WARN_ON(!kyber_batch_size[i]);
@@ -326,20 +376,22 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
                 if (ret) {
                         while (--i >= 0)
                                 sbitmap_queue_free(&kqd->domain_tokens[i]);
-                        goto err_cb;
+                        goto err_buckets;
                 }
         }
 
-        shift = kyber_sched_tags_shift(kqd);
-        kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+        for (i = 0; i < KYBER_OTHER; i++) {
+                kqd->domain_p99[i] = -1;
+                kqd->latency_targets[i] = kyber_latency_targets[i];
+        }
 
-        kqd->read_lat_nsec = 2000000ULL;
-        kqd->write_lat_nsec = 10000000ULL;
+        shift = kyber_sched_tags_shift(q);
+        kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
 
         return kqd;
 
-err_cb:
-        blk_stat_free_callback(kqd->cb);
+err_buckets:
+        free_percpu(kqd->cpu_latency);
 err_kqd:
         kfree(kqd);
 err:
@@ -361,25 +413,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
                 return PTR_ERR(kqd);
         }
 
+        blk_stat_enable_accounting(q);
+
         eq->elevator_data = kqd;
         q->elevator = eq;
 
-        blk_stat_add_callback(q, kqd->cb);
-
         return 0;
 }
 
 static void kyber_exit_sched(struct elevator_queue *e)
 {
         struct kyber_queue_data *kqd = e->elevator_data;
-        struct request_queue *q = kqd->q;
         int i;
 
-        blk_stat_remove_callback(q, kqd->cb);
+        del_timer_sync(&kqd->timer);
 
         for (i = 0; i < KYBER_NUM_DOMAINS; i++)
                 sbitmap_queue_free(&kqd->domain_tokens[i]);
-        blk_stat_free_callback(kqd->cb);
+        free_percpu(kqd->cpu_latency);
         kfree(kqd);
 }
 
@@ -547,40 +598,44 @@ static void kyber_finish_request(struct request *rq)
         rq_clear_domain_token(kqd, rq);
 }
 
-static void kyber_completed_request(struct request *rq, u64 now)
+static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
+                               unsigned int sched_domain, unsigned int type,
+                               u64 target, u64 latency)
 {
-        struct request_queue *q = rq->q;
-        struct kyber_queue_data *kqd = q->elevator->elevator_data;
-        unsigned int sched_domain;
-        u64 latency, target;
+        unsigned int bucket;
+        u64 divisor;
 
-        /*
-         * Check if this request met our latency goal. If not, quickly gather
-         * some statistics and start throttling.
-         */
-        sched_domain = kyber_sched_domain(rq->cmd_flags);
-        switch (sched_domain) {
-        case KYBER_READ:
-                target = kqd->read_lat_nsec;
-                break;
-        case KYBER_SYNC_WRITE:
-                target = kqd->write_lat_nsec;
-                break;
-        default:
-                return;
+        if (latency > 0) {
+                divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
+                bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
+                               KYBER_LATENCY_BUCKETS - 1);
+        } else {
+                bucket = 0;
         }
 
-        /* If we are already monitoring latencies, don't check again. */
-        if (blk_stat_is_active(kqd->cb))
-                return;
+        atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
+}
 
-        if (now < rq->io_start_time_ns)
+static void kyber_completed_request(struct request *rq, u64 now)
+{
+        struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
+        struct kyber_cpu_latency *cpu_latency;
+        unsigned int sched_domain;
+        u64 target;
+
+        sched_domain = kyber_sched_domain(rq->cmd_flags);
+        if (sched_domain == KYBER_OTHER)
                 return;
 
-        latency = now - rq->io_start_time_ns;
+        cpu_latency = get_cpu_ptr(kqd->cpu_latency);
+        target = kqd->latency_targets[sched_domain];
+        add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
+                           target, now - rq->start_time_ns);
+        add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
+                           now - rq->io_start_time_ns);
+        put_cpu_ptr(kqd->cpu_latency);
 
-        if (latency > target)
-                blk_stat_activate_msecs(kqd->cb, 10);
+        timer_reduce(&kqd->timer, jiffies + HZ / 10);
 }
 
 struct flush_kcq_data {
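Note on the completion path above: each completed read, write, or discard contributes two samples, a total latency measured from rq->start_time_ns (scheduler plus device time) and an I/O latency measured from rq->io_start_time_ns (device time only). A hedged userspace sketch of the same bookkeeping using plain counters instead of the per-cpu atomic buckets; the struct, helper names, and timestamps are made up for illustration:

#include <stdint.h>
#include <stdio.h>

enum { TOTAL_LATENCY, IO_LATENCY };

struct fake_request {
        uint64_t start_time_ns;         /* when the request entered the scheduler */
        uint64_t io_start_time_ns;      /* when it was issued to the device */
};

/* One 8-bucket histogram per (domain, latency type), mirroring kyber_cpu_latency. */
static unsigned int hist[3][2][8];

static void record(unsigned int domain, unsigned int type,
                   uint64_t target, uint64_t latency)
{
        uint64_t divisor = (target >> 2) ? (target >> 2) : 1;
        uint64_t bucket = latency ? (latency - 1) / divisor : 0;

        hist[domain][type][bucket < 7 ? bucket : 7]++;
}

static void on_complete(unsigned int domain, uint64_t target,
                        const struct fake_request *rq, uint64_t now)
{
        record(domain, TOTAL_LATENCY, target, now - rq->start_time_ns);
        record(domain, IO_LATENCY, target, now - rq->io_start_time_ns);
}

int main(void)
{
        struct fake_request rq = { .start_time_ns = 0, .io_start_time_ns = 400000 };

        /* 2 ms target: total latency 1.8 ms -> bucket 3, I/O latency 1.4 ms -> bucket 2. */
        on_complete(0, 2000000, &rq, 1800000);
        printf("%u %u\n", hist[0][TOTAL_LATENCY][3], hist[0][IO_LATENCY][2]);
        return 0;
}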
@@ -778,17 +833,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
         return false;
 }
 
-#define KYBER_LAT_SHOW_STORE(op) \
-static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
+#define KYBER_LAT_SHOW_STORE(domain, name) \
+static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
                                      char *page) \
 { \
         struct kyber_queue_data *kqd = e->elevator_data; \
 \
-        return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
+        return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
 } \
 \
-static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
+static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
                                       const char *page, size_t count) \
 { \
         struct kyber_queue_data *kqd = e->elevator_data; \
         unsigned long long nsec; \
@@ -798,12 +853,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
         if (ret) \
                 return ret; \
 \
-        kqd->op##_lat_nsec = nsec; \
+        kqd->latency_targets[domain] = nsec; \
 \
         return count; \
 }
-KYBER_LAT_SHOW_STORE(read);
-KYBER_LAT_SHOW_STORE(write);
+KYBER_LAT_SHOW_STORE(KYBER_READ, read);
+KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
 #undef KYBER_LAT_SHOW_STORE
 
 #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
@@ -870,7 +925,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
         return 0; \
 }
 KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
-KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
+KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
+KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
 KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
 #undef KYBER_DEBUGFS_DOMAIN_ATTRS
 
@@ -892,8 +948,11 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m)
         case KYBER_READ:
                 seq_puts(m, "READ\n");
                 break;
-        case KYBER_SYNC_WRITE:
-                seq_puts(m, "SYNC_WRITE\n");
+        case KYBER_WRITE:
+                seq_puts(m, "WRITE\n");
+                break;
+        case KYBER_DISCARD:
+                seq_puts(m, "DISCARD\n");
                 break;
         case KYBER_OTHER:
                 seq_puts(m, "OTHER\n");
@@ -918,7 +977,8 @@ static int kyber_batching_show(void *data, struct seq_file *m)
         {#name "_tokens", 0400, kyber_##name##_tokens_show}
 static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
         KYBER_QUEUE_DOMAIN_ATTRS(read),
-        KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
+        KYBER_QUEUE_DOMAIN_ATTRS(write),
+        KYBER_QUEUE_DOMAIN_ATTRS(discard),
         KYBER_QUEUE_DOMAIN_ATTRS(other),
         {"async_depth", 0400, kyber_async_depth_show},
         {},
@@ -930,7 +990,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
         {#name "_waiting", 0400, kyber_##name##_waiting_show}
 static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
         KYBER_HCTX_DOMAIN_ATTRS(read),
-        KYBER_HCTX_DOMAIN_ATTRS(sync_write),
+        KYBER_HCTX_DOMAIN_ATTRS(write),
+        KYBER_HCTX_DOMAIN_ATTRS(discard),
         KYBER_HCTX_DOMAIN_ATTRS(other),
         {"cur_domain", 0400, kyber_cur_domain_show},
         {"batching", 0400, kyber_batching_show},