author		Linus Torvalds <torvalds@linux-foundation.org>	2018-02-10 17:05:11 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-02-10 17:05:11 -0500
commit		9454473c9dccb7b9d25e5baf915a082bfd490b33
tree		46f7f1a8886088e2f0184f1cf0e47c8ac12d4849
parent		cc5cb5af3a3363bc6f0530703895bf9c5fa2f159
parent		8525e5ff456592effe83640ea1702525e35b0363
Merge tag 'for-linus-20180210' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A few fixes to round off the merge window on the block side:
- a set of bcache fixes by way of Michael Lyle, from the usual bcache
suspects.
- add a simple-to-hook-into function for bpf EIO error injection.
- fix blk-wbt, which mischaracterized flushes as reads. Improve the logic
so that flushes and writes are accounted as writes, and only reads
as reads. From me.
- fix requeue crash in BFQ, from Paolo"
* tag 'for-linus-20180210' of git://git.kernel.dk/linux-block:
block, bfq: add requeue-request hook
bcache: fix for data collapse after re-attaching an attached device
bcache: return attach error when no cache set exist
bcache: set writeback_rate_update_seconds in range [1, 60] seconds
bcache: fix for allocator and register thread race
bcache: set error_limit correctly
bcache: properly set task state in bch_writeback_thread()
bcache: fix high CPU occupancy during journal
bcache: add journal statistic
block: Add should_fail_bio() for bpf error injection
blk-wbt: account flush requests correctly
-rw-r--r--	block/bfq-iosched.c		107
-rw-r--r--	block/blk-core.c		 11
-rw-r--r--	block/blk-wbt.c			 10
-rw-r--r--	drivers/md/bcache/alloc.c	  4
-rw-r--r--	drivers/md/bcache/bcache.h	  9
-rw-r--r--	drivers/md/bcache/btree.c	  9
-rw-r--r--	drivers/md/bcache/journal.c	 52
-rw-r--r--	drivers/md/bcache/super.c	 25
-rw-r--r--	drivers/md/bcache/sysfs.c	 34
-rw-r--r--	drivers/md/bcache/util.h	  2
-rw-r--r--	drivers/md/bcache/writeback.c	  9
-rw-r--r--	drivers/md/bcache/writeback.h	  3
12 files changed, 212 insertions, 63 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 47e6ec7427c4..aeca22d91101 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		}
 
 		/*
-		 * We exploit the bfq_finish_request hook to decrement
-		 * rq_in_driver, but bfq_finish_request will not be
-		 * invoked on this request. So, to avoid unbalance,
-		 * just start this request, without incrementing
-		 * rq_in_driver. As a negative consequence,
-		 * rq_in_driver is deceptively lower than it should be
-		 * while this request is in service. This may cause
-		 * bfq_schedule_dispatch to be invoked uselessly.
+		 * We exploit the bfq_finish_requeue_request hook to
+		 * decrement rq_in_driver, but
+		 * bfq_finish_requeue_request will not be invoked on
+		 * this request. So, to avoid unbalance, just start
+		 * this request, without incrementing rq_in_driver. As
+		 * a negative consequence, rq_in_driver is deceptively
+		 * lower than it should be while this request is in
+		 * service. This may cause bfq_schedule_dispatch to be
+		 * invoked uselessly.
 		 *
 		 * As for implementing an exact solution, the
-		 * bfq_finish_request hook, if defined, is probably
-		 * invoked also on this request. So, by exploiting
-		 * this hook, we could 1) increment rq_in_driver here,
-		 * and 2) decrement it in bfq_finish_request. Such a
-		 * solution would let the value of the counter be
-		 * always accurate, but it would entail using an extra
-		 * interface function. This cost seems higher than the
-		 * benefit, being the frequency of non-elevator-private
+		 * bfq_finish_requeue_request hook, if defined, is
+		 * probably invoked also on this request. So, by
+		 * exploiting this hook, we could 1) increment
+		 * rq_in_driver here, and 2) decrement it in
+		 * bfq_finish_requeue_request. Such a solution would
+		 * let the value of the counter be always accurate,
+		 * but it would entail using an extra interface
+		 * function. This cost seems higher than the benefit,
+		 * being the frequency of non-elevator-private
 		 * requests very low.
 		 */
 		goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 					   unsigned int cmd_flags) {}
 #endif
 
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
+		if (WARN_ON_ONCE(!bfqq)) {
+			/*
+			 * This should never happen. Most likely rq is
+			 * a requeued regular request, being
+			 * re-inserted without being first
+			 * re-prepared. Do a prepare, to avoid
+			 * failure.
+			 */
+			bfq_prepare_request(rq, rq->bio);
+			bfqq = RQ_BFQQ(rq);
+		}
+
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 		bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
 {
 	bfqq->allocated--;
 
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
 {
-	struct bfq_queue *bfqq;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
 
-	if (!rq->elv.icq)
+	/*
+	 * Requeue and finish hooks are invoked in blk-mq without
+	 * checking whether the involved request is actually still
+	 * referenced in the scheduler. To handle this fact, the
+	 * following two checks make this function exit in case of
+	 * spurious invocations, for which there is nothing to do.
+	 *
+	 * First, check whether rq has nothing to do with an elevator.
+	 */
+	if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+		return;
+
+	/*
+	 * rq either is not associated with any icq, or is an already
+	 * requeued request that has not (yet) been re-inserted into
+	 * a bfq_queue.
+	 */
+	if (!rq->elv.icq || !bfqq)
 		return;
 
-	bfqq = RQ_BFQQ(rq);
 	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
 		spin_lock_irqsave(&bfqd->lock, flags);
 
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 
 		spin_unlock_irqrestore(&bfqd->lock, flags);
 	} else {
 		/*
 		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it. And we cannot
+		 * in which case we need to remove it (this should
+		 * never happen in case of requeue). And we cannot
 		 * defer such a check and removal, to avoid
 		 * inconsistencies in the time interval from the end
 		 * of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
 			bfqg_stats_update_io_remove(bfqq_group(bfqq),
 						    rq->cmd_flags);
 		}
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 	}
 
+	/*
+	 * Reset private fields. In case of a requeue, this allows
+	 * this function to correctly do nothing if it is spuriously
+	 * invoked again on this same request (see the check at the
+	 * beginning of the function). Probably, a better general
+	 * design would be to prevent blk-mq from invoking the requeue
+	 * or finish hooks of an elevator, for a request that is not
+	 * referred by that elevator.
+	 *
+	 * Resetting the following fields would break the
+	 * request-insertion logic if rq is re-inserted into a bfq
+	 * internal queue, without a re-preparation. Here we assume
+	 * that re-insertions of requeued requests, without
+	 * re-preparation, can happen only for pass_through or at_head
+	 * requests (which are not re-inserted into bfq internal
+	 * queues).
+	 */
 	rq->elv.priv[0] = NULL;
 	rq->elv.priv[1] = NULL;
 }
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.limit_depth = bfq_limit_depth,
 		.prepare_request = bfq_prepare_request,
-		.finish_request = bfq_finish_request,
+		.requeue_request = bfq_finish_requeue_request,
+		.finish_request = bfq_finish_requeue_request,
 		.exit_icq = bfq_exit_icq,
 		.insert_requests = bfq_insert_requests,
 		.dispatch_request = bfq_dispatch_request,
diff --git a/block/blk-core.c b/block/blk-core.c
index d0d104268f1a..2d1a7bbe0634 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
+#include <linux/bpf.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 	return false;
 }
 
+static noinline int should_fail_bio(struct bio *bio)
+{
+	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+		return -EIO;
+	return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
 	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
 		goto not_supported;
 
-	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+	if (should_fail_bio(bio))
 		goto end_io;
 
 	if (!bio->bi_partno) {
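The ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO) annotation above is what exposes the new helper to bpf-based error injection. As a rough illustration only (not part of this pull request), a kprobe-type BPF program could force the hook to return -EIO via bpf_override_return(); the program below is a hypothetical sketch and assumes a kernel built with CONFIG_FUNCTION_ERROR_INJECTION and CONFIG_BPF_KPROBE_OVERRIDE, plus a loader such as libbpf or bcc.

/* Hypothetical sketch: make should_fail_bio() fail with -EIO via bpf. */
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("kprobe/should_fail_bio")
int inject_bio_eio(struct pt_regs *ctx)
{
	/*
	 * Override the probed function's return value. This is only
	 * permitted because should_fail_bio() is whitelisted with
	 * ALLOW_ERROR_INJECTION() in the hunk above.
	 */
	bpf_override_return(ctx, -EIO);
	return 0;
}

char _license[] SEC("license") = "GPL";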
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index ae8de9780085..f92fc84b5e2c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
 
 static int wbt_data_dir(const struct request *rq)
 {
-	return rq_data_dir(rq);
+	const int op = req_op(rq);
+
+	if (op == REQ_OP_READ)
+		return READ;
+	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+		return WRITE;
+
+	/* don't account */
+	return -1;
 }
 
 int wbt_init(struct request_queue *q)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6cc6c0f9c3a9..458e1d38577d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,8 +287,10 @@ do {	\
 			break;	\
 	\
 		mutex_unlock(&(ca)->set->bucket_lock);	\
-		if (kthread_should_stop())	\
+		if (kthread_should_stop()) {	\
+			set_current_state(TASK_RUNNING);	\
 			return 0;	\
+		}	\
 	\
 		schedule();	\
 		mutex_lock(&(ca)->set->bucket_lock);	\
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5e2d4e80198e..12e5197f186c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -658,10 +658,15 @@ struct cache_set {
 	atomic_long_t		writeback_keys_done;
 	atomic_long_t		writeback_keys_failed;
 
+	atomic_long_t		reclaim;
+	atomic_long_t		flush_write;
+	atomic_long_t		retry_flush_write;
+
 	enum			{
 		ON_ERROR_UNREGISTER,
 		ON_ERROR_PANIC,
 	}			on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
 	unsigned		error_limit;
 	unsigned		error_decay;
 
@@ -675,6 +680,8 @@ struct cache_set {
 
 #define BUCKET_HASH_BITS	12
 	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
+
+	DECLARE_HEAP(struct btree *, flush_btree);
 };
 
 struct bbio {
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
 
 int bch_flash_dev_create(struct cache_set *c, uint64_t size);
 
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
 void bch_cached_dev_detach(struct cached_dev *);
 void bch_cached_dev_run(struct cached_dev *);
 void bcache_device_stop(struct bcache_device *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index bf3a48aa9a9a..fad9fe8817eb 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1869,14 +1869,17 @@ void bch_initial_gc_finish(struct cache_set *c)
 	 */
 	for_each_cache(ca, c, i) {
 		for_each_bucket(b, ca) {
-			if (fifo_full(&ca->free[RESERVE_PRIO]))
+			if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+			    fifo_full(&ca->free[RESERVE_BTREE]))
 				break;
 
 			if (bch_can_invalidate_bucket(ca, b) &&
 			    !GC_MARK(b)) {
 				__bch_invalidate_one_bucket(ca, b);
-				fifo_push(&ca->free[RESERVE_PRIO],
-					  b - ca->buckets);
+				if (!fifo_push(&ca->free[RESERVE_PRIO],
+				   b - ca->buckets))
+					fifo_push(&ca->free[RESERVE_BTREE],
+						  b - ca->buckets);
 			}
 		}
 	}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a87165c1d8e5..1b736b860739 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -368,6 +368,12 @@ err:
 }
 
 /* Journalling */
+#define journal_max_cmp(l, r) \
+	(fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
+	 fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
+#define journal_min_cmp(l, r) \
+	(fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
+	 fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
 
 static void btree_flush_write(struct cache_set *c)
 {
@@ -375,28 +381,41 @@ static void btree_flush_write(struct cache_set *c)
 	 * Try to find the btree node with that references the oldest journal
 	 * entry, best is our current candidate and is locked if non NULL:
 	 */
-	struct btree *b, *best;
-	unsigned i;
+	struct btree *b;
+	int i;
+
+	atomic_long_inc(&c->flush_write);
+
 retry:
-	best = NULL;
-
-	for_each_cached_btree(b, c, i)
-		if (btree_current_write(b)->journal) {
-			if (!best)
-				best = b;
-			else if (journal_pin_cmp(c,
-					btree_current_write(best)->journal,
-					btree_current_write(b)->journal)) {
-				best = b;
-			}
-		}
-
-	b = best;
+	spin_lock(&c->journal.lock);
+	if (heap_empty(&c->flush_btree)) {
+		for_each_cached_btree(b, c, i)
+			if (btree_current_write(b)->journal) {
+				if (!heap_full(&c->flush_btree))
+					heap_add(&c->flush_btree, b,
+						 journal_max_cmp);
+				else if (journal_max_cmp(b,
+					 heap_peek(&c->flush_btree))) {
+					c->flush_btree.data[0] = b;
+					heap_sift(&c->flush_btree, 0,
+						  journal_max_cmp);
+				}
+			}
+
+		for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
+			heap_sift(&c->flush_btree, i, journal_min_cmp);
+	}
+
+	b = NULL;
+	heap_pop(&c->flush_btree, b, journal_min_cmp);
+	spin_unlock(&c->journal.lock);
+
 	if (b) {
 		mutex_lock(&b->write_lock);
 		if (!btree_current_write(b)->journal) {
 			mutex_unlock(&b->write_lock);
 			/* We raced */
+			atomic_long_inc(&c->retry_flush_write);
 			goto retry;
 		}
 
@@ -476,6 +495,8 @@ static void journal_reclaim(struct cache_set *c)
 	unsigned iter, n = 0;
 	atomic_t p;
 
+	atomic_long_inc(&c->reclaim);
+
 	while (!atomic_read(&fifo_front(&c->journal.pin)))
 		fifo_pop(&c->journal.pin, p);
 
@@ -819,7 +840,8 @@ int bch_journal_alloc(struct cache_set *c)
 	j->w[0].c = c;
 	j->w[1].c = c;
 
-	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+	if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
+	    !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
 	    !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
 	    !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
 		return -ENOMEM;
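The btree_flush_write() rework above bounds the per-reclaim scan: while walking the cached btree nodes it keeps up to 128 candidates (the size passed to init_heap() for c->flush_btree) in a max-heap ordered by journal index, so the node pinning the newest entry sits at the root and is evicted whenever a node pinning an older entry is found; the array is then re-heapified with journal_min_cmp so heap_pop returns nodes oldest-first. The stand-alone C sketch below shows that generic top-K-smallest pattern; it is illustrative only, with made-up names and plain int keys rather than bcache's heap macros and btree structures.

/* Illustrative top-K-smallest selection, mirroring the flush_btree logic. */
#include <stddef.h>
#include <stdbool.h>

#define K 128

struct topk {
	size_t used;
	int data[K];
};

/* Sift data[i] down; "before" says which of two keys belongs nearer the root. */
static void sift(struct topk *h, size_t i, bool (*before)(int, int))
{
	for (;;) {
		size_t l = 2 * i + 1, r = l + 1, m = i;

		if (l < h->used && before(h->data[l], h->data[m]))
			m = l;
		if (r < h->used && before(h->data[r], h->data[m]))
			m = r;
		if (m == i)
			return;
		int tmp = h->data[i];
		h->data[i] = h->data[m];
		h->data[m] = tmp;
		i = m;
	}
}

static bool max_first(int a, int b) { return a > b; }	/* like journal_max_cmp */
static bool min_first(int a, int b) { return a < b; }	/* like journal_min_cmp */

/* Keep the K smallest of keys[0..n), then arrange them to pop smallest-first. */
void topk_smallest(struct topk *h, const int *keys, size_t n)
{
	h->used = 0;
	for (size_t i = 0; i < n; i++) {
		if (h->used < K) {			/* heap not full: sift the key up */
			size_t c = h->used++;

			h->data[c] = keys[i];
			while (c && max_first(h->data[c], h->data[(c - 1) / 2])) {
				int tmp = h->data[c];

				h->data[c] = h->data[(c - 1) / 2];
				h->data[(c - 1) / 2] = tmp;
				c = (c - 1) / 2;
			}
		} else if (keys[i] < h->data[0]) {	/* beats the current maximum */
			h->data[0] = keys[i];
			sift(h, 0, max_first);
		}
	}
	/* Rebuild in place as a min-heap, as the journal_min_cmp pass does. */
	for (size_t i = h->used / 2; i-- > 0; )
		sift(h, i, min_first);
}

Draining is then the usual heap pop: take data[0], move the last element to the root, shrink used, and sift down with min_first, which is what the retry loop in btree_flush_write() consumes one node at a time.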
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 133b81225ea9..312895788036 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -957,7 +957,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
 	cached_dev_put(dc);
 }
 
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+			  uint8_t *set_uuid)
 {
 	uint32_t rtime = cpu_to_le32(get_seconds());
 	struct uuid_entry *u;
@@ -965,7 +966,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 
 	bdevname(dc->bdev, buf);
 
-	if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+	if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+	    (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
 		return -ENOENT;
 
 	if (dc->disk.c) {
@@ -1194,7 +1196,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
 
 	list_add(&dc->list, &uncached_devices);
 	list_for_each_entry(c, &bch_cache_sets, list)
-		bch_cached_dev_attach(dc, c);
+		bch_cached_dev_attach(dc, c, NULL);
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
 	    BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1553,7 +1555,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
 	c->congested_read_threshold_us = 2000;
 	c->congested_write_threshold_us = 20000;
-	c->error_limit = 8 << IO_ERROR_SHIFT;
+	c->error_limit = DEFAULT_IO_ERROR_LIMIT;
 
 	return c;
 err:
@@ -1716,7 +1718,7 @@ static void run_cache_set(struct cache_set *c)
 		bcache_write_super(c);
 
 		list_for_each_entry_safe(dc, t, &uncached_devices, list)
-			bch_cached_dev_attach(dc, c);
+			bch_cached_dev_attach(dc, c, NULL);
 
 		flash_devs_run(c);
 
@@ -1833,6 +1835,7 @@ void bch_cache_release(struct kobject *kobj)
 static int cache_alloc(struct cache *ca)
 {
 	size_t free;
+	size_t btree_buckets;
 	struct bucket *b;
 
 	__module_get(THIS_MODULE);
@@ -1840,9 +1843,19 @@ static int cache_alloc(struct cache *ca)
 
 	bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
 
+	/*
+	 * when ca->sb.njournal_buckets is not zero, journal exists,
+	 * and in bch_journal_replay(), tree node may split,
+	 * so bucket of RESERVE_BTREE type is needed,
+	 * the worst situation is all journal buckets are valid journal,
+	 * and all the keys need to replay,
+	 * so the number of RESERVE_BTREE type buckets should be as much
+	 * as journal buckets
+	 */
+	btree_buckets = ca->sb.njournal_buckets ?: 8;
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
-	if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+	if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
 	    !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b4184092c727..78cd7bd50fdd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -65,6 +65,9 @@ read_attribute(bset_tree_stats);
 
 read_attribute(state);
 read_attribute(cache_read_races);
+read_attribute(reclaim);
+read_attribute(flush_write);
+read_attribute(retry_flush_write);
 read_attribute(writeback_keys_done);
 read_attribute(writeback_keys_failed);
 read_attribute(io_errors);
@@ -195,7 +198,7 @@ STORE(__cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
 					     disk.kobj);
-	ssize_t v = size;
+	ssize_t v;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
 
@@ -215,7 +218,9 @@ STORE(__cached_dev)
 	sysfs_strtoul_clamp(writeback_rate,
 			    dc->writeback_rate.rate, 1, INT_MAX);
 
-	d_strtoul_nonzero(writeback_rate_update_seconds);
+	sysfs_strtoul_clamp(writeback_rate_update_seconds,
+			    dc->writeback_rate_update_seconds,
+			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
 	d_strtoul(writeback_rate_i_term_inverse);
 	d_strtoul_nonzero(writeback_rate_p_term_inverse);
 
@@ -267,17 +272,20 @@ STORE(__cached_dev)
 	}
 
 	if (attr == &sysfs_attach) {
-		if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+		uint8_t set_uuid[16];
+
+		if (bch_parse_uuid(buf, set_uuid) < 16)
 			return -EINVAL;
 
+		v = -ENOENT;
 		list_for_each_entry(c, &bch_cache_sets, list) {
-			v = bch_cached_dev_attach(dc, c);
+			v = bch_cached_dev_attach(dc, c, set_uuid);
 			if (!v)
 				return size;
 		}
 
 		pr_err("Can't attach %s: cache set not found", buf);
-		size = v;
+		return v;
 	}
 
 	if (attr == &sysfs_detach && dc->disk.c)
@@ -545,6 +553,15 @@ SHOW(__bch_cache_set)
 	sysfs_print(cache_read_races,
 		    atomic_long_read(&c->cache_read_races));
 
+	sysfs_print(reclaim,
+		    atomic_long_read(&c->reclaim));
+
+	sysfs_print(flush_write,
+		    atomic_long_read(&c->flush_write));
+
+	sysfs_print(retry_flush_write,
+		    atomic_long_read(&c->retry_flush_write));
+
 	sysfs_print(writeback_keys_done,
 		    atomic_long_read(&c->writeback_keys_done));
 	sysfs_print(writeback_keys_failed,
@@ -556,7 +573,7 @@ SHOW(__bch_cache_set)
 
 	/* See count_io_errors for why 88 */
 	sysfs_print(io_error_halflife, c->error_decay * 88);
-	sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
+	sysfs_print(io_error_limit, c->error_limit);
 
 	sysfs_hprint(congested,
 		     ((uint64_t) bch_get_congested(c)) << 9);
@@ -656,7 +673,7 @@ STORE(__bch_cache_set)
 	}
 
 	if (attr == &sysfs_io_error_limit)
-		c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
+		c->error_limit = strtoul_or_return(buf);
 
 	/* See count_io_errors() for why 88 */
 	if (attr == &sysfs_io_error_halflife)
@@ -731,6 +748,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
 
 	&sysfs_bset_tree_stats,
 	&sysfs_cache_read_races,
+	&sysfs_reclaim,
+	&sysfs_flush_write,
+	&sysfs_retry_flush_write,
 	&sysfs_writeback_keys_done,
 	&sysfs_writeback_keys_failed,
 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 4df4c5c1cab2..a6763db7f061 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -112,6 +112,8 @@ do {	\
 
 #define heap_full(h)	((h)->used == (h)->size)
 
+#define heap_empty(h)	((h)->used == 0)
+
 #define DECLARE_FIFO(type, name)	\
 	struct {	\
 		size_t front, back, size, mask;	\
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 51306a19ab03..f1d2fc15abcc 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
 
 	while (!kthread_should_stop()) {
 		down_write(&dc->writeback_lock);
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (!atomic_read(&dc->has_dirty) ||
 		    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
 		     !dc->writeback_running)) {
 			up_write(&dc->writeback_lock);
-			set_current_state(TASK_INTERRUPTIBLE);
 
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				set_current_state(TASK_RUNNING);
 				return 0;
+			}
 
 			schedule();
 			continue;
 		}
+		set_current_state(TASK_RUNNING);
 
 		searched_full_index = refill_dirty(dc);
 
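The thread-loop hunk above is an instance of the standard kthread sleep idiom: set TASK_INTERRUPTIBLE before re-checking the sleep condition, so a wakeup that lands between the check and schedule() puts the task back to TASK_RUNNING and schedule() returns immediately instead of the wakeup being lost; the explicit TASK_RUNNING resets on the exit paths avoid leaving the task marked sleeping while it keeps executing. The sketch below is a generic illustration of the idiom, not bcache code; work_available() and do_work() are made-up placeholders.

/* Generic sketch of the "set state, then check, then sleep" kthread idiom. */
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/types.h>

/* Hypothetical helpers standing in for the real work-detection logic. */
extern bool work_available(void *arg);
extern void do_work(void *arg);

static int example_thread(void *arg)
{
	while (!kthread_should_stop()) {
		/* 1: announce the intent to sleep before testing the condition */
		set_current_state(TASK_INTERRUPTIBLE);

		if (work_available(arg)) {
			/* 2: condition holds, so stay runnable and do the work */
			__set_current_state(TASK_RUNNING);
			do_work(arg);
			continue;
		}

		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}

		/* 3: sleep; a wakeup issued after step 1 makes this return at once */
		schedule();
	}
	return 0;
}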
@@ -652,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_rate.rate = 1024;
 	dc->writeback_rate_minimum = 8;
 
-	dc->writeback_rate_update_seconds = 5;
+	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
 	dc->writeback_rate_p_term_inverse = 40;
 	dc->writeback_rate_i_term_inverse = 10000;
 
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 66f1c527fa24..587b25599856 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -8,6 +8,9 @@
 #define MAX_WRITEBACKS_IN_PASS  5
 #define MAX_WRITESIZE_IN_PASS   5000	/* *512b */
 
+#define WRITEBACK_RATE_UPDATE_SECS_MAX		60
+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT	5
+
 /*
  * 14 (16384ths) is chosen here as something that each backing device
  * should be a reasonable fraction of the share, and not to blow up