author		Coly Li <colyli@suse.de>	2018-08-09 03:48:49 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-08-09 10:21:15 -0400
commit		ea8c5356d39048bc94bae068228f51ddbecc6b89 (patch)
tree		26eb52bd46bd1b5099115799be67d483c8ec18e8
parent		b467a6ac0b4bf57ec8c2329212e8a8a0231a2ef2 (diff)
bcache: set max writeback rate when I/O request is idle
Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") allows the writeback rate to be faster if there is no I/O request on a bcache device. It works well if there is only one bcache device attached to the cache set. If there are many bcache devices attached to a cache set, it may introduce a performance regression, because the multiple faster writeback threads of the idle bcache devices will compete for the btree-level locks with the bcache devices that do have I/O requests coming in.

This patch fixes the above issue by only permitting fast writeback when all bcache devices attached to the cache set are idle. If one of the bcache devices gets a new I/O request, all writeback throughput is immediately minimized, and the PI controller __update_writeback_rate() decides the upcoming writeback rate for each bcache device.

Also, when all bcache devices are idle, limiting the writeback rate to a small number is a waste of throughput, especially when the backing devices are slower non-rotational devices (e.g. SATA SSD). This patch sets a max writeback rate for each backing device if the whole cache set is idle. A faster writeback rate in idle time means new I/Os may have more available space for dirty data, and people may observe better write performance then.

Please note that bcache may change its cache mode at run time, and this patch still works if the cache mode is switched away from writeback mode while there is still dirty data on the cache.

Fixes: b1092c9af9ed ("bcache: allow quick writeback when backing idle")
Cc: stable@vger.kernel.org #4.16+
Signed-off-by: Coly Li <colyli@suse.de>
Tested-by: Kai Krakow <kai@kaishome.de>
Tested-by: Stefan Priebe <s.priebe@profihost.ag>
Cc: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
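
The counting policy described above can be illustrated with a small standalone C model (any C99 compiler). This is a sketch of the idea only, not bcache code: the macros ATTACHED_DEV_NR and ROUNDS and the local variables are made up for this example; the real logic lives in set_at_max_writeback_rate() and cached_dev_make_request() in the diff below.

/*
 * Toy model of the idle-detection policy described in the commit message.
 * This is NOT bcache code; every name below is local to this example.
 */
#include <stdio.h>

#define ATTACHED_DEV_NR	2	/* pretend two bcache devices are attached */
#define ROUNDS		30	/* simulated update_writeback_rate() rounds */

int main(void)
{
	int idle_counter = 0;		/* models c->idle_counter */
	int at_max_writeback_rate = 0;	/* models c->at_max_writeback_rate */

	for (int round = 1; round <= ROUNDS; round++) {
		/* A front-end I/O request resets the counter (request path). */
		if (round == 5) {
			idle_counter = 0;
			at_max_writeback_rate = 0;
			printf("round %2d: new I/O -> back to PI-controlled rate\n",
			       round);
			continue;
		}

		/*
		 * Each rate-update round increments the shared counter; only
		 * after ATTACHED_DEV_NR * 6 consecutive idle rounds is the
		 * whole cache set treated as idle and writeback unthrottled.
		 */
		if (++idle_counter >= ATTACHED_DEV_NR * 6)
			at_max_writeback_rate = 1;

		printf("round %2d: idle_counter=%2d at_max_writeback_rate=%d\n",
		       round, idle_counter, at_max_writeback_rate);
	}

	return 0;
}

With these example numbers the unthrottled state is reached at round 17 (two devices times six rounds after the reset at round 5), and a single front-end request is enough to fall back to the PI-controlled rate.
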
-rw-r--r--	drivers/md/bcache/bcache.h	10
-rw-r--r--	drivers/md/bcache/request.c	59
-rw-r--r--	drivers/md/bcache/super.c	4
-rw-r--r--	drivers/md/bcache/sysfs.c	15
-rw-r--r--	drivers/md/bcache/util.c	2
-rw-r--r--	drivers/md/bcache/util.h	2
-rw-r--r--	drivers/md/bcache/writeback.c	91
7 files changed, 138 insertions, 45 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b393b3fd06b6..05f82ff6f016 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -328,13 +328,6 @@ struct cached_dev {
 	 */
 	atomic_t		has_dirty;
 
-	/*
-	 * Set to zero by things that touch the backing volume-- except
-	 * writeback. Incremented by writeback. Used to determine when to
-	 * accelerate idle writeback.
-	 */
-	atomic_t		backing_idle;
-
 	struct bch_ratelimit	writeback_rate;
 	struct delayed_work	writeback_rate_update;
 
@@ -515,6 +508,8 @@ struct cache_set {
 	struct cache_accounting accounting;
 
 	unsigned long		flags;
+	atomic_t		idle_counter;
+	atomic_t		at_max_writeback_rate;
 
 	struct cache_sb		sb;
 
@@ -524,6 +519,7 @@ struct cache_set {
 
 	struct bcache_device	**devices;
 	unsigned		devices_max_used;
+	atomic_t		attached_dev_nr;
 	struct list_head	cached_devs;
 	uint64_t		cached_dev_sectors;
 	atomic_long_t		flash_dev_dirty_sectors;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 914d501ad1e0..7dbe8b6316a0 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1103,6 +1103,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
 	generic_make_request(bio);
 }
 
+static void quit_max_writeback_rate(struct cache_set *c,
+				    struct cached_dev *this_dc)
+{
+	int i;
+	struct bcache_device *d;
+	struct cached_dev *dc;
+
+	/*
+	 * mutex bch_register_lock may compete with other parallel requesters,
+	 * or attach/detach operations on other backing devices. Waiting for
+	 * the mutex lock may increase I/O request latency for seconds or more.
+	 * To avoid such a situation, if mutex_trylock() fails, only the
+	 * writeback rate of the current cached device is set to 1, and
+	 * __update_writeback_rate() will decide the writeback rate of other
+	 * cached devices (remember now c->idle_counter is 0 already).
+	 */
+	if (mutex_trylock(&bch_register_lock)) {
+		for (i = 0; i < c->devices_max_used; i++) {
+			if (!c->devices[i])
+				continue;
+
+			if (UUID_FLASH_ONLY(&c->uuids[i]))
+				continue;
+
+			d = c->devices[i];
+			dc = container_of(d, struct cached_dev, disk);
+			/*
+			 * Set writeback rate to the default minimum value,
+			 * then let update_writeback_rate() decide the
+			 * upcoming rate.
+			 */
+			atomic_long_set(&dc->writeback_rate.rate, 1);
+		}
+		mutex_unlock(&bch_register_lock);
+	} else
+		atomic_long_set(&this_dc->writeback_rate.rate, 1);
+}
+
 /* Cached devices - read & write stuff */
 
 static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1120,8 +1158,25 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 		return BLK_QC_T_NONE;
 	}
 
-	atomic_set(&dc->backing_idle, 0);
-	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
+	if (likely(d->c)) {
+		if (atomic_read(&d->c->idle_counter))
+			atomic_set(&d->c->idle_counter, 0);
+		/*
+		 * If at_max_writeback_rate of cache set is true and new I/O
+		 * comes, quit max writeback rate of all cached devices
+		 * attached to this cache set, and set at_max_writeback_rate
+		 * to false.
+		 */
+		if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
+			atomic_set(&d->c->at_max_writeback_rate, 0);
+			quit_max_writeback_rate(d->c, dc);
+		}
+	}
+
+	generic_start_io_acct(q,
+			      bio_op(bio),
+			      bio_sectors(bio),
+			      &d->disk->part0);
 
 	bio_set_dev(bio, dc->bdev);
 	bio->bi_iter.bi_sector += dc->sb.data_offset;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1e85cbb4c159..55a37641aa95 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
 {
 	lockdep_assert_held(&bch_register_lock);
 
+	atomic_dec(&d->c->attached_dev_nr);
+
 	if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
 		struct uuid_entry *u = d->c->uuids + d->id;
 
@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 
 	bch_cached_dev_run(dc);
 	bcache_device_link(&dc->disk, c, "bdev");
+	atomic_inc(&c->attached_dev_nr);
 
 	/* Allow the writeback thread to proceed */
 	up_write(&dc->writeback_lock);
@@ -1696,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	c->block_bits		= ilog2(sb->block_size);
 	c->nr_uuids		= bucket_bytes(c) / sizeof(struct uuid_entry);
 	c->devices_max_used	= 0;
+	atomic_set(&c->attached_dev_nr, 0);
 	c->btree_pages		= bucket_pages(c);
 	if (c->btree_pages > BTREE_MAX_PAGES)
 		c->btree_pages = max_t(int, c->btree_pages / 4,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3e9d3459a224..6e88142514fb 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -171,7 +171,8 @@ SHOW(__bch_cached_dev)
 	var_printf(writeback_running,	"%i");
 	var_print(writeback_delay);
 	var_print(writeback_percent);
-	sysfs_hprint(writeback_rate, wb ? dc->writeback_rate.rate << 9 : 0);
+	sysfs_hprint(writeback_rate,
+		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
 	sysfs_hprint(io_errors,		atomic_read(&dc->io_errors));
 	sysfs_printf(io_error_limit,	"%i", dc->error_limit);
 	sysfs_printf(io_disable,	"%i", dc->io_disable);
@@ -193,7 +194,9 @@ SHOW(__bch_cached_dev)
 		 * Except for dirty and target, other values should
 		 * be 0 if writeback is not running.
 		 */
-		bch_hprint(rate, wb ? dc->writeback_rate.rate << 9 : 0);
+		bch_hprint(rate,
+			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
+			      : 0);
 		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
 		bch_hprint(target, dc->writeback_rate_target << 9);
 		bch_hprint(proportional,
@@ -261,8 +264,12 @@ STORE(__cached_dev)
 
 	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
 
-	sysfs_strtoul_clamp(writeback_rate,
-			    dc->writeback_rate.rate, 1, INT_MAX);
+	if (attr == &sysfs_writeback_rate) {
+		int v;
+
+		sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
+		atomic_long_set(&dc->writeback_rate.rate, v);
+	}
 
 	sysfs_strtoul_clamp(writeback_rate_update_seconds,
 			    dc->writeback_rate_update_seconds,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index fc479b026d6d..b15256bcf0e7 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
 	uint64_t now = local_clock();
 
-	d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+	d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
 
 	/* Bound the time. Don't let us fall further than 2 seconds behind
 	 * (this prevents unnecessary backlog that would make it impossible
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index cced87f8eb27..f7b0133c9d2f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -442,7 +442,7 @@ struct bch_ratelimit {
 	 * Rate at which we want to do work, in units per second
 	 * The units here correspond to the units passed to bch_next_delay()
 	 */
-	uint32_t		rate;
+	atomic_long_t		rate;
 };
 
 static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 912e969fedba..481d4cf38ac0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
 	dc->writeback_rate_proportional = proportional_scaled;
 	dc->writeback_rate_integral_scaled = integral_scaled;
-	dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
-	dc->writeback_rate.rate = new_rate;
+	dc->writeback_rate_change = new_rate -
+			atomic_long_read(&dc->writeback_rate.rate);
+	atomic_long_set(&dc->writeback_rate.rate, new_rate);
 	dc->writeback_rate_target = target;
 }
 
+static bool set_at_max_writeback_rate(struct cache_set *c,
+				      struct cached_dev *dc)
+{
+	/*
+	 * idle_counter is increased every time update_writeback_rate() is
+	 * called. If all backing devices attached to the same cache set have
+	 * identical dc->writeback_rate_update_seconds values, it takes about
+	 * 6 rounds of update_writeback_rate() on each backing device before
+	 * c->at_max_writeback_rate is set to 1, and then the max writeback
+	 * rate is set to each dc->writeback_rate.rate.
+	 * In order to avoid extra locking cost for counting the exact number
+	 * of dirty cached devices, c->attached_dev_nr is used to calculate
+	 * the idle threshold. It might be bigger if not all cached devices
+	 * are in writeback mode, but it still works well with limited extra
+	 * rounds of update_writeback_rate().
+	 */
+	if (atomic_inc_return(&c->idle_counter) <
+	    atomic_read(&c->attached_dev_nr) * 6)
+		return false;
+
+	if (atomic_read(&c->at_max_writeback_rate) != 1)
+		atomic_set(&c->at_max_writeback_rate, 1);
+
+	atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+	/* keep writeback_rate_target as existing value */
+	dc->writeback_rate_proportional = 0;
+	dc->writeback_rate_integral_scaled = 0;
+	dc->writeback_rate_change = 0;
+
+	/*
+	 * Check c->idle_counter and c->at_max_writeback_rate again in case
+	 * new I/O arrived before set_at_max_writeback_rate() returns.
+	 * Then the writeback rate is set to 1, and its new value should be
+	 * decided via __update_writeback_rate().
+	 */
+	if ((atomic_read(&c->idle_counter) <
+	     atomic_read(&c->attached_dev_nr) * 6) ||
+	    !atomic_read(&c->at_max_writeback_rate))
+		return false;
+
+	return true;
+}
+
 static void update_writeback_rate(struct work_struct *work)
 {
 	struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
 		return;
 	}
 
-	down_read(&dc->writeback_lock);
-
-	if (atomic_read(&dc->has_dirty) &&
-	    dc->writeback_percent)
-		__update_writeback_rate(dc);
+	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+		/*
+		 * If the whole cache set is idle, set_at_max_writeback_rate()
+		 * will set writeback rate to a max number. Then it is
+		 * unnecessary to update writeback rate for an idle cache set
+		 * in maximum writeback rate number(s).
+		 */
+		if (!set_at_max_writeback_rate(c, dc)) {
+			down_read(&dc->writeback_lock);
+			__update_writeback_rate(dc);
+			up_read(&dc->writeback_lock);
+		}
+	}
 
-	up_read(&dc->writeback_lock);
 
 	/*
 	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
 
 		delay = writeback_delay(dc, size);
 
-		/* If the control system would wait for at least half a
-		 * second, and there's been no reqs hitting the backing disk
-		 * for awhile: use an alternate mode where we have at most
-		 * one contiguous set of writebacks in flight at a time. If
-		 * someone wants to do IO it will be quick, as it will only
-		 * have to contend with one operation in flight, and we'll
-		 * be round-tripping data to the backing disk as quickly as
-		 * it can accept it.
-		 */
-		if (delay >= HZ / 2) {
-			/* 3 means at least 1.5 seconds, up to 7.5 if we
-			 * have slowed way down.
-			 */
-			if (atomic_inc_return(&dc->backing_idle) >= 3) {
-				/* Wait for current I/Os to finish */
-				closure_sync(&cl);
-				/* And immediately launch a new set. */
-				delay = 0;
-			}
-		}
-
 		while (!kthread_should_stop() &&
 		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
 		       delay) {
@@ -741,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_running	= true;
 	dc->writeback_percent	= 10;
 	dc->writeback_delay	= 30;
-	dc->writeback_rate.rate	= 1024;
+	atomic_long_set(&dc->writeback_rate.rate, 1024);
 	dc->writeback_rate_minimum	= 8;
 
 	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;