aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorKent Overstreet <kmo@daterainc.com>2013-11-11 16:58:34 -0500
committerKent Overstreet <kmo@daterainc.com>2013-12-16 17:22:59 -0500
commit16749c23c00c686ed168471963e3ddb0f3fcd855 (patch)
treec64fc28761ab499bbcdcf1f626576d1781c87ba9 /drivers/md
parent6d3d1a9c542b19dff1c7d7c8354d0869e4655287 (diff)
bcache: New writeback PD controller
The old writeback PD controller could get into states where it had throttled all the way down and take way too long to recover - it was too complicated to really understand what it was doing. This rewrites a good chunk of it to hopefully be simpler and make more sense, and it also pays more attention to units which should make the behaviour a bit easier to understand. Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/bcache.h6
-rw-r--r--drivers/md/bcache/sysfs.c50
-rw-r--r--drivers/md/bcache/util.c8
-rw-r--r--drivers/md/bcache/writeback.c47
4 files changed, 62 insertions, 49 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index a7b1a7631ed2..754f43177483 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -373,14 +373,14 @@ struct cached_dev {
373 unsigned char writeback_percent; 373 unsigned char writeback_percent;
374 unsigned writeback_delay; 374 unsigned writeback_delay;
375 375
376 int writeback_rate_change;
377 int64_t writeback_rate_derivative;
378 uint64_t writeback_rate_target; 376 uint64_t writeback_rate_target;
377 int64_t writeback_rate_proportional;
378 int64_t writeback_rate_derivative;
379 int64_t writeback_rate_change;
379 380
380 unsigned writeback_rate_update_seconds; 381 unsigned writeback_rate_update_seconds;
381 unsigned writeback_rate_d_term; 382 unsigned writeback_rate_d_term;
382 unsigned writeback_rate_p_term_inverse; 383 unsigned writeback_rate_p_term_inverse;
383 unsigned writeback_rate_d_smooth;
384}; 384};
385 385
386enum alloc_watermarks { 386enum alloc_watermarks {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 80d4c2bee18a..a1f85612f0b3 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -83,7 +83,6 @@ rw_attribute(writeback_rate);
83rw_attribute(writeback_rate_update_seconds); 83rw_attribute(writeback_rate_update_seconds);
84rw_attribute(writeback_rate_d_term); 84rw_attribute(writeback_rate_d_term);
85rw_attribute(writeback_rate_p_term_inverse); 85rw_attribute(writeback_rate_p_term_inverse);
86rw_attribute(writeback_rate_d_smooth);
87read_attribute(writeback_rate_debug); 86read_attribute(writeback_rate_debug);
88 87
89read_attribute(stripe_size); 88read_attribute(stripe_size);
@@ -129,31 +128,41 @@ SHOW(__bch_cached_dev)
129 var_printf(writeback_running, "%i"); 128 var_printf(writeback_running, "%i");
130 var_print(writeback_delay); 129 var_print(writeback_delay);
131 var_print(writeback_percent); 130 var_print(writeback_percent);
132 sysfs_print(writeback_rate, dc->writeback_rate.rate); 131 sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
133 132
134 var_print(writeback_rate_update_seconds); 133 var_print(writeback_rate_update_seconds);
135 var_print(writeback_rate_d_term); 134 var_print(writeback_rate_d_term);
136 var_print(writeback_rate_p_term_inverse); 135 var_print(writeback_rate_p_term_inverse);
137 var_print(writeback_rate_d_smooth);
138 136
139 if (attr == &sysfs_writeback_rate_debug) { 137 if (attr == &sysfs_writeback_rate_debug) {
138 char rate[20];
140 char dirty[20]; 139 char dirty[20];
141 char derivative[20];
142 char target[20]; 140 char target[20];
143 bch_hprint(dirty, 141 char proportional[20];
144 bcache_dev_sectors_dirty(&dc->disk) << 9); 142 char derivative[20];
145 bch_hprint(derivative, dc->writeback_rate_derivative << 9); 143 char change[20];
144 s64 next_io;
145
146 bch_hprint(rate, dc->writeback_rate.rate << 9);
147 bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
146 bch_hprint(target, dc->writeback_rate_target << 9); 148 bch_hprint(target, dc->writeback_rate_target << 9);
149 bch_hprint(proportional,dc->writeback_rate_proportional << 9);
150 bch_hprint(derivative, dc->writeback_rate_derivative << 9);
151 bch_hprint(change, dc->writeback_rate_change << 9);
152
153 next_io = div64_s64(dc->writeback_rate.next - local_clock(),
154 NSEC_PER_MSEC);
147 155
148 return sprintf(buf, 156 return sprintf(buf,
149 "rate:\t\t%u\n" 157 "rate:\t\t%s/sec\n"
150 "change:\t\t%i\n"
151 "dirty:\t\t%s\n" 158 "dirty:\t\t%s\n"
159 "target:\t\t%s\n"
160 "proportional:\t%s\n"
152 "derivative:\t%s\n" 161 "derivative:\t%s\n"
153 "target:\t\t%s\n", 162 "change:\t\t%s/sec\n"
154 dc->writeback_rate.rate, 163 "next io:\t%llims\n",
155 dc->writeback_rate_change, 164 rate, dirty, target, proportional,
156 dirty, derivative, target); 165 derivative, change, next_io);
157 } 166 }
158 167
159 sysfs_hprint(dirty_data, 168 sysfs_hprint(dirty_data,
@@ -189,6 +198,7 @@ STORE(__cached_dev)
189 struct kobj_uevent_env *env; 198 struct kobj_uevent_env *env;
190 199
191#define d_strtoul(var) sysfs_strtoul(var, dc->var) 200#define d_strtoul(var) sysfs_strtoul(var, dc->var)
201#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
192#define d_strtoi_h(var) sysfs_hatoi(var, dc->var) 202#define d_strtoi_h(var) sysfs_hatoi(var, dc->var)
193 203
194 sysfs_strtoul(data_csum, dc->disk.data_csum); 204 sysfs_strtoul(data_csum, dc->disk.data_csum);
@@ -197,16 +207,15 @@ STORE(__cached_dev)
197 d_strtoul(writeback_metadata); 207 d_strtoul(writeback_metadata);
198 d_strtoul(writeback_running); 208 d_strtoul(writeback_running);
199 d_strtoul(writeback_delay); 209 d_strtoul(writeback_delay);
200 sysfs_strtoul_clamp(writeback_rate, 210
201 dc->writeback_rate.rate, 1, 1000000);
202 sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); 211 sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
203 212
204 d_strtoul(writeback_rate_update_seconds); 213 sysfs_strtoul_clamp(writeback_rate,
214 dc->writeback_rate.rate, 1, INT_MAX);
215
216 d_strtoul_nonzero(writeback_rate_update_seconds);
205 d_strtoul(writeback_rate_d_term); 217 d_strtoul(writeback_rate_d_term);
206 d_strtoul(writeback_rate_p_term_inverse); 218 d_strtoul_nonzero(writeback_rate_p_term_inverse);
207 sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
208 dc->writeback_rate_p_term_inverse, 1, INT_MAX);
209 d_strtoul(writeback_rate_d_smooth);
210 219
211 d_strtoi_h(sequential_cutoff); 220 d_strtoi_h(sequential_cutoff);
212 d_strtoi_h(readahead); 221 d_strtoi_h(readahead);
@@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = {
313 &sysfs_writeback_rate_update_seconds, 322 &sysfs_writeback_rate_update_seconds,
314 &sysfs_writeback_rate_d_term, 323 &sysfs_writeback_rate_d_term,
315 &sysfs_writeback_rate_p_term_inverse, 324 &sysfs_writeback_rate_p_term_inverse,
316 &sysfs_writeback_rate_d_smooth,
317 &sysfs_writeback_rate_debug, 325 &sysfs_writeback_rate_debug,
318 &sysfs_dirty_data, 326 &sysfs_dirty_data,
319 &sysfs_stripe_size, 327 &sysfs_stripe_size,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 462214eeacbe..bb37618e7664 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
209{ 209{
210 uint64_t now = local_clock(); 210 uint64_t now = local_clock();
211 211
212 d->next += div_u64(done, d->rate); 212 d->next += div_u64(done * NSEC_PER_SEC, d->rate);
213
214 if (time_before64(now + NSEC_PER_SEC, d->next))
215 d->next = now + NSEC_PER_SEC;
216
217 if (time_after64(now - NSEC_PER_SEC * 2, d->next))
218 d->next = now - NSEC_PER_SEC * 2;
213 219
214 return time_after64(d->next, now) 220 return time_after64(d->next, now)
215 ? div_u64(d->next - now, NSEC_PER_SEC / HZ) 221 ? div_u64(d->next - now, NSEC_PER_SEC / HZ)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3cd931d3f26c..6c44fe059c27 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc)
30 30
31 /* PD controller */ 31 /* PD controller */
32 32
33 int change = 0;
34 int64_t error;
35 int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); 33 int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
36 int64_t derivative = dirty - dc->disk.sectors_dirty_last; 34 int64_t derivative = dirty - dc->disk.sectors_dirty_last;
35 int64_t proportional = dirty - target;
36 int64_t change;
37 37
38 dc->disk.sectors_dirty_last = dirty; 38 dc->disk.sectors_dirty_last = dirty;
39 39
40 derivative *= dc->writeback_rate_d_term; 40 /* Scale to sectors per second */
41 derivative = clamp(derivative, -dirty, dirty);
42 41
43 derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, 42 proportional *= dc->writeback_rate_update_seconds;
44 dc->writeback_rate_d_smooth, 0); 43 proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
45 44
46 /* Avoid divide by zero */ 45 derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
47 if (!target)
48 goto out;
49 46
50 error = div64_s64((dirty + derivative - target) << 8, target); 47 derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
48 (dc->writeback_rate_d_term /
49 dc->writeback_rate_update_seconds) ?: 1, 0);
50
51 derivative *= dc->writeback_rate_d_term;
52 derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
51 53
52 change = div_s64((dc->writeback_rate.rate * error) >> 8, 54 change = proportional + derivative;
53 dc->writeback_rate_p_term_inverse);
54 55
55 /* Don't increase writeback rate if the device isn't keeping up */ 56 /* Don't increase writeback rate if the device isn't keeping up */
56 if (change > 0 && 57 if (change > 0 &&
57 time_after64(local_clock(), 58 time_after64(local_clock(),
58 dc->writeback_rate.next + 10 * NSEC_PER_MSEC)) 59 dc->writeback_rate.next + NSEC_PER_MSEC))
59 change = 0; 60 change = 0;
60 61
61 dc->writeback_rate.rate = 62 dc->writeback_rate.rate =
62 clamp_t(int64_t, dc->writeback_rate.rate + change, 63 clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
63 1, NSEC_PER_MSEC); 64 1, NSEC_PER_MSEC);
64out: 65
66 dc->writeback_rate_proportional = proportional;
65 dc->writeback_rate_derivative = derivative; 67 dc->writeback_rate_derivative = derivative;
66 dc->writeback_rate_change = change; 68 dc->writeback_rate_change = change;
67 dc->writeback_rate_target = target; 69 dc->writeback_rate_target = target;
@@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work)
87 89
88static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) 90static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
89{ 91{
90 uint64_t ret;
91
92 if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || 92 if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
93 !dc->writeback_percent) 93 !dc->writeback_percent)
94 return 0; 94 return 0;
95 95
96 ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); 96 return bch_next_delay(&dc->writeback_rate, sectors);
97
98 return min_t(uint64_t, ret, HZ);
99} 97}
100 98
101struct dirty_io { 99struct dirty_io {
@@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
476 474
477 bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), 475 bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
478 sectors_dirty_init_fn, 0); 476 sectors_dirty_init_fn, 0);
477
478 dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
479} 479}
480 480
481int bch_cached_dev_writeback_init(struct cached_dev *dc) 481int bch_cached_dev_writeback_init(struct cached_dev *dc)
@@ -490,10 +490,9 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
490 dc->writeback_delay = 30; 490 dc->writeback_delay = 30;
491 dc->writeback_rate.rate = 1024; 491 dc->writeback_rate.rate = 1024;
492 492
493 dc->writeback_rate_update_seconds = 30; 493 dc->writeback_rate_update_seconds = 5;
494 dc->writeback_rate_d_term = 16; 494 dc->writeback_rate_d_term = 30;
495 dc->writeback_rate_p_term_inverse = 64; 495 dc->writeback_rate_p_term_inverse = 6000;
496 dc->writeback_rate_d_smooth = 8;
497 496
498 dc->writeback_thread = kthread_create(bch_writeback_thread, dc, 497 dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
499 "bcache_writeback"); 498 "bcache_writeback");