author	Kent Overstreet <koverstreet@google.com>	2013-06-05 09:24:39 -0400
committer	Kent Overstreet <koverstreet@google.com>	2013-06-27 00:58:04 -0400
commit	72c270612bd33192fa836ad0f2939af1ca218292 (patch)
tree	344129d75f3b5c0abcf77dd4b6340783a126cde8 /drivers/md
parent	279afbad4e54acbd61bf88a54a73af3bbfdeb5dd (diff)
bcache: Write out full stripes
Now that we're tracking dirty data per stripe, we can add two
optimizations for raid5/6:

 * If a stripe is already dirty, force writes to that stripe to
   writeback mode - to help build up full stripes of dirty data

 * When flushing dirty data, preferentially write out full stripes
   first if there are any.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
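In outline, both optimizations reduce to walking the per-stripe dirty-sector
counts for the stripes a write or a btree key covers. The standalone sketch
below illustrates that logic only; struct sketch_dev and the function names
are illustrative stand-ins, not kernel API. In the patch proper these loops
appear as bcache_dev_stripe_dirty() and dirty_full_stripe_pred(), operating
on atomic_t counters (see the writeback.h and writeback.c hunks below).

	/*
	 * Simplified, user-space sketch of the two stripe heuristics.
	 * Types and field names are illustrative, not the kernel's.
	 */
	#include <stdbool.h>
	#include <stdint.h>

	struct sketch_dev {
		unsigned stripe_size_bits;	/* stripe size = 1 << bits, in sectors */
		unsigned *stripe_sectors_dirty;	/* dirty sectors, one count per stripe */
	};

	/* Optimization 1: a write touching an already-dirty stripe is forced
	 * to writeback mode, so the cache accumulates a full stripe of dirty
	 * data instead of issuing a partial-stripe write to raid5/6. */
	static bool touches_dirty_stripe(struct sketch_dev *d, uint64_t offset,
					 unsigned nr_sectors)
	{
		uint64_t stripe = offset >> d->stripe_size_bits;
		unsigned stripe_size = 1 << d->stripe_size_bits;

		while (1) {
			if (d->stripe_sectors_dirty[stripe])
				return true;	/* stripe already partly dirty */
			if (nr_sectors <= stripe_size)
				return false;	/* no more stripes to check */
			nr_sectors -= stripe_size;
			stripe++;		/* write spans into next stripe */
		}
	}

	/* Optimization 2: when flushing, prefer keys whose stripes are
	 * completely dirty - those can go out as full-stripe writes with no
	 * read-modify-write cycle on the backing device. */
	static bool covers_only_full_stripes(struct sketch_dev *d, uint64_t start,
					     unsigned nr_sectors)
	{
		uint64_t stripe = start >> d->stripe_size_bits;
		unsigned stripe_size = 1 << d->stripe_size_bits;

		while (1) {
			if (d->stripe_sectors_dirty[stripe] != stripe_size)
				return false;	/* partial stripe - deprioritize */
			if (nr_sectors <= stripe_size)
				return true;	/* every covered stripe is full */
			nr_sectors -= stripe_size;
			stripe++;
		}
	}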
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/bcache/bcache.h	3
-rw-r--r--	drivers/md/bcache/btree.c	19
-rw-r--r--	drivers/md/bcache/btree.h	9
-rw-r--r--	drivers/md/bcache/debug.c	4
-rw-r--r--	drivers/md/bcache/movinggc.c	5
-rw-r--r--	drivers/md/bcache/request.c	23
-rw-r--r--	drivers/md/bcache/sysfs.c	8
-rw-r--r--	drivers/md/bcache/writeback.c	44
-rw-r--r--	drivers/md/bcache/writeback.h	43
9 files changed, 121 insertions(+), 37 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index dbddef0cdb59..342ba86c6e4f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -387,8 +387,6 @@ struct keybuf_key {
 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
 
 struct keybuf {
-	keybuf_pred_fn	*key_predicate;
-
 	struct bkey	last_scanned;
 	spinlock_t	lock;
 
@@ -532,6 +530,7 @@ struct cached_dev {
 	unsigned	sequential_merge:1;
 	unsigned	verify:1;
 
+	unsigned	partial_stripes_expensive:1;
 	unsigned	writeback_metadata:1;
 	unsigned	writeback_running:1;
 	unsigned char	writeback_percent;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index b93cf56260a4..09fb8a2f43da 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 }
 
 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
-				   struct keybuf *buf, struct bkey *end)
+				   struct keybuf *buf, struct bkey *end,
+				   keybuf_pred_fn *pred)
 {
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 		if (bkey_cmp(&buf->last_scanned, end) >= 0)
 			break;
 
-		if (buf->key_predicate(buf, k)) {
+		if (pred(buf, k)) {
 			struct keybuf_key *w;
 
 			spin_lock(&buf->lock);
@@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 		if (!k)
 			break;
 
-		btree(refill_keybuf, k, b, op, buf, end);
+		btree(refill_keybuf, k, b, op, buf, end, pred);
 		/*
 		 * Might get an error here, but can't really do anything
 		 * and it'll get logged elsewhere. Just read what we
@@ -2308,7 +2309,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 }
 
 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
-		       struct bkey *end)
+		       struct bkey *end, keybuf_pred_fn *pred)
 {
 	struct bkey start = buf->last_scanned;
 	struct btree_op op;
@@ -2316,7 +2317,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
 	cond_resched();
 
-	btree_root(refill_keybuf, c, &op, buf, end);
+	btree_root(refill_keybuf, c, &op, buf, end, pred);
 	closure_sync(&op.cl);
 
 	pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
 					  struct keybuf *buf,
-					  struct bkey *end)
+					  struct bkey *end,
+					  keybuf_pred_fn *pred)
 {
 	struct keybuf_key *ret;
 
@@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
 			break;
 		}
 
-		bch_refill_keybuf(c, buf, end);
+		bch_refill_keybuf(c, buf, end, pred);
 	}
 
 	return ret;
 }
 
-void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+void bch_keybuf_init(struct keybuf *buf)
 {
-	buf->key_predicate	= fn;
 	buf->last_scanned	= MAX_KEY;
 	buf->keys		= RB_ROOT;
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 2b016b93cad4..f66d69a7baf1 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *);
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
-void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
+void bch_keybuf_init(struct keybuf *);
+void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
+		       keybuf_pred_fn *);
 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
 				  struct bkey *);
 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
-					  struct keybuf *, struct bkey *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
+					  struct bkey *, keybuf_pred_fn *);
 
 #endif
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 82e3a07771ec..1c8fd319846e 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 		if (i->bytes)
 			break;
 
-		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
 		if (!w)
 			break;
 
@@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file)
 
 	file->private_data = i;
 	i->c = c;
-	bch_keybuf_init(&i->keys, dump_pred);
+	bch_keybuf_init(&i->keys);
 	i->keys.last_scanned = KEY(0, 0, 0);
 
 	return 0;
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 04f6b97ffda6..a241e9fd4f7f 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -136,7 +136,8 @@ static void read_moving(struct closure *cl)
 	/* XXX: if we error, background writeback could stall indefinitely */
 
 	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
-		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
+					   &MAX_KEY, moving_pred);
 		if (!w)
 			break;
 
@@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl)
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
-	bch_keybuf_init(&c->moving_gc_keys, moving_pred);
+	bch_keybuf_init(&c->moving_gc_keys);
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 017c95fced8e..17bd59704eba 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -22,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD	95
 #define CUTOFF_CACHE_READA	90
-#define CUTOFF_WRITEBACK	50
-#define CUTOFF_WRITEBACK_SYNC	75
 
 struct kmem_cache *bch_search_cache;
 
@@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl)
 	cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-	unsigned threshold = (bio->bi_rw & REQ_SYNC)
-		? CUTOFF_WRITEBACK_SYNC
-		: CUTOFF_WRITEBACK;
-
-	return !atomic_read(&dc->disk.detaching) &&
-		cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-		dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
@@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (bio->bi_rw & REQ_DISCARD)
 		goto skip;
 
+	if (should_writeback(dc, s->orig_bio,
+			     cache_mode(dc, bio),
+			     s->op.skip)) {
+		s->op.skip = false;
+		s->writeback = true;
+	}
+
 	if (s->op.skip)
 		goto skip;
 
-	if (should_writeback(dc, s->orig_bio))
-		s->writeback = true;
-
 	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
 	if (!s->writeback) {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index cf8d91ec3238..70c6dff0d0cd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse);
 rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
+read_attribute(stripe_size);
+read_attribute(partial_stripes_expensive);
+
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
@@ -147,6 +150,9 @@ SHOW(__bch_cached_dev)
 		sysfs_hprint(dirty_data,
 			     bcache_dev_sectors_dirty(&dc->disk) << 9);
 
+		sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9);
+		var_printf(partial_stripes_expensive,	"%u");
+
 		var_printf(sequential_merge,	"%i");
 		var_hprint(sequential_cutoff);
 		var_hprint(readahead);
@@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_rate_d_smooth,
 	&sysfs_writeback_rate_debug,
 	&sysfs_dirty_data,
+	&sysfs_stripe_size,
+	&sysfs_partial_stripes_expensive,
 	&sysfs_sequential_cutoff,
 	&sysfs_sequential_merge,
 	&sysfs_clear_stats,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index dd815475c524..d81ee5ccc726 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
 	return KEY_DIRTY(k);
 }
 
+static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
+{
+	uint64_t stripe;
+	unsigned nr_sectors = KEY_SIZE(k);
+	struct cached_dev *dc = container_of(buf, struct cached_dev,
+					     writeback_keys);
+	unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
+
+	if (!KEY_DIRTY(k))
+		return false;
+
+	stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
+	while (1) {
+		if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
+		    stripe_size)
+			return false;
+
+		if (nr_sectors <= stripe_size)
+			return true;
+
+		nr_sectors -= stripe_size;
+		stripe++;
+	}
+}
+
 static void dirty_init(struct keybuf_key *w)
 {
 	struct dirty_io *io = w->private;
@@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl)
 		searched_from_start = true;
 	}
 
-	bch_refill_keybuf(dc->disk.c, buf, &end);
+	if (dc->partial_stripes_expensive) {
+		uint64_t i;
+
+		for (i = 0; i < dc->disk.nr_stripes; i++)
+			if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
+			    1 << dc->disk.stripe_size_bits)
+				goto full_stripes;
+
+		goto normal_refill;
+full_stripes:
+		bch_refill_keybuf(dc->disk.c, buf, &end,
+				  dirty_full_stripe_pred);
+	} else {
+normal_refill:
+		bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
+	}
 
 	if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
 		/* Searched the entire btree - delay awhile */
@@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
-	bch_keybuf_init(&dc->writeback_keys, dirty_pred);
+	bch_keybuf_init(&dc->writeback_keys);
 
 	dc->writeback_metadata	= true;
 	dc->writeback_running	= true;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 5ce9771df047..c91f61bb95b6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -1,6 +1,9 @@
 #ifndef _BCACHE_WRITEBACK_H
 #define _BCACHE_WRITEBACK_H
 
+#define CUTOFF_WRITEBACK	40
+#define CUTOFF_WRITEBACK_SYNC	70
+
 static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 {
 	uint64_t i, ret = 0;
@@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 	return ret;
 }
 
+static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
+					   uint64_t offset,
+					   unsigned nr_sectors)
+{
+	uint64_t stripe = offset >> d->stripe_size_bits;
+
+	while (1) {
+		if (atomic_read(d->stripe_sectors_dirty + stripe))
+			return true;
+
+		if (nr_sectors <= 1 << d->stripe_size_bits)
+			return false;
+
+		nr_sectors -= 1 << d->stripe_size_bits;
+		stripe++;
+	}
+}
+
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+				    unsigned cache_mode, bool would_skip)
+{
+	unsigned in_use = dc->disk.c->gc_stats.in_use;
+
+	if (cache_mode != CACHE_MODE_WRITEBACK ||
+	    atomic_read(&dc->disk.detaching) ||
+	    in_use > CUTOFF_WRITEBACK_SYNC)
+		return false;
+
+	if (dc->partial_stripes_expensive &&
+	    bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
+				    bio_sectors(bio)))
+		return true;
+
+	if (would_skip)
+		return false;
+
+	return bio->bi_rw & REQ_SYNC ||
+		in_use <= CUTOFF_WRITEBACK;
+}
+
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
 void bch_writeback_queue(struct cached_dev *);
 void bch_writeback_add(struct cached_dev *);