aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/bcache
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/bset.c39
-rw-r--r--drivers/md/bcache/btree.c4
-rw-r--r--drivers/md/bcache/journal.c33
-rw-r--r--drivers/md/bcache/request.c20
-rw-r--r--drivers/md/bcache/sysfs.c9
-rw-r--r--drivers/md/bcache/util.c11
-rw-r--r--drivers/md/bcache/util.h12
-rw-r--r--drivers/md/bcache/writeback.c42
9 files changed, 110 insertions, 67 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f2..0f12382aa35d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
498 */ 498 */
499 atomic_t has_dirty; 499 atomic_t has_dirty;
500 500
501 struct ratelimit writeback_rate; 501 struct bch_ratelimit writeback_rate;
502 struct delayed_work writeback_rate_update; 502 struct delayed_work writeback_rate_update;
503 503
504 /* 504 /*
@@ -507,10 +507,9 @@ struct cached_dev {
507 */ 507 */
508 sector_t last_read; 508 sector_t last_read;
509 509
510 /* Number of writeback bios in flight */ 510 /* Limit number of writeback bios in flight */
511 atomic_t in_flight; 511 struct semaphore in_flight;
512 struct closure_with_timer writeback; 512 struct closure_with_timer writeback;
513 struct closure_waitlist writeback_wait;
514 513
515 struct keybuf writeback_keys; 514 struct keybuf writeback_keys;
516 515
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a51..22d1ae72c282 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
926 926
927/* Mergesort */ 927/* Mergesort */
928 928
929static void sort_key_next(struct btree_iter *iter,
930 struct btree_iter_set *i)
931{
932 i->k = bkey_next(i->k);
933
934 if (i->k == i->end)
935 *i = iter->data[--iter->used];
936}
937
929static void btree_sort_fixup(struct btree_iter *iter) 938static void btree_sort_fixup(struct btree_iter *iter)
930{ 939{
931 while (iter->used > 1) { 940 while (iter->used > 1) {
932 struct btree_iter_set *top = iter->data, *i = top + 1; 941 struct btree_iter_set *top = iter->data, *i = top + 1;
933 struct bkey *k;
934 942
935 if (iter->used > 2 && 943 if (iter->used > 2 &&
936 btree_iter_cmp(i[0], i[1])) 944 btree_iter_cmp(i[0], i[1]))
937 i++; 945 i++;
938 946
939 for (k = i->k; 947 if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
940 k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
941 k = bkey_next(k))
942 if (top->k > i->k)
943 __bch_cut_front(top->k, k);
944 else if (KEY_SIZE(k))
945 bch_cut_back(&START_KEY(k), top->k);
946
947 if (top->k < i->k || k == i->k)
948 break; 948 break;
949 949
950 heap_sift(iter, i - top, btree_iter_cmp); 950 if (!KEY_SIZE(i->k)) {
951 sort_key_next(iter, i);
952 heap_sift(iter, i - top, btree_iter_cmp);
953 continue;
954 }
955
956 if (top->k > i->k) {
957 if (bkey_cmp(top->k, i->k) >= 0)
958 sort_key_next(iter, i);
959 else
960 bch_cut_front(top->k, i->k);
961
962 heap_sift(iter, i - top, btree_iter_cmp);
963 } else {
964 /* can't happen because of comparison func */
965 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
966 bch_cut_back(&START_KEY(i->k), top->k);
967 }
951 } 968 }
952} 969}
953 970
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978b..f42fc7ed9cd6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
255 255
256 return; 256 return;
257err: 257err:
258 bch_cache_set_error(b->c, "io error reading bucket %lu", 258 bch_cache_set_error(b->c, "io error reading bucket %zu",
259 PTR_BUCKET_NR(b->c, &b->key, 0)); 259 PTR_BUCKET_NR(b->c, &b->key, 0));
260} 260}
261 261
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
612 return SHRINK_STOP; 612 return SHRINK_STOP;
613 613
614 /* Return -1 if we can't do anything right now */ 614 /* Return -1 if we can't do anything right now */
615 if (sc->gfp_mask & __GFP_WAIT) 615 if (sc->gfp_mask & __GFP_IO)
616 mutex_lock(&c->bucket_lock); 616 mutex_lock(&c->bucket_lock);
617 else if (!mutex_trylock(&c->bucket_lock)) 617 else if (!mutex_trylock(&c->bucket_lock))
618 return -1; 618 return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2be..8435f81e5d85 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
153 bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); 153 bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
154 pr_debug("%u journal buckets", ca->sb.njournal_buckets); 154 pr_debug("%u journal buckets", ca->sb.njournal_buckets);
155 155
156 /* Read journal buckets ordered by golden ratio hash to quickly 156 /*
157 * Read journal buckets ordered by golden ratio hash to quickly
157 * find a sequence of buckets with valid journal entries 158 * find a sequence of buckets with valid journal entries
158 */ 159 */
159 for (i = 0; i < ca->sb.njournal_buckets; i++) { 160 for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
166 goto bsearch; 167 goto bsearch;
167 } 168 }
168 169
169 /* If that fails, check all the buckets we haven't checked 170 /*
171 * If that fails, check all the buckets we haven't checked
170 * already 172 * already
171 */ 173 */
172 pr_debug("falling back to linear search"); 174 pr_debug("falling back to linear search");
173 175
174 for (l = 0; l < ca->sb.njournal_buckets; l++) { 176 for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
175 if (test_bit(l, bitmap)) 177 l < ca->sb.njournal_buckets;
176 continue; 178 l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
177
178 if (read_bucket(l)) 179 if (read_bucket(l))
179 goto bsearch; 180 goto bsearch;
180 } 181
182 if (list_empty(list))
183 continue;
181bsearch: 184bsearch:
182 /* Binary search */ 185 /* Binary search */
183 m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); 186 m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
197 r = m; 200 r = m;
198 } 201 }
199 202
200 /* Read buckets in reverse order until we stop finding more 203 /*
204 * Read buckets in reverse order until we stop finding more
201 * journal entries 205 * journal entries
202 */ 206 */
203 pr_debug("finishing up"); 207 pr_debug("finishing up: m %u njournal_buckets %u",
208 m, ca->sb.njournal_buckets);
204 l = m; 209 l = m;
205 210
206 while (1) { 211 while (1) {
@@ -228,9 +233,10 @@ bsearch:
228 } 233 }
229 } 234 }
230 235
231 c->journal.seq = list_entry(list->prev, 236 if (!list_empty(list))
232 struct journal_replay, 237 c->journal.seq = list_entry(list->prev,
233 list)->j.seq; 238 struct journal_replay,
239 list)->j.seq;
234 240
235 return 0; 241 return 0;
236#undef read_bucket 242#undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
428 return; 434 return;
429 } 435 }
430 436
431 switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) { 437 switch (atomic_read(&ja->discard_in_flight)) {
432 case DISCARD_IN_FLIGHT: 438 case DISCARD_IN_FLIGHT:
433 return; 439 return;
434 440
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
689 if (cl) 695 if (cl)
690 BUG_ON(!closure_wait(&w->wait, cl)); 696 BUG_ON(!closure_wait(&w->wait, cl));
691 697
698 closure_flush(&c->journal.io);
692 __journal_try_write(c, true); 699 __journal_try_write(c, true);
693 } 700 }
694} 701}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d8..b6a74bcbb08f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -996,17 +996,19 @@ static void request_write(struct cached_dev *dc, struct search *s)
996 closure_bio_submit(bio, cl, s->d); 996 closure_bio_submit(bio, cl, s->d);
997 } else { 997 } else {
998 bch_writeback_add(dc); 998 bch_writeback_add(dc);
999 s->op.cache_bio = bio;
999 1000
1000 if (s->op.flush_journal) { 1001 if (bio->bi_rw & REQ_FLUSH) {
1001 /* Also need to send a flush to the backing device */ 1002 /* Also need to send a flush to the backing device */
1002 s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, 1003 struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
1003 dc->disk.bio_split); 1004 dc->disk.bio_split);
1004 1005
1005 bio->bi_size = 0; 1006 flush->bi_rw = WRITE_FLUSH;
1006 bio->bi_vcnt = 0; 1007 flush->bi_bdev = bio->bi_bdev;
1007 closure_bio_submit(bio, cl, s->d); 1008 flush->bi_end_io = request_endio;
1008 } else { 1009 flush->bi_private = cl;
1009 s->op.cache_bio = bio; 1010
1011 closure_bio_submit(flush, cl, s->d);
1010 } 1012 }
1011 } 1013 }
1012out: 1014out:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2e..924dcfdae111 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
223 } 223 }
224 224
225 if (attr == &sysfs_label) { 225 if (attr == &sysfs_label) {
226 /* note: endlines are preserved */ 226 if (size > SB_LABEL_SIZE)
227 memcpy(dc->sb.label, buf, SB_LABEL_SIZE); 227 return -EINVAL;
228 memcpy(dc->sb.label, buf, size);
229 if (size < SB_LABEL_SIZE)
230 dc->sb.label[size] = '\0';
231 if (size && dc->sb.label[size - 1] == '\n')
232 dc->sb.label[size - 1] = '\0';
228 bch_write_bdev_super(dc, NULL); 233 bch_write_bdev_super(dc, NULL);
229 if (dc->disk.c) { 234 if (dc->disk.c) {
230 memcpy(dc->disk.c->uuids[dc->disk.id].label, 235 memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a22..420dad545c7d 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
190 stats->last = now ?: 1; 190 stats->last = now ?: 1;
191} 191}
192 192
193unsigned bch_next_delay(struct ratelimit *d, uint64_t done) 193/**
194 * bch_next_delay() - increment @d by the amount of work done, and return how
195 * long to delay until the next time to do some work.
196 *
197 * @d - the struct bch_ratelimit to update
198 * @done - the amount of work done, in arbitrary units
199 *
200 * Returns the amount of time to delay by, in jiffies
201 */
202uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
194{ 203{
195 uint64_t now = local_clock(); 204 uint64_t now = local_clock();
196 205
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85f..ea345c6896f4 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
450 (ewma) >> factor; \ 450 (ewma) >> factor; \
451}) 451})
452 452
453struct ratelimit { 453struct bch_ratelimit {
454 /* Next time we want to do some work, in nanoseconds */
454 uint64_t next; 455 uint64_t next;
456
457 /*
458 * Rate at which we want to do work, in units per nanosecond
459 * The units here correspond to the units passed to bch_next_delay()
460 */
455 unsigned rate; 461 unsigned rate;
456}; 462};
457 463
458static inline void ratelimit_reset(struct ratelimit *d) 464static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
459{ 465{
460 d->next = local_clock(); 466 d->next = local_clock();
461} 467}
462 468
463unsigned bch_next_delay(struct ratelimit *d, uint64_t done); 469uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
464 470
465#define __DIV_SAFE(n, d, zero) \ 471#define __DIV_SAFE(n, d, zero) \
466({ \ 472({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff551628..ba3ee48320f2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
94 94
95static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) 95static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
96{ 96{
97 uint64_t ret;
98
97 if (atomic_read(&dc->disk.detaching) || 99 if (atomic_read(&dc->disk.detaching) ||
98 !dc->writeback_percent) 100 !dc->writeback_percent)
99 return 0; 101 return 0;
100 102
101 return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); 103 ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
104
105 return min_t(uint64_t, ret, HZ);
102} 106}
103 107
104/* Background writeback */ 108/* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
208 212
209 up_write(&dc->writeback_lock); 213 up_write(&dc->writeback_lock);
210 214
211 ratelimit_reset(&dc->writeback_rate); 215 bch_ratelimit_reset(&dc->writeback_rate);
212 216
213 /* Punt to workqueue only so we don't recurse and blow the stack */ 217 /* Punt to workqueue only so we don't recurse and blow the stack */
214 continue_at(cl, read_dirty, dirty_wq); 218 continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
318 } 322 }
319 323
320 bch_keybuf_del(&dc->writeback_keys, w); 324 bch_keybuf_del(&dc->writeback_keys, w);
321 atomic_dec_bug(&dc->in_flight); 325 up(&dc->in_flight);
322
323 closure_wake_up(&dc->writeback_wait);
324 326
325 closure_return_with_destructor(cl, dirty_io_destructor); 327 closure_return_with_destructor(cl, dirty_io_destructor);
326} 328}
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
349 351
350 closure_bio_submit(&io->bio, cl, &io->dc->disk); 352 closure_bio_submit(&io->bio, cl, &io->dc->disk);
351 353
352 continue_at(cl, write_dirty_finish, dirty_wq); 354 continue_at(cl, write_dirty_finish, system_wq);
353} 355}
354 356
355static void read_dirty_endio(struct bio *bio, int error) 357static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
369 371
370 closure_bio_submit(&io->bio, cl, &io->dc->disk); 372 closure_bio_submit(&io->bio, cl, &io->dc->disk);
371 373
372 continue_at(cl, write_dirty, dirty_wq); 374 continue_at(cl, write_dirty, system_wq);
373} 375}
374 376
375static void read_dirty(struct closure *cl) 377static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
394 396
395 if (delay > 0 && 397 if (delay > 0 &&
396 (KEY_START(&w->key) != dc->last_read || 398 (KEY_START(&w->key) != dc->last_read ||
397 jiffies_to_msecs(delay) > 50)) { 399 jiffies_to_msecs(delay) > 50))
398 w->private = NULL; 400 delay = schedule_timeout_uninterruptible(delay);
399
400 closure_delay(&dc->writeback, delay);
401 continue_at(cl, read_dirty, dirty_wq);
402 }
403 401
404 dc->last_read = KEY_OFFSET(&w->key); 402 dc->last_read = KEY_OFFSET(&w->key);
405 403
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
424 422
425 trace_bcache_writeback(&w->key); 423 trace_bcache_writeback(&w->key);
426 424
427 closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); 425 down(&dc->in_flight);
426 closure_call(&io->cl, read_dirty_submit, NULL, cl);
428 427
429 delay = writeback_delay(dc, KEY_SIZE(&w->key)); 428 delay = writeback_delay(dc, KEY_SIZE(&w->key));
430
431 atomic_inc(&dc->in_flight);
432
433 if (!closure_wait_event(&dc->writeback_wait, cl,
434 atomic_read(&dc->in_flight) < 64))
435 continue_at(cl, read_dirty, dirty_wq);
436 } 429 }
437 430
438 if (0) { 431 if (0) {
@@ -442,7 +435,11 @@ err:
442 bch_keybuf_del(&dc->writeback_keys, w); 435 bch_keybuf_del(&dc->writeback_keys, w);
443 } 436 }
444 437
445 refill_dirty(cl); 438 /*
439 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
440 * freed) before refilling again
441 */
442 continue_at(cl, refill_dirty, dirty_wq);
446} 443}
447 444
448/* Init */ 445/* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
484 481
485void bch_cached_dev_writeback_init(struct cached_dev *dc) 482void bch_cached_dev_writeback_init(struct cached_dev *dc)
486{ 483{
484 sema_init(&dc->in_flight, 64);
487 closure_init_unlocked(&dc->writeback); 485 closure_init_unlocked(&dc->writeback);
488 init_rwsem(&dc->writeback_lock); 486 init_rwsem(&dc->writeback_lock);
489 487
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
513 511
514int __init bch_writeback_init(void) 512int __init bch_writeback_init(void)
515{ 513{
516 dirty_wq = create_singlethread_workqueue("bcache_writeback"); 514 dirty_wq = create_workqueue("bcache_writeback");
517 if (!dirty_wq) 515 if (!dirty_wq)
518 return -ENOMEM; 516 return -ENOMEM;
519 517