diff options
Diffstat (limited to 'drivers/md/bcache/writeback.c')
| -rw-r--r-- | drivers/md/bcache/writeback.c | 133 |
1 files changed, 120 insertions, 13 deletions
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 2714ed3991d1..22cbff551628 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
| @@ -9,6 +9,9 @@ | |||
| 9 | #include "bcache.h" | 9 | #include "bcache.h" |
| 10 | #include "btree.h" | 10 | #include "btree.h" |
| 11 | #include "debug.h" | 11 | #include "debug.h" |
| 12 | #include "writeback.h" | ||
| 13 | |||
| 14 | #include <trace/events/bcache.h> | ||
| 12 | 15 | ||
| 13 | static struct workqueue_struct *dirty_wq; | 16 | static struct workqueue_struct *dirty_wq; |
| 14 | 17 | ||
| @@ -36,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc) | |||
| 36 | 39 | ||
| 37 | int change = 0; | 40 | int change = 0; |
| 38 | int64_t error; | 41 | int64_t error; |
| 39 | int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); | 42 | int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); |
| 40 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; | 43 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; |
| 41 | 44 | ||
| 42 | dc->disk.sectors_dirty_last = dirty; | 45 | dc->disk.sectors_dirty_last = dirty; |
| @@ -105,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) | |||
| 105 | return KEY_DIRTY(k); | 108 | return KEY_DIRTY(k); |
| 106 | } | 109 | } |
| 107 | 110 | ||
| 111 | static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) | ||
| 112 | { | ||
| 113 | uint64_t stripe; | ||
| 114 | unsigned nr_sectors = KEY_SIZE(k); | ||
| 115 | struct cached_dev *dc = container_of(buf, struct cached_dev, | ||
| 116 | writeback_keys); | ||
| 117 | unsigned stripe_size = 1 << dc->disk.stripe_size_bits; | ||
| 118 | |||
| 119 | if (!KEY_DIRTY(k)) | ||
| 120 | return false; | ||
| 121 | |||
| 122 | stripe = KEY_START(k) >> dc->disk.stripe_size_bits; | ||
| 123 | while (1) { | ||
| 124 | if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != | ||
| 125 | stripe_size) | ||
| 126 | return false; | ||
| 127 | |||
| 128 | if (nr_sectors <= stripe_size) | ||
| 129 | return true; | ||
| 130 | |||
| 131 | nr_sectors -= stripe_size; | ||
| 132 | stripe++; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 108 | static void dirty_init(struct keybuf_key *w) | 136 | static void dirty_init(struct keybuf_key *w) |
| 109 | { | 137 | { |
| 110 | struct dirty_io *io = w->private; | 138 | struct dirty_io *io = w->private; |
| @@ -149,7 +177,22 @@ static void refill_dirty(struct closure *cl) | |||
| 149 | searched_from_start = true; | 177 | searched_from_start = true; |
| 150 | } | 178 | } |
| 151 | 179 | ||
| 152 | bch_refill_keybuf(dc->disk.c, buf, &end); | 180 | if (dc->partial_stripes_expensive) { |
| 181 | uint64_t i; | ||
| 182 | |||
| 183 | for (i = 0; i < dc->disk.nr_stripes; i++) | ||
| 184 | if (atomic_read(dc->disk.stripe_sectors_dirty + i) == | ||
| 185 | 1 << dc->disk.stripe_size_bits) | ||
| 186 | goto full_stripes; | ||
| 187 | |||
| 188 | goto normal_refill; | ||
| 189 | full_stripes: | ||
| 190 | bch_refill_keybuf(dc->disk.c, buf, &end, | ||
| 191 | dirty_full_stripe_pred); | ||
| 192 | } else { | ||
| 193 | normal_refill: | ||
| 194 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); | ||
| 195 | } | ||
| 153 | 196 | ||
| 154 | if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { | 197 | if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { |
| 155 | /* Searched the entire btree - delay awhile */ | 198 | /* Searched the entire btree - delay awhile */ |
| @@ -181,10 +224,8 @@ void bch_writeback_queue(struct cached_dev *dc) | |||
| 181 | } | 224 | } |
| 182 | } | 225 | } |
| 183 | 226 | ||
| 184 | void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | 227 | void bch_writeback_add(struct cached_dev *dc) |
| 185 | { | 228 | { |
| 186 | atomic_long_add(sectors, &dc->disk.sectors_dirty); | ||
| 187 | |||
| 188 | if (!atomic_read(&dc->has_dirty) && | 229 | if (!atomic_read(&dc->has_dirty) && |
| 189 | !atomic_xchg(&dc->has_dirty, 1)) { | 230 | !atomic_xchg(&dc->has_dirty, 1)) { |
| 190 | atomic_inc(&dc->count); | 231 | atomic_inc(&dc->count); |
| @@ -203,6 +244,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | |||
| 203 | } | 244 | } |
| 204 | } | 245 | } |
| 205 | 246 | ||
| 247 | void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | ||
| 248 | uint64_t offset, int nr_sectors) | ||
| 249 | { | ||
| 250 | struct bcache_device *d = c->devices[inode]; | ||
| 251 | unsigned stripe_size, stripe_offset; | ||
| 252 | uint64_t stripe; | ||
| 253 | |||
| 254 | if (!d) | ||
| 255 | return; | ||
| 256 | |||
| 257 | stripe_size = 1 << d->stripe_size_bits; | ||
| 258 | stripe = offset >> d->stripe_size_bits; | ||
| 259 | stripe_offset = offset & (stripe_size - 1); | ||
| 260 | |||
| 261 | while (nr_sectors) { | ||
| 262 | int s = min_t(unsigned, abs(nr_sectors), | ||
| 263 | stripe_size - stripe_offset); | ||
| 264 | |||
| 265 | if (nr_sectors < 0) | ||
| 266 | s = -s; | ||
| 267 | |||
| 268 | atomic_add(s, d->stripe_sectors_dirty + stripe); | ||
| 269 | nr_sectors -= s; | ||
| 270 | stripe_offset = 0; | ||
| 271 | stripe++; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 206 | /* Background writeback - IO loop */ | 275 | /* Background writeback - IO loop */ |
| 207 | 276 | ||
| 208 | static void dirty_io_destructor(struct closure *cl) | 277 | static void dirty_io_destructor(struct closure *cl) |
| @@ -216,9 +285,10 @@ static void write_dirty_finish(struct closure *cl) | |||
| 216 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); | 285 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); |
| 217 | struct keybuf_key *w = io->bio.bi_private; | 286 | struct keybuf_key *w = io->bio.bi_private; |
| 218 | struct cached_dev *dc = io->dc; | 287 | struct cached_dev *dc = io->dc; |
| 219 | struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); | 288 | struct bio_vec *bv; |
| 289 | int i; | ||
| 220 | 290 | ||
| 221 | while (bv-- != io->bio.bi_io_vec) | 291 | bio_for_each_segment_all(bv, &io->bio, i) |
| 222 | __free_page(bv->bv_page); | 292 | __free_page(bv->bv_page); |
| 223 | 293 | ||
| 224 | /* This is kind of a dumb way of signalling errors. */ | 294 | /* This is kind of a dumb way of signalling errors. */ |
| @@ -236,10 +306,12 @@ static void write_dirty_finish(struct closure *cl) | |||
| 236 | for (i = 0; i < KEY_PTRS(&w->key); i++) | 306 | for (i = 0; i < KEY_PTRS(&w->key); i++) |
| 237 | atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); | 307 | atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); |
| 238 | 308 | ||
| 239 | pr_debug("clearing %s", pkey(&w->key)); | ||
| 240 | bch_btree_insert(&op, dc->disk.c); | 309 | bch_btree_insert(&op, dc->disk.c); |
| 241 | closure_sync(&op.cl); | 310 | closure_sync(&op.cl); |
| 242 | 311 | ||
| 312 | if (op.insert_collision) | ||
| 313 | trace_bcache_writeback_collision(&w->key); | ||
| 314 | |||
| 243 | atomic_long_inc(op.insert_collision | 315 | atomic_long_inc(op.insert_collision |
| 244 | ? &dc->disk.c->writeback_keys_failed | 316 | ? &dc->disk.c->writeback_keys_failed |
| 245 | : &dc->disk.c->writeback_keys_done); | 317 | : &dc->disk.c->writeback_keys_done); |
| @@ -275,7 +347,6 @@ static void write_dirty(struct closure *cl) | |||
| 275 | io->bio.bi_bdev = io->dc->bdev; | 347 | io->bio.bi_bdev = io->dc->bdev; |
| 276 | io->bio.bi_end_io = dirty_endio; | 348 | io->bio.bi_end_io = dirty_endio; |
| 277 | 349 | ||
| 278 | trace_bcache_write_dirty(&io->bio); | ||
| 279 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 350 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
| 280 | 351 | ||
| 281 | continue_at(cl, write_dirty_finish, dirty_wq); | 352 | continue_at(cl, write_dirty_finish, dirty_wq); |
| @@ -296,7 +367,6 @@ static void read_dirty_submit(struct closure *cl) | |||
| 296 | { | 367 | { |
| 297 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); | 368 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); |
| 298 | 369 | ||
| 299 | trace_bcache_read_dirty(&io->bio); | ||
| 300 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 370 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
| 301 | 371 | ||
| 302 | continue_at(cl, write_dirty, dirty_wq); | 372 | continue_at(cl, write_dirty, dirty_wq); |
| @@ -349,10 +419,10 @@ static void read_dirty(struct closure *cl) | |||
| 349 | io->bio.bi_rw = READ; | 419 | io->bio.bi_rw = READ; |
| 350 | io->bio.bi_end_io = read_dirty_endio; | 420 | io->bio.bi_end_io = read_dirty_endio; |
| 351 | 421 | ||
| 352 | if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) | 422 | if (bio_alloc_pages(&io->bio, GFP_KERNEL)) |
| 353 | goto err_free; | 423 | goto err_free; |
| 354 | 424 | ||
| 355 | pr_debug("%s", pkey(&w->key)); | 425 | trace_bcache_writeback(&w->key); |
| 356 | 426 | ||
| 357 | closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); | 427 | closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); |
| 358 | 428 | ||
| @@ -375,12 +445,49 @@ err: | |||
| 375 | refill_dirty(cl); | 445 | refill_dirty(cl); |
| 376 | } | 446 | } |
| 377 | 447 | ||
| 448 | /* Init */ | ||
| 449 | |||
| 450 | static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, | ||
| 451 | struct cached_dev *dc) | ||
| 452 | { | ||
| 453 | struct bkey *k; | ||
| 454 | struct btree_iter iter; | ||
| 455 | |||
| 456 | bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0)); | ||
| 457 | while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) | ||
| 458 | if (!b->level) { | ||
| 459 | if (KEY_INODE(k) > dc->disk.id) | ||
| 460 | break; | ||
| 461 | |||
| 462 | if (KEY_DIRTY(k)) | ||
| 463 | bcache_dev_sectors_dirty_add(b->c, dc->disk.id, | ||
| 464 | KEY_START(k), | ||
| 465 | KEY_SIZE(k)); | ||
| 466 | } else { | ||
| 467 | btree(sectors_dirty_init, k, b, op, dc); | ||
| 468 | if (KEY_INODE(k) > dc->disk.id) | ||
| 469 | break; | ||
| 470 | |||
| 471 | cond_resched(); | ||
| 472 | } | ||
| 473 | |||
| 474 | return 0; | ||
| 475 | } | ||
| 476 | |||
| 477 | void bch_sectors_dirty_init(struct cached_dev *dc) | ||
| 478 | { | ||
| 479 | struct btree_op op; | ||
| 480 | |||
| 481 | bch_btree_op_init_stack(&op); | ||
| 482 | btree_root(sectors_dirty_init, dc->disk.c, &op, dc); | ||
| 483 | } | ||
| 484 | |||
| 378 | void bch_cached_dev_writeback_init(struct cached_dev *dc) | 485 | void bch_cached_dev_writeback_init(struct cached_dev *dc) |
| 379 | { | 486 | { |
| 380 | closure_init_unlocked(&dc->writeback); | 487 | closure_init_unlocked(&dc->writeback); |
| 381 | init_rwsem(&dc->writeback_lock); | 488 | init_rwsem(&dc->writeback_lock); |
| 382 | 489 | ||
| 383 | bch_keybuf_init(&dc->writeback_keys, dirty_pred); | 490 | bch_keybuf_init(&dc->writeback_keys); |
| 384 | 491 | ||
| 385 | dc->writeback_metadata = true; | 492 | dc->writeback_metadata = true; |
| 386 | dc->writeback_running = true; | 493 | dc->writeback_running = true; |
