diff options
Diffstat (limited to 'drivers/md/bcache/writeback.c')
-rw-r--r-- | drivers/md/bcache/writeback.c | 133 |
1 files changed, 120 insertions, 13 deletions
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 2714ed3991d1..22cbff551628 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
@@ -9,6 +9,9 @@ | |||
9 | #include "bcache.h" | 9 | #include "bcache.h" |
10 | #include "btree.h" | 10 | #include "btree.h" |
11 | #include "debug.h" | 11 | #include "debug.h" |
12 | #include "writeback.h" | ||
13 | |||
14 | #include <trace/events/bcache.h> | ||
12 | 15 | ||
13 | static struct workqueue_struct *dirty_wq; | 16 | static struct workqueue_struct *dirty_wq; |
14 | 17 | ||
@@ -36,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc) | |||
36 | 39 | ||
37 | int change = 0; | 40 | int change = 0; |
38 | int64_t error; | 41 | int64_t error; |
39 | int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); | 42 | int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); |
40 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; | 43 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; |
41 | 44 | ||
42 | dc->disk.sectors_dirty_last = dirty; | 45 | dc->disk.sectors_dirty_last = dirty; |
@@ -105,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) | |||
105 | return KEY_DIRTY(k); | 108 | return KEY_DIRTY(k); |
106 | } | 109 | } |
107 | 110 | ||
111 | static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) | ||
112 | { | ||
113 | uint64_t stripe; | ||
114 | unsigned nr_sectors = KEY_SIZE(k); | ||
115 | struct cached_dev *dc = container_of(buf, struct cached_dev, | ||
116 | writeback_keys); | ||
117 | unsigned stripe_size = 1 << dc->disk.stripe_size_bits; | ||
118 | |||
119 | if (!KEY_DIRTY(k)) | ||
120 | return false; | ||
121 | |||
122 | stripe = KEY_START(k) >> dc->disk.stripe_size_bits; | ||
123 | while (1) { | ||
124 | if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) != | ||
125 | stripe_size) | ||
126 | return false; | ||
127 | |||
128 | if (nr_sectors <= stripe_size) | ||
129 | return true; | ||
130 | |||
131 | nr_sectors -= stripe_size; | ||
132 | stripe++; | ||
133 | } | ||
134 | } | ||
135 | |||
108 | static void dirty_init(struct keybuf_key *w) | 136 | static void dirty_init(struct keybuf_key *w) |
109 | { | 137 | { |
110 | struct dirty_io *io = w->private; | 138 | struct dirty_io *io = w->private; |
@@ -149,7 +177,22 @@ static void refill_dirty(struct closure *cl) | |||
149 | searched_from_start = true; | 177 | searched_from_start = true; |
150 | } | 178 | } |
151 | 179 | ||
152 | bch_refill_keybuf(dc->disk.c, buf, &end); | 180 | if (dc->partial_stripes_expensive) { |
181 | uint64_t i; | ||
182 | |||
183 | for (i = 0; i < dc->disk.nr_stripes; i++) | ||
184 | if (atomic_read(dc->disk.stripe_sectors_dirty + i) == | ||
185 | 1 << dc->disk.stripe_size_bits) | ||
186 | goto full_stripes; | ||
187 | |||
188 | goto normal_refill; | ||
189 | full_stripes: | ||
190 | bch_refill_keybuf(dc->disk.c, buf, &end, | ||
191 | dirty_full_stripe_pred); | ||
192 | } else { | ||
193 | normal_refill: | ||
194 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); | ||
195 | } | ||
153 | 196 | ||
154 | if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { | 197 | if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) { |
155 | /* Searched the entire btree - delay awhile */ | 198 | /* Searched the entire btree - delay awhile */ |
@@ -181,10 +224,8 @@ void bch_writeback_queue(struct cached_dev *dc) | |||
181 | } | 224 | } |
182 | } | 225 | } |
183 | 226 | ||
184 | void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | 227 | void bch_writeback_add(struct cached_dev *dc) |
185 | { | 228 | { |
186 | atomic_long_add(sectors, &dc->disk.sectors_dirty); | ||
187 | |||
188 | if (!atomic_read(&dc->has_dirty) && | 229 | if (!atomic_read(&dc->has_dirty) && |
189 | !atomic_xchg(&dc->has_dirty, 1)) { | 230 | !atomic_xchg(&dc->has_dirty, 1)) { |
190 | atomic_inc(&dc->count); | 231 | atomic_inc(&dc->count); |
@@ -203,6 +244,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | |||
203 | } | 244 | } |
204 | } | 245 | } |
205 | 246 | ||
247 | void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | ||
248 | uint64_t offset, int nr_sectors) | ||
249 | { | ||
250 | struct bcache_device *d = c->devices[inode]; | ||
251 | unsigned stripe_size, stripe_offset; | ||
252 | uint64_t stripe; | ||
253 | |||
254 | if (!d) | ||
255 | return; | ||
256 | |||
257 | stripe_size = 1 << d->stripe_size_bits; | ||
258 | stripe = offset >> d->stripe_size_bits; | ||
259 | stripe_offset = offset & (stripe_size - 1); | ||
260 | |||
261 | while (nr_sectors) { | ||
262 | int s = min_t(unsigned, abs(nr_sectors), | ||
263 | stripe_size - stripe_offset); | ||
264 | |||
265 | if (nr_sectors < 0) | ||
266 | s = -s; | ||
267 | |||
268 | atomic_add(s, d->stripe_sectors_dirty + stripe); | ||
269 | nr_sectors -= s; | ||
270 | stripe_offset = 0; | ||
271 | stripe++; | ||
272 | } | ||
273 | } | ||
274 | |||
206 | /* Background writeback - IO loop */ | 275 | /* Background writeback - IO loop */ |
207 | 276 | ||
208 | static void dirty_io_destructor(struct closure *cl) | 277 | static void dirty_io_destructor(struct closure *cl) |
@@ -216,9 +285,10 @@ static void write_dirty_finish(struct closure *cl) | |||
216 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); | 285 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); |
217 | struct keybuf_key *w = io->bio.bi_private; | 286 | struct keybuf_key *w = io->bio.bi_private; |
218 | struct cached_dev *dc = io->dc; | 287 | struct cached_dev *dc = io->dc; |
219 | struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt); | 288 | struct bio_vec *bv; |
289 | int i; | ||
220 | 290 | ||
221 | while (bv-- != io->bio.bi_io_vec) | 291 | bio_for_each_segment_all(bv, &io->bio, i) |
222 | __free_page(bv->bv_page); | 292 | __free_page(bv->bv_page); |
223 | 293 | ||
224 | /* This is kind of a dumb way of signalling errors. */ | 294 | /* This is kind of a dumb way of signalling errors. */ |
@@ -236,10 +306,12 @@ static void write_dirty_finish(struct closure *cl) | |||
236 | for (i = 0; i < KEY_PTRS(&w->key); i++) | 306 | for (i = 0; i < KEY_PTRS(&w->key); i++) |
237 | atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); | 307 | atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); |
238 | 308 | ||
239 | pr_debug("clearing %s", pkey(&w->key)); | ||
240 | bch_btree_insert(&op, dc->disk.c); | 309 | bch_btree_insert(&op, dc->disk.c); |
241 | closure_sync(&op.cl); | 310 | closure_sync(&op.cl); |
242 | 311 | ||
312 | if (op.insert_collision) | ||
313 | trace_bcache_writeback_collision(&w->key); | ||
314 | |||
243 | atomic_long_inc(op.insert_collision | 315 | atomic_long_inc(op.insert_collision |
244 | ? &dc->disk.c->writeback_keys_failed | 316 | ? &dc->disk.c->writeback_keys_failed |
245 | : &dc->disk.c->writeback_keys_done); | 317 | : &dc->disk.c->writeback_keys_done); |
@@ -275,7 +347,6 @@ static void write_dirty(struct closure *cl) | |||
275 | io->bio.bi_bdev = io->dc->bdev; | 347 | io->bio.bi_bdev = io->dc->bdev; |
276 | io->bio.bi_end_io = dirty_endio; | 348 | io->bio.bi_end_io = dirty_endio; |
277 | 349 | ||
278 | trace_bcache_write_dirty(&io->bio); | ||
279 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 350 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
280 | 351 | ||
281 | continue_at(cl, write_dirty_finish, dirty_wq); | 352 | continue_at(cl, write_dirty_finish, dirty_wq); |
@@ -296,7 +367,6 @@ static void read_dirty_submit(struct closure *cl) | |||
296 | { | 367 | { |
297 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); | 368 | struct dirty_io *io = container_of(cl, struct dirty_io, cl); |
298 | 369 | ||
299 | trace_bcache_read_dirty(&io->bio); | ||
300 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 370 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
301 | 371 | ||
302 | continue_at(cl, write_dirty, dirty_wq); | 372 | continue_at(cl, write_dirty, dirty_wq); |
@@ -349,10 +419,10 @@ static void read_dirty(struct closure *cl) | |||
349 | io->bio.bi_rw = READ; | 419 | io->bio.bi_rw = READ; |
350 | io->bio.bi_end_io = read_dirty_endio; | 420 | io->bio.bi_end_io = read_dirty_endio; |
351 | 421 | ||
352 | if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) | 422 | if (bio_alloc_pages(&io->bio, GFP_KERNEL)) |
353 | goto err_free; | 423 | goto err_free; |
354 | 424 | ||
355 | pr_debug("%s", pkey(&w->key)); | 425 | trace_bcache_writeback(&w->key); |
356 | 426 | ||
357 | closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); | 427 | closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); |
358 | 428 | ||
@@ -375,12 +445,49 @@ err: | |||
375 | refill_dirty(cl); | 445 | refill_dirty(cl); |
376 | } | 446 | } |
377 | 447 | ||
448 | /* Init */ | ||
449 | |||
450 | static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, | ||
451 | struct cached_dev *dc) | ||
452 | { | ||
453 | struct bkey *k; | ||
454 | struct btree_iter iter; | ||
455 | |||
456 | bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0)); | ||
457 | while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) | ||
458 | if (!b->level) { | ||
459 | if (KEY_INODE(k) > dc->disk.id) | ||
460 | break; | ||
461 | |||
462 | if (KEY_DIRTY(k)) | ||
463 | bcache_dev_sectors_dirty_add(b->c, dc->disk.id, | ||
464 | KEY_START(k), | ||
465 | KEY_SIZE(k)); | ||
466 | } else { | ||
467 | btree(sectors_dirty_init, k, b, op, dc); | ||
468 | if (KEY_INODE(k) > dc->disk.id) | ||
469 | break; | ||
470 | |||
471 | cond_resched(); | ||
472 | } | ||
473 | |||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | void bch_sectors_dirty_init(struct cached_dev *dc) | ||
478 | { | ||
479 | struct btree_op op; | ||
480 | |||
481 | bch_btree_op_init_stack(&op); | ||
482 | btree_root(sectors_dirty_init, dc->disk.c, &op, dc); | ||
483 | } | ||
484 | |||
378 | void bch_cached_dev_writeback_init(struct cached_dev *dc) | 485 | void bch_cached_dev_writeback_init(struct cached_dev *dc) |
379 | { | 486 | { |
380 | closure_init_unlocked(&dc->writeback); | 487 | closure_init_unlocked(&dc->writeback); |
381 | init_rwsem(&dc->writeback_lock); | 488 | init_rwsem(&dc->writeback_lock); |
382 | 489 | ||
383 | bch_keybuf_init(&dc->writeback_keys, dirty_pred); | 490 | bch_keybuf_init(&dc->writeback_keys); |
384 | 491 | ||
385 | dc->writeback_metadata = true; | 492 | dc->writeback_metadata = true; |
386 | dc->writeback_running = true; | 493 | dc->writeback_running = true; |