diff options
| -rw-r--r-- | drivers/md/bcache/bcache.h | 5 | ||||
| -rw-r--r-- | drivers/md/bcache/btree.c | 19 | ||||
| -rw-r--r-- | drivers/md/bcache/super.c | 17 | ||||
| -rw-r--r-- | drivers/md/bcache/writeback.c | 94 | ||||
| -rw-r--r-- | drivers/md/bcache/writeback.h | 21 | ||||
| -rw-r--r-- | include/trace/events/bcache.h | 29 |
6 files changed, 128 insertions, 57 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 816d07958fac..ab0b2150fed6 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
| @@ -237,7 +237,7 @@ struct keybuf { | |||
| 237 | 237 | ||
| 238 | struct rb_root keys; | 238 | struct rb_root keys; |
| 239 | 239 | ||
| 240 | #define KEYBUF_NR 100 | 240 | #define KEYBUF_NR 500 |
| 241 | DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); | 241 | DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); |
| 242 | }; | 242 | }; |
| 243 | 243 | ||
| @@ -273,9 +273,10 @@ struct bcache_device { | |||
| 273 | atomic_t detaching; | 273 | atomic_t detaching; |
| 274 | int flush_done; | 274 | int flush_done; |
| 275 | 275 | ||
| 276 | uint64_t nr_stripes; | 276 | unsigned nr_stripes; |
| 277 | unsigned stripe_size; | 277 | unsigned stripe_size; |
| 278 | atomic_t *stripe_sectors_dirty; | 278 | atomic_t *stripe_sectors_dirty; |
| 279 | unsigned long *full_dirty_stripes; | ||
| 279 | 280 | ||
| 280 | unsigned long sectors_dirty_last; | 281 | unsigned long sectors_dirty_last; |
| 281 | long sectors_dirty_derivative; | 282 | long sectors_dirty_derivative; |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 6def7c9a1228..5e2765aadce1 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
| @@ -2378,6 +2378,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, | |||
| 2378 | 2378 | ||
| 2379 | struct refill { | 2379 | struct refill { |
| 2380 | struct btree_op op; | 2380 | struct btree_op op; |
| 2381 | unsigned nr_found; | ||
| 2381 | struct keybuf *buf; | 2382 | struct keybuf *buf; |
| 2382 | struct bkey *end; | 2383 | struct bkey *end; |
| 2383 | keybuf_pred_fn *pred; | 2384 | keybuf_pred_fn *pred; |
| @@ -2414,6 +2415,8 @@ static int refill_keybuf_fn(struct btree_op *op, struct btree *b, | |||
| 2414 | 2415 | ||
| 2415 | if (RB_INSERT(&buf->keys, w, node, keybuf_cmp)) | 2416 | if (RB_INSERT(&buf->keys, w, node, keybuf_cmp)) |
| 2416 | array_free(&buf->freelist, w); | 2417 | array_free(&buf->freelist, w); |
| 2418 | else | ||
| 2419 | refill->nr_found++; | ||
| 2417 | 2420 | ||
| 2418 | if (array_freelist_empty(&buf->freelist)) | 2421 | if (array_freelist_empty(&buf->freelist)) |
| 2419 | ret = MAP_DONE; | 2422 | ret = MAP_DONE; |
| @@ -2434,18 +2437,18 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, | |||
| 2434 | cond_resched(); | 2437 | cond_resched(); |
| 2435 | 2438 | ||
| 2436 | bch_btree_op_init(&refill.op, -1); | 2439 | bch_btree_op_init(&refill.op, -1); |
| 2437 | refill.buf = buf; | 2440 | refill.nr_found = 0; |
| 2438 | refill.end = end; | 2441 | refill.buf = buf; |
| 2439 | refill.pred = pred; | 2442 | refill.end = end; |
| 2443 | refill.pred = pred; | ||
| 2440 | 2444 | ||
| 2441 | bch_btree_map_keys(&refill.op, c, &buf->last_scanned, | 2445 | bch_btree_map_keys(&refill.op, c, &buf->last_scanned, |
| 2442 | refill_keybuf_fn, MAP_END_KEY); | 2446 | refill_keybuf_fn, MAP_END_KEY); |
| 2443 | 2447 | ||
| 2444 | pr_debug("found %s keys from %llu:%llu to %llu:%llu", | 2448 | trace_bcache_keyscan(refill.nr_found, |
| 2445 | RB_EMPTY_ROOT(&buf->keys) ? "no" : | 2449 | KEY_INODE(&start), KEY_OFFSET(&start), |
| 2446 | array_freelist_empty(&buf->freelist) ? "some" : "a few", | 2450 | KEY_INODE(&buf->last_scanned), |
| 2447 | KEY_INODE(&start), KEY_OFFSET(&start), | 2451 | KEY_OFFSET(&buf->last_scanned)); |
| 2448 | KEY_INODE(&buf->last_scanned), KEY_OFFSET(&buf->last_scanned)); | ||
| 2449 | 2452 | ||
| 2450 | spin_lock(&buf->lock); | 2453 | spin_lock(&buf->lock); |
| 2451 | 2454 | ||
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 4813ef67cef5..43fcfe38be11 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
| @@ -738,6 +738,10 @@ static void bcache_device_free(struct bcache_device *d) | |||
| 738 | mempool_destroy(d->unaligned_bvec); | 738 | mempool_destroy(d->unaligned_bvec); |
| 739 | if (d->bio_split) | 739 | if (d->bio_split) |
| 740 | bioset_free(d->bio_split); | 740 | bioset_free(d->bio_split); |
| 741 | if (is_vmalloc_addr(d->full_dirty_stripes)) | ||
| 742 | vfree(d->full_dirty_stripes); | ||
| 743 | else | ||
| 744 | kfree(d->full_dirty_stripes); | ||
| 741 | if (is_vmalloc_addr(d->stripe_sectors_dirty)) | 745 | if (is_vmalloc_addr(d->stripe_sectors_dirty)) |
| 742 | vfree(d->stripe_sectors_dirty); | 746 | vfree(d->stripe_sectors_dirty); |
| 743 | else | 747 | else |
| @@ -757,8 +761,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, | |||
| 757 | 761 | ||
| 758 | d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); | 762 | d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); |
| 759 | 763 | ||
| 760 | if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) | 764 | if (!d->nr_stripes || |
| 765 | d->nr_stripes > INT_MAX || | ||
| 766 | d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { | ||
| 767 | pr_err("nr_stripes too large"); | ||
| 761 | return -ENOMEM; | 768 | return -ENOMEM; |
| 769 | } | ||
| 762 | 770 | ||
| 763 | n = d->nr_stripes * sizeof(atomic_t); | 771 | n = d->nr_stripes * sizeof(atomic_t); |
| 764 | d->stripe_sectors_dirty = n < PAGE_SIZE << 6 | 772 | d->stripe_sectors_dirty = n < PAGE_SIZE << 6 |
| @@ -767,6 +775,13 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, | |||
| 767 | if (!d->stripe_sectors_dirty) | 775 | if (!d->stripe_sectors_dirty) |
| 768 | return -ENOMEM; | 776 | return -ENOMEM; |
| 769 | 777 | ||
| 778 | n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long); | ||
| 779 | d->full_dirty_stripes = n < PAGE_SIZE << 6 | ||
| 780 | ? kzalloc(n, GFP_KERNEL) | ||
| 781 | : vzalloc(n); | ||
| 782 | if (!d->full_dirty_stripes) | ||
| 783 | return -ENOMEM; | ||
| 784 | |||
| 770 | if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || | 785 | if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || |
| 771 | !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, | 786 | !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, |
| 772 | sizeof(struct bio_vec) * BIO_MAX_PAGES)) || | 787 | sizeof(struct bio_vec) * BIO_MAX_PAGES)) || |
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index ab0f6b449111..22e21dc9a037 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
| @@ -292,14 +292,12 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | |||
| 292 | uint64_t offset, int nr_sectors) | 292 | uint64_t offset, int nr_sectors) |
| 293 | { | 293 | { |
| 294 | struct bcache_device *d = c->devices[inode]; | 294 | struct bcache_device *d = c->devices[inode]; |
| 295 | unsigned stripe_offset; | 295 | unsigned stripe_offset, stripe, sectors_dirty; |
| 296 | uint64_t stripe = offset; | ||
| 297 | 296 | ||
| 298 | if (!d) | 297 | if (!d) |
| 299 | return; | 298 | return; |
| 300 | 299 | ||
| 301 | do_div(stripe, d->stripe_size); | 300 | stripe = offset_to_stripe(d, offset); |
| 302 | |||
| 303 | stripe_offset = offset & (d->stripe_size - 1); | 301 | stripe_offset = offset & (d->stripe_size - 1); |
| 304 | 302 | ||
| 305 | while (nr_sectors) { | 303 | while (nr_sectors) { |
| @@ -309,7 +307,16 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | |||
| 309 | if (nr_sectors < 0) | 307 | if (nr_sectors < 0) |
| 310 | s = -s; | 308 | s = -s; |
| 311 | 309 | ||
| 312 | atomic_add(s, d->stripe_sectors_dirty + stripe); | 310 | if (stripe >= d->nr_stripes) |
| 311 | return; | ||
| 312 | |||
| 313 | sectors_dirty = atomic_add_return(s, | ||
| 314 | d->stripe_sectors_dirty + stripe); | ||
| 315 | if (sectors_dirty == d->stripe_size) | ||
| 316 | set_bit(stripe, d->full_dirty_stripes); | ||
| 317 | else | ||
| 318 | clear_bit(stripe, d->full_dirty_stripes); | ||
| 319 | |||
| 313 | nr_sectors -= s; | 320 | nr_sectors -= s; |
| 314 | stripe_offset = 0; | 321 | stripe_offset = 0; |
| 315 | stripe++; | 322 | stripe++; |
| @@ -321,59 +328,70 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) | |||
| 321 | return KEY_DIRTY(k); | 328 | return KEY_DIRTY(k); |
| 322 | } | 329 | } |
| 323 | 330 | ||
| 324 | static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k) | 331 | static void refill_full_stripes(struct cached_dev *dc) |
| 325 | { | 332 | { |
| 326 | uint64_t stripe = KEY_START(k); | 333 | struct keybuf *buf = &dc->writeback_keys; |
| 327 | unsigned nr_sectors = KEY_SIZE(k); | 334 | unsigned start_stripe, stripe, next_stripe; |
| 328 | struct cached_dev *dc = container_of(buf, struct cached_dev, | 335 | bool wrapped = false; |
| 329 | writeback_keys); | 336 | |
| 337 | stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned)); | ||
| 330 | 338 | ||
| 331 | if (!KEY_DIRTY(k)) | 339 | if (stripe >= dc->disk.nr_stripes) |
| 332 | return false; | 340 | stripe = 0; |
| 333 | 341 | ||
| 334 | do_div(stripe, dc->disk.stripe_size); | 342 | start_stripe = stripe; |
| 335 | 343 | ||
| 336 | while (1) { | 344 | while (1) { |
| 337 | if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) == | 345 | stripe = find_next_bit(dc->disk.full_dirty_stripes, |
| 338 | dc->disk.stripe_size) | 346 | dc->disk.nr_stripes, stripe); |
| 339 | return true; | ||
| 340 | 347 | ||
| 341 | if (nr_sectors <= dc->disk.stripe_size) | 348 | if (stripe == dc->disk.nr_stripes) |
| 342 | return false; | 349 | goto next; |
| 343 | 350 | ||
| 344 | nr_sectors -= dc->disk.stripe_size; | 351 | next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes, |
| 345 | stripe++; | 352 | dc->disk.nr_stripes, stripe); |
| 353 | |||
| 354 | buf->last_scanned = KEY(dc->disk.id, | ||
| 355 | stripe * dc->disk.stripe_size, 0); | ||
| 356 | |||
| 357 | bch_refill_keybuf(dc->disk.c, buf, | ||
| 358 | &KEY(dc->disk.id, | ||
| 359 | next_stripe * dc->disk.stripe_size, 0), | ||
| 360 | dirty_pred); | ||
| 361 | |||
| 362 | if (array_freelist_empty(&buf->freelist)) | ||
| 363 | return; | ||
| 364 | |||
| 365 | stripe = next_stripe; | ||
| 366 | next: | ||
| 367 | if (wrapped && stripe > start_stripe) | ||
| 368 | return; | ||
| 369 | |||
| 370 | if (stripe == dc->disk.nr_stripes) { | ||
| 371 | stripe = 0; | ||
| 372 | wrapped = true; | ||
| 373 | } | ||
| 346 | } | 374 | } |
| 347 | } | 375 | } |
| 348 | 376 | ||
| 349 | static bool refill_dirty(struct cached_dev *dc) | 377 | static bool refill_dirty(struct cached_dev *dc) |
| 350 | { | 378 | { |
| 351 | struct keybuf *buf = &dc->writeback_keys; | 379 | struct keybuf *buf = &dc->writeback_keys; |
| 352 | bool searched_from_start = false; | ||
| 353 | struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); | 380 | struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); |
| 381 | bool searched_from_start = false; | ||
| 382 | |||
| 383 | if (dc->partial_stripes_expensive) { | ||
| 384 | refill_full_stripes(dc); | ||
| 385 | if (array_freelist_empty(&buf->freelist)) | ||
| 386 | return false; | ||
| 387 | } | ||
| 354 | 388 | ||
| 355 | if (bkey_cmp(&buf->last_scanned, &end) >= 0) { | 389 | if (bkey_cmp(&buf->last_scanned, &end) >= 0) { |
| 356 | buf->last_scanned = KEY(dc->disk.id, 0, 0); | 390 | buf->last_scanned = KEY(dc->disk.id, 0, 0); |
| 357 | searched_from_start = true; | 391 | searched_from_start = true; |
| 358 | } | 392 | } |
| 359 | 393 | ||
| 360 | if (dc->partial_stripes_expensive) { | 394 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); |
| 361 | uint64_t i; | ||
| 362 | |||
| 363 | for (i = 0; i < dc->disk.nr_stripes; i++) | ||
| 364 | if (atomic_read(dc->disk.stripe_sectors_dirty + i) == | ||
| 365 | dc->disk.stripe_size) | ||
| 366 | goto full_stripes; | ||
| 367 | |||
| 368 | goto normal_refill; | ||
| 369 | full_stripes: | ||
| 370 | searched_from_start = false; /* not searching entire btree */ | ||
| 371 | bch_refill_keybuf(dc->disk.c, buf, &end, | ||
| 372 | dirty_full_stripe_pred); | ||
| 373 | } else { | ||
| 374 | normal_refill: | ||
| 375 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); | ||
| 376 | } | ||
| 377 | 395 | ||
| 378 | return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; | 396 | return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; |
| 379 | } | 397 | } |
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 60516bfa6052..fe7d9d56492b 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h | |||
| @@ -14,22 +14,27 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) | |||
| 14 | return ret; | 14 | return ret; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | static inline bool bcache_dev_stripe_dirty(struct bcache_device *d, | 17 | static inline unsigned offset_to_stripe(struct bcache_device *d, |
| 18 | uint64_t offset) | ||
| 19 | { | ||
| 20 | do_div(offset, d->stripe_size); | ||
| 21 | return offset; | ||
| 22 | } | ||
| 23 | |||
| 24 | static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, | ||
| 18 | uint64_t offset, | 25 | uint64_t offset, |
| 19 | unsigned nr_sectors) | 26 | unsigned nr_sectors) |
| 20 | { | 27 | { |
| 21 | uint64_t stripe = offset; | 28 | unsigned stripe = offset_to_stripe(&dc->disk, offset); |
| 22 | |||
| 23 | do_div(stripe, d->stripe_size); | ||
| 24 | 29 | ||
| 25 | while (1) { | 30 | while (1) { |
| 26 | if (atomic_read(d->stripe_sectors_dirty + stripe)) | 31 | if (atomic_read(dc->disk.stripe_sectors_dirty + stripe)) |
| 27 | return true; | 32 | return true; |
| 28 | 33 | ||
| 29 | if (nr_sectors <= d->stripe_size) | 34 | if (nr_sectors <= dc->disk.stripe_size) |
| 30 | return false; | 35 | return false; |
| 31 | 36 | ||
| 32 | nr_sectors -= d->stripe_size; | 37 | nr_sectors -= dc->disk.stripe_size; |
| 33 | stripe++; | 38 | stripe++; |
| 34 | } | 39 | } |
| 35 | } | 40 | } |
| @@ -45,7 +50,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, | |||
| 45 | return false; | 50 | return false; |
| 46 | 51 | ||
| 47 | if (dc->partial_stripes_expensive && | 52 | if (dc->partial_stripes_expensive && |
| 48 | bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector, | 53 | bcache_dev_stripe_dirty(dc, bio->bi_sector, |
| 49 | bio_sectors(bio))) | 54 | bio_sectors(bio))) |
| 50 | return true; | 55 | return true; |
| 51 | 56 | ||
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 32c89b33c391..e2b9576d00e2 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h | |||
| @@ -368,6 +368,35 @@ DEFINE_EVENT(btree_node, bcache_btree_set_root, | |||
| 368 | TP_ARGS(b) | 368 | TP_ARGS(b) |
| 369 | ); | 369 | ); |
| 370 | 370 | ||
| 371 | TRACE_EVENT(bcache_keyscan, | ||
| 372 | TP_PROTO(unsigned nr_found, | ||
| 373 | unsigned start_inode, uint64_t start_offset, | ||
| 374 | unsigned end_inode, uint64_t end_offset), | ||
| 375 | TP_ARGS(nr_found, | ||
| 376 | start_inode, start_offset, | ||
| 377 | end_inode, end_offset), | ||
| 378 | |||
| 379 | TP_STRUCT__entry( | ||
| 380 | __field(__u32, nr_found ) | ||
| 381 | __field(__u32, start_inode ) | ||
| 382 | __field(__u64, start_offset ) | ||
| 383 | __field(__u32, end_inode ) | ||
| 384 | __field(__u64, end_offset ) | ||
| 385 | ), | ||
| 386 | |||
| 387 | TP_fast_assign( | ||
| 388 | __entry->nr_found = nr_found; | ||
| 389 | __entry->start_inode = start_inode; | ||
| 390 | __entry->start_offset = start_offset; | ||
| 391 | __entry->end_inode = end_inode; | ||
| 392 | __entry->end_offset = end_offset; | ||
| 393 | ), | ||
| 394 | |||
| 395 | TP_printk("found %u keys from %u:%llu to %u:%llu", __entry->nr_found, | ||
| 396 | __entry->start_inode, __entry->start_offset, | ||
| 397 | __entry->end_inode, __entry->end_offset) | ||
| 398 | ); | ||
| 399 | |||
| 371 | /* Allocator */ | 400 | /* Allocator */ |
| 372 | 401 | ||
| 373 | TRACE_EVENT(bcache_alloc_invalidate, | 402 | TRACE_EVENT(bcache_alloc_invalidate, |
