author     Kent Overstreet <kmo@daterainc.com>    2013-07-24 21:11:11 -0400
committer  Kent Overstreet <kmo@daterainc.com>    2013-11-11 00:56:32 -0500
commit     2599b53b7b0ea6103d1661dca74d35480cb8fa1f
tree       0b277532e73924b68c3da43a999aa5df33824787
parent     220bb38c21b83e2f7b842f33220bf727093eca89
bcache: Move sector allocator to alloc.c
Just reorganizing things a bit.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
-rw-r--r--   drivers/md/bcache/alloc.c   | 180
-rw-r--r--   drivers/md/bcache/bcache.h  |   4
-rw-r--r--   drivers/md/bcache/request.c | 186
-rw-r--r--   drivers/md/bcache/request.h |   5
4 files changed, 189 insertions, 186 deletions
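
A note on the interface change in the diff below: bch_alloc_sectors() is now exported from alloc.c and takes explicit arguments (cache set, key, sector count, write_point, write_prio, wait) instead of a struct data_insert_op, and request.c reduces the submitting task to a 16-bit write point with hash_long((unsigned long) current, 16). The userspace sketch that follows only illustrates that write-stream segregation heuristic; hash16(), struct demo_bucket and pick_bucket() are stand-ins invented for the example, not bcache code.

/*
 * Illustration only -- not kernel code.  Writers are identified by a 16-bit
 * write point (the kernel derives it with hash_long((unsigned long) current,
 * 16)); an open bucket whose last_write_point matches is reused, otherwise
 * the least recently used bucket is recycled.
 */
#include <stdio.h>

#define NR_OPEN_BUCKETS 6                       /* same count the patch allocates */

struct demo_bucket {
        unsigned last_write_point;
        unsigned long last_used;                /* crude LRU timestamp */
};

static struct demo_bucket buckets[NR_OPEN_BUCKETS];
static unsigned long tick;

/* Stand-in for hash_long(task, 16): multiplicative hash down to 16 bits. */
static unsigned hash16(unsigned long task)
{
        return (unsigned)((task * 0x9e3779b97f4a7c15ULL) >> 48);
}

/* Simplified pick_data_bucket(): match on write point, else take the LRU. */
static int pick_bucket(unsigned write_point)
{
        int i, lru = 0;

        for (i = 0; i < NR_OPEN_BUCKETS; i++) {
                if (buckets[i].last_write_point == write_point)
                        return i;
                if (buckets[i].last_used < buckets[lru].last_used)
                        lru = i;
        }
        return lru;
}

int main(void)
{
        unsigned long fake_tasks[] = { 0x1000, 0x2000, 0x1000 };
        unsigned i;

        for (i = 0; i < 3; i++) {
                unsigned wp = hash16(fake_tasks[i]);
                int b = pick_bucket(wp);

                buckets[b].last_write_point = wp;
                buckets[b].last_used = ++tick;
                printf("task %#lx -> write_point %u -> bucket %d\n",
                       fake_tasks[i], wp, b);
        }
        return 0;
}

Running it shows the two streams landing in different buckets, and the repeat writer returning to its earlier bucket, which is the locality the comment in pick_data_bucket() describes.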
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b9bd5866055d..4970ddc6a7f6 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -487,8 +487,188 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
         return ret;
 }

+/* Sector allocator */
+
+struct open_bucket {
+        struct list_head list;
+        unsigned last_write_point;
+        unsigned sectors_free;
+        BKEY_PADDED(key);
+};
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ * write streams for better cache utilization: first we look for a bucket where
+ * the last write to it was sequential with the current write, and failing that
+ * we look for a bucket that was last used by the same task.
+ *
+ * The idea is if you've got multiple tasks pulling data into the cache at the
+ * same time, you'll get better cache utilization if you try to segregate their
+ * data and preserve locality.
+ *
+ * For example, say you're starting Firefox at the same time you're copying a
+ * bunch of files. Firefox will likely end up being fairly hot and stay in the
+ * cache awhile, but the data you copied might not be; if you wrote all that
+ * data to the same buckets it'd get invalidated at the same time.
+ *
+ * Both of those tasks will be doing fairly random IO so we can't rely on
+ * detecting sequential IO to segregate their data, but going off of the task
+ * should be a sane heuristic.
+ */
+static struct open_bucket *pick_data_bucket(struct cache_set *c,
+                                            const struct bkey *search,
+                                            unsigned write_point,
+                                            struct bkey *alloc)
+{
+        struct open_bucket *ret, *ret_task = NULL;
+
+        list_for_each_entry_reverse(ret, &c->data_buckets, list)
+                if (!bkey_cmp(&ret->key, search))
+                        goto found;
+                else if (ret->last_write_point == write_point)
+                        ret_task = ret;
+
+        ret = ret_task ?: list_first_entry(&c->data_buckets,
+                                           struct open_bucket, list);
+found:
+        if (!ret->sectors_free && KEY_PTRS(alloc)) {
+                ret->sectors_free = c->sb.bucket_size;
+                bkey_copy(&ret->key, alloc);
+                bkey_init(alloc);
+        }
+
+        if (!ret->sectors_free)
+                ret = NULL;
+
+        return ret;
+}
+
+/*
+ * Allocates some space in the cache to write to, and k to point to the newly
+ * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
+ * end of the newly allocated space).
+ *
+ * May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many
+ * sectors were actually allocated.
+ *
+ * If wait is true, will not fail.
+ */
+bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
+                       unsigned write_point, unsigned write_prio, bool wait)
+{
+        struct open_bucket *b;
+        BKEY_PADDED(key) alloc;
+        unsigned i;
+
+        /*
+         * We might have to allocate a new bucket, which we can't do with a
+         * spinlock held. So if we have to allocate, we drop the lock, allocate
+         * and then retry. KEY_PTRS() indicates whether alloc points to
+         * allocated bucket(s).
+         */
+
+        bkey_init(&alloc.key);
+        spin_lock(&c->data_bucket_lock);
+
+        while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
+                unsigned watermark = write_prio
+                        ? WATERMARK_MOVINGGC
+                        : WATERMARK_NONE;
+
+                spin_unlock(&c->data_bucket_lock);
+
+                if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
+                        return false;
+
+                spin_lock(&c->data_bucket_lock);
+        }
+
+        /*
+         * If we had to allocate, we might race and not need to allocate the
+         * second time we call find_data_bucket(). If we allocated a bucket but
+         * didn't use it, drop the refcount bch_bucket_alloc_set() took:
+         */
+        if (KEY_PTRS(&alloc.key))
+                __bkey_put(c, &alloc.key);
+
+        for (i = 0; i < KEY_PTRS(&b->key); i++)
+                EBUG_ON(ptr_stale(c, &b->key, i));
+
+        /* Set up the pointer to the space we're allocating: */
+
+        for (i = 0; i < KEY_PTRS(&b->key); i++)
+                k->ptr[i] = b->key.ptr[i];
+
+        sectors = min(sectors, b->sectors_free);
+
+        SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
+        SET_KEY_SIZE(k, sectors);
+        SET_KEY_PTRS(k, KEY_PTRS(&b->key));
+
+        /*
+         * Move b to the end of the lru, and keep track of what this bucket was
+         * last used for:
+         */
+        list_move_tail(&b->list, &c->data_buckets);
+        bkey_copy_key(&b->key, k);
+        b->last_write_point = write_point;
+
+        b->sectors_free -= sectors;
+
+        for (i = 0; i < KEY_PTRS(&b->key); i++) {
+                SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
+
+                atomic_long_add(sectors,
+                                &PTR_CACHE(c, &b->key, i)->sectors_written);
+        }
+
+        if (b->sectors_free < c->sb.block_size)
+                b->sectors_free = 0;
+
+        /*
+         * k takes refcounts on the buckets it points to until it's inserted
+         * into the btree, but if we're done with this bucket we just transfer
+         * get_data_bucket()'s refcount.
+         */
+        if (b->sectors_free)
+                for (i = 0; i < KEY_PTRS(&b->key); i++)
+                        atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
+
+        spin_unlock(&c->data_bucket_lock);
+        return true;
+}
+
 /* Init */

+void bch_open_buckets_free(struct cache_set *c)
+{
+        struct open_bucket *b;
+
+        while (!list_empty(&c->data_buckets)) {
+                b = list_first_entry(&c->data_buckets,
+                                     struct open_bucket, list);
+                list_del(&b->list);
+                kfree(b);
+        }
+}
+
+int bch_open_buckets_alloc(struct cache_set *c)
+{
+        int i;
+
+        spin_lock_init(&c->data_bucket_lock);
+
+        for (i = 0; i < 6; i++) {
+                struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+                if (!b)
+                        return -ENOMEM;
+
+                list_add(&b->list, &c->data_buckets);
+        }
+
+        return 0;
+}
+
 int bch_cache_allocator_start(struct cache *ca)
 {
         struct task_struct *k = kthread_run(bch_allocator_thread,
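
The retry loop in bch_alloc_sectors() above is a pattern worth noting: c->data_bucket_lock is a spinlock, but refilling an open bucket through bch_bucket_alloc_set() may block, so the lock is dropped around the allocation and the bucket pick is retried, with KEY_PTRS(&alloc.key) recording whether an allocation was made but then lost the race and must be released. Below is a generic userspace sketch of that drop-lock/allocate/retry shape, under assumed simplifications; pool, try_pick() and get_resource() are invented names and a pthread mutex stands in for the spinlock.

/* Illustration only: the drop-lock / allocate / retry pattern used above. */
#include <pthread.h>
#include <stdlib.h>

struct pool {
        pthread_mutex_t lock;
        void *cached;                   /* resource shared under the lock */
};

/* Reuse the cached resource, or install a freshly allocated one (may be NULL). */
static void *try_pick(struct pool *p, void *fresh)
{
        if (!p->cached && fresh)
                p->cached = fresh;
        return p->cached;
}

static void *get_resource(struct pool *p)
{
        void *fresh = NULL, *ret;

        pthread_mutex_lock(&p->lock);

        while (!(ret = try_pick(p, fresh))) {
                /* Can't allocate while holding the lock: drop, allocate, retry. */
                pthread_mutex_unlock(&p->lock);

                fresh = malloc(64);
                if (!fresh)
                        return NULL;

                pthread_mutex_lock(&p->lock);
        }

        /* We may have raced and not used our allocation: give it back. */
        if (fresh && fresh != ret)
                free(fresh);

        pthread_mutex_unlock(&p->lock);
        return ret;
}

int main(void)
{
        struct pool p = { PTHREAD_MUTEX_INITIALIZER, NULL };

        return get_resource(&p) ? 0 : 1;        /* demo buffer intentionally leaked */
}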
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 20fe96c121d9..e32f6fd91755 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -1170,6 +1170,8 @@ int __bch_bucket_alloc_set(struct cache_set *, unsigned,
                            struct bkey *, int, bool);
 int bch_bucket_alloc_set(struct cache_set *, unsigned,
                          struct bkey *, int, bool);
+bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
+                       unsigned, unsigned, bool);

 __printf(2, 3)
 bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -1210,6 +1212,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
 void bch_moving_init_cache_set(struct cache_set *);
+int bch_open_buckets_alloc(struct cache_set *);
+void bch_open_buckets_free(struct cache_set *);

 int bch_cache_allocator_start(struct cache *ca);
 int bch_cache_allocator_init(struct cache *ca);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 05c7c216f65e..cf7850a7592c 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -255,186 +255,6 @@ static void bch_data_insert_keys(struct closure *cl)
         closure_return(cl);
 }

-struct open_bucket {
-        struct list_head list;
-        struct task_struct *last;
-        unsigned sectors_free;
-        BKEY_PADDED(key);
-};
-
-void bch_open_buckets_free(struct cache_set *c)
-{
-        struct open_bucket *b;
-
-        while (!list_empty(&c->data_buckets)) {
-                b = list_first_entry(&c->data_buckets,
-                                     struct open_bucket, list);
-                list_del(&b->list);
-                kfree(b);
-        }
-}
-
-int bch_open_buckets_alloc(struct cache_set *c)
-{
-        int i;
-
-        spin_lock_init(&c->data_bucket_lock);
-
-        for (i = 0; i < 6; i++) {
-                struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
-                if (!b)
-                        return -ENOMEM;
-
-                list_add(&b->list, &c->data_buckets);
-        }
-
-        return 0;
-}
-
-/*
- * We keep multiple buckets open for writes, and try to segregate different
- * write streams for better cache utilization: first we look for a bucket where
- * the last write to it was sequential with the current write, and failing that
- * we look for a bucket that was last used by the same task.
- *
- * The idea is if you've got multiple tasks pulling data into the cache at the
- * same time, you'll get better cache utilization if you try to segregate their
- * data and preserve locality.
- *
- * For example, say you're starting Firefox at the same time you're copying a
- * bunch of files. Firefox will likely end up being fairly hot and stay in the
- * cache awhile, but the data you copied might not be; if you wrote all that
- * data to the same buckets it'd get invalidated at the same time.
- *
- * Both of those tasks will be doing fairly random IO so we can't rely on
- * detecting sequential IO to segregate their data, but going off of the task
- * should be a sane heuristic.
- */
-static struct open_bucket *pick_data_bucket(struct cache_set *c,
-                                            const struct bkey *search,
-                                            struct task_struct *task,
-                                            struct bkey *alloc)
-{
-        struct open_bucket *ret, *ret_task = NULL;
-
-        list_for_each_entry_reverse(ret, &c->data_buckets, list)
-                if (!bkey_cmp(&ret->key, search))
-                        goto found;
-                else if (ret->last == task)
-                        ret_task = ret;
-
-        ret = ret_task ?: list_first_entry(&c->data_buckets,
-                                           struct open_bucket, list);
-found:
-        if (!ret->sectors_free && KEY_PTRS(alloc)) {
-                ret->sectors_free = c->sb.bucket_size;
-                bkey_copy(&ret->key, alloc);
-                bkey_init(alloc);
-        }
-
-        if (!ret->sectors_free)
-                ret = NULL;
-
-        return ret;
-}
-
-/*
- * Allocates some space in the cache to write to, and k to point to the newly
- * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
- * end of the newly allocated space).
- *
- * May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many
- * sectors were actually allocated.
- *
- * If s->writeback is true, will not fail.
- */
-static bool bch_alloc_sectors(struct data_insert_op *op,
-                              struct bkey *k, unsigned sectors)
-{
-        struct cache_set *c = op->c;
-        struct open_bucket *b;
-        BKEY_PADDED(key) alloc;
-        unsigned i;
-
-        /*
-         * We might have to allocate a new bucket, which we can't do with a
-         * spinlock held. So if we have to allocate, we drop the lock, allocate
-         * and then retry. KEY_PTRS() indicates whether alloc points to
-         * allocated bucket(s).
-         */
-
-        bkey_init(&alloc.key);
-        spin_lock(&c->data_bucket_lock);
-
-        while (!(b = pick_data_bucket(c, k, op->task, &alloc.key))) {
-                unsigned watermark = op->write_prio
-                        ? WATERMARK_MOVINGGC
-                        : WATERMARK_NONE;
-
-                spin_unlock(&c->data_bucket_lock);
-
-                if (bch_bucket_alloc_set(c, watermark, &alloc.key,
-                                         1, op->writeback))
-                        return false;
-
-                spin_lock(&c->data_bucket_lock);
-        }
-
-        /*
-         * If we had to allocate, we might race and not need to allocate the
-         * second time we call find_data_bucket(). If we allocated a bucket but
-         * didn't use it, drop the refcount bch_bucket_alloc_set() took:
-         */
-        if (KEY_PTRS(&alloc.key))
-                __bkey_put(c, &alloc.key);
-
-        for (i = 0; i < KEY_PTRS(&b->key); i++)
-                EBUG_ON(ptr_stale(c, &b->key, i));
-
-        /* Set up the pointer to the space we're allocating: */
-
-        for (i = 0; i < KEY_PTRS(&b->key); i++)
-                k->ptr[i] = b->key.ptr[i];
-
-        sectors = min(sectors, b->sectors_free);
-
-        SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
-        SET_KEY_SIZE(k, sectors);
-        SET_KEY_PTRS(k, KEY_PTRS(&b->key));
-
-        /*
-         * Move b to the end of the lru, and keep track of what this bucket was
-         * last used for:
-         */
-        list_move_tail(&b->list, &c->data_buckets);
-        bkey_copy_key(&b->key, k);
-        b->last = op->task;
-
-        b->sectors_free -= sectors;
-
-        for (i = 0; i < KEY_PTRS(&b->key); i++) {
-                SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
-
-                atomic_long_add(sectors,
-                                &PTR_CACHE(c, &b->key, i)->sectors_written);
-        }
-
-        if (b->sectors_free < c->sb.block_size)
-                b->sectors_free = 0;
-
-        /*
-         * k takes refcounts on the buckets it points to until it's inserted
-         * into the btree, but if we're done with this bucket we just transfer
-         * get_data_bucket()'s refcount.
-         */
-        if (b->sectors_free)
-                for (i = 0; i < KEY_PTRS(&b->key); i++)
-                        atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
-
-        spin_unlock(&c->data_bucket_lock);
-        return true;
-}
-
 static void bch_data_invalidate(struct closure *cl)
 {
         struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
@@ -545,7 +365,9 @@ static void bch_data_insert_start(struct closure *cl)
                 SET_KEY_INODE(k, op->inode);
                 SET_KEY_OFFSET(k, bio->bi_sector);

-                if (!bch_alloc_sectors(op, k, bio_sectors(bio)))
+                if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
+                                       op->write_point, op->write_prio,
+                                       op->writeback))
                         goto err;

                 n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
@@ -968,7 +790,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
         s->iop.c = d->c;
         s->d = d;
         s->op.lock = -1;
-        s->iop.task = current;
+        s->iop.write_point = hash_long((unsigned long) current, 16);
         s->orig_bio = bio;
         s->write = (bio->bi_rw & REQ_WRITE) != 0;
         s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 54d7de27356f..2cd65bf073c2 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -6,10 +6,10 @@
 struct data_insert_op {
         struct closure cl;
         struct cache_set *c;
-        struct task_struct *task;
         struct bio *bio;

         unsigned inode;
+        uint16_t write_point;
         uint16_t write_prio;
         short error;

@@ -31,9 +31,6 @@ struct data_insert_op {
 unsigned bch_get_congested(struct cache_set *);
 void bch_data_insert(struct closure *cl);

-void bch_open_buckets_free(struct cache_set *);
-int bch_open_buckets_alloc(struct cache_set *);
-
 void bch_cached_dev_request_init(struct cached_dev *dc);
 void bch_flash_dev_request_init(struct bcache_device *d);
