author     Kent Overstreet <kmo@daterainc.com>   2013-07-24 21:11:11 -0400
committer  Kent Overstreet <kmo@daterainc.com>   2013-11-11 00:56:32 -0500
commit     2599b53b7b0ea6103d1661dca74d35480cb8fa1f
tree       0b277532e73924b68c3da43a999aa5df33824787 /drivers/md/bcache/alloc.c
parent     220bb38c21b83e2f7b842f33220bf727093eca89
bcache: Move sector allocator to alloc.c
Just reorganizing things a bit.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md/bcache/alloc.c')
-rw-r--r--   drivers/md/bcache/alloc.c   180
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b9bd5866055d..4970ddc6a7f6 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -487,8 +487,188 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 	return ret;
 }
 
+/* Sector allocator */
+
+struct open_bucket {
+	struct list_head	list;
+	unsigned		last_write_point;
+	unsigned		sectors_free;
+	BKEY_PADDED(key);
+};
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ * write streams for better cache utilization: first we look for a bucket where
+ * the last write to it was sequential with the current write, and failing that
+ * we look for a bucket that was last used by the same task.
+ *
+ * The idea is that if you've got multiple tasks pulling data into the cache at
+ * the same time, you'll get better cache utilization if you try to segregate
+ * their data and preserve locality.
+ *
+ * For example, say you're starting Firefox at the same time you're copying a
+ * bunch of files. Firefox will likely end up being fairly hot and stay in the
+ * cache awhile, but the data you copied might not be; if you wrote all that
+ * data to the same buckets it'd get invalidated at the same time.
+ *
+ * Both of those tasks will be doing fairly random IO so we can't rely on
+ * detecting sequential IO to segregate their data, but going off of the task
+ * should be a sane heuristic.
+ */
+static struct open_bucket *pick_data_bucket(struct cache_set *c,
+					    const struct bkey *search,
+					    unsigned write_point,
+					    struct bkey *alloc)
+{
+	struct open_bucket *ret, *ret_task = NULL;
+
+	list_for_each_entry_reverse(ret, &c->data_buckets, list)
+		if (!bkey_cmp(&ret->key, search))
+			goto found;
+		else if (ret->last_write_point == write_point)
+			ret_task = ret;
+
+	ret = ret_task ?: list_first_entry(&c->data_buckets,
+					   struct open_bucket, list);
+found:
+	if (!ret->sectors_free && KEY_PTRS(alloc)) {
+		ret->sectors_free = c->sb.bucket_size;
+		bkey_copy(&ret->key, alloc);
+		bkey_init(alloc);
+	}
+
+	if (!ret->sectors_free)
+		ret = NULL;
+
+	return ret;
+}
+
+/*
+ * Allocates some space in the cache to write to, sets k to point to the newly
+ * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
+ * end of the newly allocated space).
+ *
+ * May allocate fewer sectors than @sectors; KEY_SIZE(k) indicates how many
+ * sectors were actually allocated.
+ *
+ * If wait is true, will not fail.
+ */
+bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
+		       unsigned write_point, unsigned write_prio, bool wait)
+{
+	struct open_bucket *b;
+	BKEY_PADDED(key) alloc;
+	unsigned i;
+
+	/*
+	 * We might have to allocate a new bucket, which we can't do with a
+	 * spinlock held. So if we have to allocate, we drop the lock, allocate
+	 * and then retry. KEY_PTRS() indicates whether alloc points to
+	 * allocated bucket(s).
+	 */
+
+	bkey_init(&alloc.key);
+	spin_lock(&c->data_bucket_lock);
+
+	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
+		unsigned watermark = write_prio
+			? WATERMARK_MOVINGGC
+			: WATERMARK_NONE;
+
+		spin_unlock(&c->data_bucket_lock);
+
+		if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
+			return false;
+
+		spin_lock(&c->data_bucket_lock);
+	}
+
+	/*
+	 * If we had to allocate, we might race and not need to allocate the
+	 * second time we call pick_data_bucket(). If we allocated a bucket but
+	 * didn't use it, drop the refcount bch_bucket_alloc_set() took:
+	 */
+	if (KEY_PTRS(&alloc.key))
+		__bkey_put(c, &alloc.key);
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++)
+		EBUG_ON(ptr_stale(c, &b->key, i));
+
+	/* Set up the pointer to the space we're allocating: */
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++)
+		k->ptr[i] = b->key.ptr[i];
+
+	sectors = min(sectors, b->sectors_free);
+
+	SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
+	SET_KEY_SIZE(k, sectors);
+	SET_KEY_PTRS(k, KEY_PTRS(&b->key));
+
+	/*
+	 * Move b to the end of the lru, and keep track of what this bucket was
+	 * last used for:
+	 */
+	list_move_tail(&b->list, &c->data_buckets);
+	bkey_copy_key(&b->key, k);
+	b->last_write_point = write_point;
+
+	b->sectors_free -= sectors;
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++) {
+		SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
+
+		atomic_long_add(sectors,
+				&PTR_CACHE(c, &b->key, i)->sectors_written);
+	}
+
+	if (b->sectors_free < c->sb.block_size)
+		b->sectors_free = 0;
+
+	/*
+	 * k takes refcounts on the buckets it points to until it's inserted
+	 * into the btree, but if we're done with this open bucket we just
+	 * transfer the open bucket's refcount to k.
+	 */
+	if (b->sectors_free)
+		for (i = 0; i < KEY_PTRS(&b->key); i++)
+			atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
+
+	spin_unlock(&c->data_bucket_lock);
+	return true;
+}
+
 /* Init */
 
+void bch_open_buckets_free(struct cache_set *c)
+{
+	struct open_bucket *b;
+
+	while (!list_empty(&c->data_buckets)) {
+		b = list_first_entry(&c->data_buckets,
+				     struct open_bucket, list);
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
+int bch_open_buckets_alloc(struct cache_set *c)
+{
+	int i;
+
+	spin_lock_init(&c->data_bucket_lock);
+
+	for (i = 0; i < 6; i++) {
+		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+		if (!b)
+			return -ENOMEM;
+
+		list_add(&b->list, &c->data_buckets);
+	}
+
+	return 0;
+}
+
 int bch_cache_allocator_start(struct cache *ca)
 {
 	struct task_struct *k = kthread_run(bch_allocator_thread,
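
Note on usage: bch_alloc_sectors() may hand back fewer sectors than were asked for (KEY_SIZE(k) reports how many it actually covered), so a writer keeps calling it until the whole I/O is accounted for, emitting one key per chunk; in the tree, the data insert path in drivers/md/bcache/request.c fills that role. The sketch below is purely illustrative and not part of this commit: the function name alloc_whole_io and its structure are made up, and it assumes the bcache-internal headers for struct cache_set, struct bkey, KEY_SIZE() and bch_alloc_sectors().

/*
 * Illustrative caller sketch only -- not code from this commit.
 * Assumes k already carries KEY_INODE()/KEY_OFFSET() for the start
 * of the write (as the real data insert path sets up).
 */
static bool alloc_whole_io(struct cache_set *c, struct bkey *k,
			   unsigned total_sectors, unsigned write_point,
			   unsigned write_prio, bool wait)
{
	unsigned done = 0;

	while (done < total_sectors) {
		if (!bch_alloc_sectors(c, k, total_sectors - done,
				       write_point, write_prio, wait))
			return false;	/* no space and wait == false */

		/*
		 * KEY_SIZE(k) is how much this call actually covered;
		 * KEY_OFFSET(k) now points at the end of that chunk, so the
		 * next iteration continues right after it.
		 */
		done += KEY_SIZE(k);

		/*
		 * A real caller would submit or queue the chunk described by
		 * k here before asking for more space.
		 */
	}

	return true;
}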