author     Kent Overstreet <kmo@daterainc.com>   2013-07-24 21:11:11 -0400
committer  Kent Overstreet <kmo@daterainc.com>   2013-11-11 00:56:32 -0500
commit     2599b53b7b0ea6103d1661dca74d35480cb8fa1f
tree       0b277532e73924b68c3da43a999aa5df33824787 /drivers/md/bcache/alloc.c
parent     220bb38c21b83e2f7b842f33220bf727093eca89
bcache: Move sector allocator to alloc.c
Just reorganizing things a bit.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md/bcache/alloc.c')
-rw-r--r--   drivers/md/bcache/alloc.c   180
1 files changed, 180 insertions, 0 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b9bd5866055d..4970ddc6a7f6 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -487,8 +487,188 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 	return ret;
 }
 
+/* Sector allocator */
+
+struct open_bucket {
+	struct list_head	list;
+	unsigned		last_write_point;
+	unsigned		sectors_free;
+	BKEY_PADDED(key);
+};
+
+/*
+ * We keep multiple buckets open for writes, and try to segregate different
+ * write streams for better cache utilization: first we look for a bucket where
+ * the last write to it was sequential with the current write, and failing that
+ * we look for a bucket that was last used by the same task.
+ *
+ * The idea is that if you've got multiple tasks pulling data into the cache at
+ * the same time, you'll get better cache utilization if you try to segregate
+ * their data and preserve locality.
+ *
+ * For example, say you're starting Firefox at the same time you're copying a
+ * bunch of files. Firefox will likely end up being fairly hot and stay in the
+ * cache awhile, but the data you copied might not be; if you wrote all that
+ * data to the same buckets it'd get invalidated at the same time.
+ *
+ * Both of those tasks will be doing fairly random IO so we can't rely on
+ * detecting sequential IO to segregate their data, but going off of the task
+ * should be a sane heuristic.
+ */
+static struct open_bucket *pick_data_bucket(struct cache_set *c,
+					    const struct bkey *search,
+					    unsigned write_point,
+					    struct bkey *alloc)
+{
+	struct open_bucket *ret, *ret_task = NULL;
+
+	list_for_each_entry_reverse(ret, &c->data_buckets, list)
+		if (!bkey_cmp(&ret->key, search))
+			goto found;
+		else if (ret->last_write_point == write_point)
+			ret_task = ret;
+
+	ret = ret_task ?: list_first_entry(&c->data_buckets,
+					   struct open_bucket, list);
+found:
+	if (!ret->sectors_free && KEY_PTRS(alloc)) {
+		ret->sectors_free = c->sb.bucket_size;
+		bkey_copy(&ret->key, alloc);
+		bkey_init(alloc);
+	}
+
+	if (!ret->sectors_free)
+		ret = NULL;
+
+	return ret;
+}
+
+/*
+ * Allocates some space in the cache to write to, sets k to point to the newly
+ * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
+ * end of the newly allocated space).
+ *
+ * May allocate fewer sectors than @sectors; KEY_SIZE(k) indicates how many
+ * sectors were actually allocated.
+ *
+ * If wait is true, will not fail.
+ */
+bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
+		       unsigned write_point, unsigned write_prio, bool wait)
+{
+	struct open_bucket *b;
+	BKEY_PADDED(key) alloc;
+	unsigned i;
+
+	/*
+	 * We might have to allocate a new bucket, which we can't do with a
+	 * spinlock held. So if we have to allocate, we drop the lock, allocate
+	 * and then retry. KEY_PTRS() indicates whether alloc points to
+	 * allocated bucket(s).
+	 */
+
+	bkey_init(&alloc.key);
+	spin_lock(&c->data_bucket_lock);
+
+	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
+		unsigned watermark = write_prio
+			? WATERMARK_MOVINGGC
+			: WATERMARK_NONE;
+
+		spin_unlock(&c->data_bucket_lock);
+
+		if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
+			return false;
+
+		spin_lock(&c->data_bucket_lock);
+	}
+
+	/*
+	 * If we had to allocate, we might race and not need to allocate the
+	 * second time we call pick_data_bucket(). If we allocated a bucket but
+	 * didn't use it, drop the refcount bch_bucket_alloc_set() took:
+	 */
+	if (KEY_PTRS(&alloc.key))
+		__bkey_put(c, &alloc.key);
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++)
+		EBUG_ON(ptr_stale(c, &b->key, i));
+
+	/* Set up the pointer to the space we're allocating: */
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++)
+		k->ptr[i] = b->key.ptr[i];
+
+	sectors = min(sectors, b->sectors_free);
+
+	SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
+	SET_KEY_SIZE(k, sectors);
+	SET_KEY_PTRS(k, KEY_PTRS(&b->key));
+
+	/*
+	 * Move b to the end of the lru, and keep track of what this bucket was
+	 * last used for:
+	 */
+	list_move_tail(&b->list, &c->data_buckets);
+	bkey_copy_key(&b->key, k);
+	b->last_write_point = write_point;
+
+	b->sectors_free -= sectors;
+
+	for (i = 0; i < KEY_PTRS(&b->key); i++) {
+		SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
+
+		atomic_long_add(sectors,
+				&PTR_CACHE(c, &b->key, i)->sectors_written);
+	}
+
+	if (b->sectors_free < c->sb.block_size)
+		b->sectors_free = 0;
+
+	/*
+	 * k takes refcounts on the buckets it points to until it's inserted
+	 * into the btree, but if we're done with this open bucket we just
+	 * transfer the open bucket's refcount to k.
+	 */
+	if (b->sectors_free)
+		for (i = 0; i < KEY_PTRS(&b->key); i++)
+			atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
+
+	spin_unlock(&c->data_bucket_lock);
+	return true;
+}
+
 /* Init */
 
+void bch_open_buckets_free(struct cache_set *c)
+{
+	struct open_bucket *b;
+
+	while (!list_empty(&c->data_buckets)) {
+		b = list_first_entry(&c->data_buckets,
+				     struct open_bucket, list);
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
+int bch_open_buckets_alloc(struct cache_set *c)
+{
+	int i;
+
+	spin_lock_init(&c->data_bucket_lock);
+
+	for (i = 0; i < 6; i++) {
+		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+		if (!b)
+			return -ENOMEM;
+
+		list_add(&b->list, &c->data_buckets);
+	}
+
+	return 0;
+}
+
 int bch_cache_allocator_start(struct cache *ca)
 {
 	struct task_struct *k = kthread_run(bch_allocator_thread,
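
Note on usage: bch_alloc_sectors() may hand back fewer sectors than were asked for (KEY_SIZE(k) reports how many it actually covered), so a writer keeps calling it until the whole I/O is accounted for, emitting one key per chunk; in the tree, the data insert path in drivers/md/bcache/request.c fills that role. The sketch below is purely illustrative and not part of this commit: the function name alloc_whole_io and its structure are made up, and it assumes the bcache-internal headers for struct cache_set, struct bkey, KEY_SIZE() and bch_alloc_sectors().

/*
 * Illustrative caller sketch only -- not code from this commit.
 * Assumes k already carries KEY_INODE()/KEY_OFFSET() for the start
 * of the write (as the real data insert path sets up).
 */
static bool alloc_whole_io(struct cache_set *c, struct bkey *k,
			   unsigned total_sectors, unsigned write_point,
			   unsigned write_prio, bool wait)
{
	unsigned done = 0;

	while (done < total_sectors) {
		if (!bch_alloc_sectors(c, k, total_sectors - done,
				       write_point, write_prio, wait))
			return false;	/* no space and wait == false */

		/*
		 * KEY_SIZE(k) is how much this call actually covered;
		 * KEY_OFFSET(k) now points at the end of that chunk, so the
		 * next iteration continues right after it.
		 */
		done += KEY_SIZE(k);

		/*
		 * A real caller would submit or queue the chunk described by
		 * k here before asking for more space.
		 */
	}

	return true;
}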