1 files changed, 88 insertions, 25 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e238a0cdac67..94627c4cc193 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2977,10 +2977,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
        free_space = btrfs_super_total_bytes(disk_super);
        /*
-         * we allow the metadata to grow to a max of either 5gb or 5% of the
+         * we allow the metadata to grow to a max of either 10gb or 5% of the
         * space in the volume.
         */
-        min_metadata = min((u64)5 * 1024 * 1024 * 1024,
+        min_metadata = min((u64)10 * 1024 * 1024 * 1024,
                             div64_u64(free_space * 5, 100));
        if (info->total_bytes >= min_metadata) {
                spin_unlock(&info->lock);
@@ -4102,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 }
 enum btrfs_loop_type {
-        LOOP_CACHED_ONLY = 0,
+        LOOP_FIND_IDEAL = 0,
        LOOP_CACHING_NOWAIT = 1,
        LOOP_CACHING_WAIT = 2,
        LOOP_ALLOC_CHUNK = 3,
@@ -4131,12 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *block_group = NULL;
        int empty_cluster = 2 * 1024 * 1024;
        int allowed_chunk_alloc = 0;
+        int done_chunk_alloc = 0;
        struct btrfs_space_info *space_info;
        int last_ptr_loop = 0;
        int loop = 0;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
+        u64 ideal_cache_percent = 0;
+        u64 ideal_cache_offset = 0;
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4172,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                empty_cluster = 0;
        if (search_start == hint_byte) {
+ideal_cache:
                block_group = btrfs_lookup_block_group(root->fs_info,
                                                       search_start);
                /*
                 * we don't want to use the block group if it doesn't match our
                 * allocation bits, or if its not cached.
+                 *
+                 * However if we are re-searching with an ideal block group
+                 * picked out then we don't care whether it is cached.
                 */
                if (block_group && block_group_bits(block_group, data) &&
-                    block_group_cache_done(block_group)) {
+                    (block_group->cached != BTRFS_CACHE_NO ||
+                     search_start == ideal_cache_offset)) {
                        down_read(&space_info->groups_sem);
                        if (list_empty(&block_group->list) ||
                            block_group->ro) {
@@ -4191,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                 */
                                btrfs_put_block_group(block_group);
                                up_read(&space_info->groups_sem);
-                        } else
+                        } else {
                                goto have_block_group;
+                        }
                } else if (block_group) {
                        btrfs_put_block_group(block_group);
                }
        }
 search:
        down_read(&space_info->groups_sem);
        list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4209,28 +4217,45 @@ search:
 have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                        u64 free_percent;
+                        free_percent = btrfs_block_group_used(&block_group->item);
+                        free_percent *= 100;
+                        free_percent = div64_u64(free_percent,
+                                                 block_group->key.offset);
+                        free_percent = 100 - free_percent;
+                        if (free_percent > ideal_cache_percent &&
+                            likely(!block_group->ro)) {
+                                ideal_cache_offset = block_group->key.objectid;
+                                ideal_cache_percent = free_percent;
+                        }
                        /*
-                         * we want to start caching kthreads, but not too many
+                         * We only want to start kthread caching if we are at
-                         * right off the bat so we don't overwhelm the system,
+                         * the point where we will wait for caching to make
-                         * so only start them if there are less than 2 and we're
+                         * progress, or if our ideal search is over and we've
-                         * in the initial allocation phase.
+                         * found somebody to start caching.
                         */
                        if (loop > LOOP_CACHING_NOWAIT ||
-                            atomic_read(&space_info->caching_threads) < 2) {
+                            (loop > LOOP_FIND_IDEAL &&
+                             atomic_read(&space_info->caching_threads) < 2)) {
                                ret = cache_block_group(block_group);
                                BUG_ON(ret);
                        }
-                }
-                cached = block_group_cache_done(block_group);
-                if (unlikely(!cached)) {
                        found_uncached_bg = true;
-                        /* if we only want cached bgs, loop */
+                        /*
-                        if (loop == LOOP_CACHED_ONLY)
+                         * If we are still in the LOOP_FIND_IDEAL pass, try the
+                         * next block group.
+                         */
+                        if (loop == LOOP_FIND_IDEAL)
                                goto loop;
                }
+                cached = block_group_cache_done(block_group);
+                if (unlikely(!cached))
+                        found_uncached_bg = true;
                if (unlikely(block_group->ro))
                        goto loop;
@@ -4410,9 +4435,11 @@ loop:
        }
        up_read(&space_info->groups_sem);
-        /* LOOP_CACHED_ONLY, only search fully cached block groups
+        /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
-         * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+         *                      them to make caching progress.  Also
-         *                      dont wait foR them to finish caching
+         *                      determine the best possible bg to cache
+         * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
+         *                      caching kthreads as we move along
         * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
         * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
         * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4421,12 +4448,47 @@ loop:
        if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
            (found_uncached_bg || empty_size || empty_cluster ||
             allowed_chunk_alloc)) {
-                if (found_uncached_bg) {
+                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
-                        if (loop < LOOP_CACHING_WAIT) {
+                        loop++;
-                                loop++;
+                        if (!ideal_cache_percent &&
+                            atomic_read(&space_info->caching_threads))
                                goto search;
-                        }
+                        /*
+                         * One of the following two things has happened so far
+                         *
+                         * 1) We found an ideal block group for caching that
+                         * is mostly full and will cache quickly, so we might
+                         * as well wait for it.
+                         *
+                         * 2) We searched for cached only and we didn't find
+                         * anything, and we didn't start any caching kthreads
+                         * either, so chances are we will loop through and
+                         * start a couple caching kthreads, and then come back
+                         * around and just wait for them.  This will be slower
+                         * because we will have 2 caching kthreads reading at
+                         * the same time when we could have just started one
+                         * and waited for it to get far enough to give us an
+                         * allocation, so go ahead and go to the wait caching
+                         * loop.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        search_start = ideal_cache_offset;
+                        ideal_cache_percent = 0;
+                        goto ideal_cache;
+                } else if (loop == LOOP_FIND_IDEAL) {
+                        /*
+                         * Didn't find an uncached bg, wait on anything we find
+                         * next.
+                         */
+                        loop = LOOP_CACHING_WAIT;
+                        goto search;
+                }
+                if (loop < LOOP_CACHING_WAIT) {
+                        loop++;
+                        goto search;
                }
                if (loop == LOOP_ALLOC_CHUNK) {
@@ -4438,7 +4500,8 @@ loop:
                        ret = do_chunk_alloc(trans, root, num_bytes +
                                             2 * 1024 * 1024, data, 1);
                        allowed_chunk_alloc = 0;
-                } else {
+                        done_chunk_alloc = 1;
+                } else if (!done_chunk_alloc) {
                        space_info->force_alloc = 1;
                }

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e238a0cdac67..94627c4cc193 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c
@@ -2977,10 +2977,10 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
2977		2977
2978	free_space = btrfs_super_total_bytes(disk_super);	2978	free_space = btrfs_super_total_bytes(disk_super);
2979	/*	2979	/*
2980	* we allow the metadata to grow to a max of either 5gb or 5% of the	2980	* we allow the metadata to grow to a max of either 10gb or 5% of the
2981	* space in the volume.	2981	* space in the volume.
2982	*/	2982	*/
2983	min_metadata = min((u64)5 * 1024 * 1024 * 1024,	2983	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
2984	div64_u64(free_space * 5, 100));	2984	div64_u64(free_space * 5, 100));
2985	if (info->total_bytes >= min_metadata) {	2985	if (info->total_bytes >= min_metadata) {
2986	spin_unlock(&info->lock);	2986	spin_unlock(&info->lock);
@@ -4102,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4102	}	4102	}
4103		4103
4104	enum btrfs_loop_type {	4104	enum btrfs_loop_type {
4105	LOOP_CACHED_ONLY = 0,	4105	LOOP_FIND_IDEAL = 0,
4106	LOOP_CACHING_NOWAIT = 1,	4106	LOOP_CACHING_NOWAIT = 1,
4107	LOOP_CACHING_WAIT = 2,	4107	LOOP_CACHING_WAIT = 2,
4108	LOOP_ALLOC_CHUNK = 3,	4108	LOOP_ALLOC_CHUNK = 3,
@@ -4131,12 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4131	struct btrfs_block_group_cache *block_group = NULL;	4131	struct btrfs_block_group_cache *block_group = NULL;
4132	int empty_cluster = 2 * 1024 * 1024;	4132	int empty_cluster = 2 * 1024 * 1024;
4133	int allowed_chunk_alloc = 0;	4133	int allowed_chunk_alloc = 0;
		4134	int done_chunk_alloc = 0;
4134	struct btrfs_space_info *space_info;	4135	struct btrfs_space_info *space_info;
4135	int last_ptr_loop = 0;	4136	int last_ptr_loop = 0;
4136	int loop = 0;	4137	int loop = 0;
4137	bool found_uncached_bg = false;	4138	bool found_uncached_bg = false;
4138	bool failed_cluster_refill = false;	4139	bool failed_cluster_refill = false;
4139	bool failed_alloc = false;	4140	bool failed_alloc = false;
		4141	u64 ideal_cache_percent = 0;
		4142	u64 ideal_cache_offset = 0;
4140		4143
4141	WARN_ON(num_bytes < root->sectorsize);	4144	WARN_ON(num_bytes < root->sectorsize);
4142	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);	4145	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4172,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4172	empty_cluster = 0;	4175	empty_cluster = 0;
4173		4176
4174	if (search_start == hint_byte) {	4177	if (search_start == hint_byte) {
		4178	ideal_cache:
4175	block_group = btrfs_lookup_block_group(root->fs_info,	4179	block_group = btrfs_lookup_block_group(root->fs_info,
4176	search_start);	4180	search_start);
4177	/*	4181	/*
4178	* we don't want to use the block group if it doesn't match our	4182	* we don't want to use the block group if it doesn't match our
4179	* allocation bits, or if its not cached.	4183	* allocation bits, or if its not cached.
		4184	*
		4185	* However if we are re-searching with an ideal block group
		4186	* picked out then we don't care whether it is cached.
4180	*/	4187	*/
4181	if (block_group && block_group_bits(block_group, data) &&	4188	if (block_group && block_group_bits(block_group, data) &&
4182	block_group_cache_done(block_group)) {	4189	(block_group->cached != BTRFS_CACHE_NO \|\|
		4190	search_start == ideal_cache_offset)) {
4183	down_read(&space_info->groups_sem);	4191	down_read(&space_info->groups_sem);
4184	if (list_empty(&block_group->list) \|\|	4192	if (list_empty(&block_group->list) \|\|
4185	block_group->ro) {	4193	block_group->ro) {
@@ -4191,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4191	*/	4199	*/
4192	btrfs_put_block_group(block_group);	4200	btrfs_put_block_group(block_group);
4193	up_read(&space_info->groups_sem);	4201	up_read(&space_info->groups_sem);
4194	} else	4202	} else {
4195	goto have_block_group;	4203	goto have_block_group;
		4204	}
4196	} else if (block_group) {	4205	} else if (block_group) {
4197	btrfs_put_block_group(block_group);	4206	btrfs_put_block_group(block_group);
4198	}	4207	}
4199	}	4208	}
4200
4201	search:	4209	search:
4202	down_read(&space_info->groups_sem);	4210	down_read(&space_info->groups_sem);
4203	list_for_each_entry(block_group, &space_info->block_groups, list) {	4211	list_for_each_entry(block_group, &space_info->block_groups, list) {
@@ -4209,28 +4217,45 @@ search:
4209		4217
4210	have_block_group:	4218	have_block_group:
4211	if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {	4219	if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
		4220	u64 free_percent;
		4221
		4222	free_percent = btrfs_block_group_used(&block_group->item);
		4223	free_percent *= 100;
		4224	free_percent = div64_u64(free_percent,
		4225	block_group->key.offset);
		4226	free_percent = 100 - free_percent;
		4227	if (free_percent > ideal_cache_percent &&
		4228	likely(!block_group->ro)) {
		4229	ideal_cache_offset = block_group->key.objectid;
		4230	ideal_cache_percent = free_percent;
		4231	}
		4232
4212	/*	4233	/*
4213	* we want to start caching kthreads, but not too many	4234	* We only want to start kthread caching if we are at
4214	* right off the bat so we don't overwhelm the system,	4235	* the point where we will wait for caching to make
4215	* so only start them if there are less than 2 and we're	4236	* progress, or if our ideal search is over and we've
4216	* in the initial allocation phase.	4237	* found somebody to start caching.
4217	*/	4238	*/
4218	if (loop > LOOP_CACHING_NOWAIT \|\|	4239	if (loop > LOOP_CACHING_NOWAIT \|\|
4219	atomic_read(&space_info->caching_threads) < 2) {	4240	(loop > LOOP_FIND_IDEAL &&
		4241	atomic_read(&space_info->caching_threads) < 2)) {
4220	ret = cache_block_group(block_group);	4242	ret = cache_block_group(block_group);
4221	BUG_ON(ret);	4243	BUG_ON(ret);
4222	}	4244	}
4223	}
4224
4225	cached = block_group_cache_done(block_group);
4226	if (unlikely(!cached)) {
4227	found_uncached_bg = true;	4245	found_uncached_bg = true;
4228		4246
4229	/* if we only want cached bgs, loop */	4247	/*
4230	if (loop == LOOP_CACHED_ONLY)	4248	* If we are still in the LOOP_FIND_IDEAL pass, try the
		4249	* next block group.
		4250	*/
		4251	if (loop == LOOP_FIND_IDEAL)
4231	goto loop;	4252	goto loop;
4232	}	4253	}
4233		4254
		4255	cached = block_group_cache_done(block_group);
		4256	if (unlikely(!cached))
		4257	found_uncached_bg = true;
		4258
4234	if (unlikely(block_group->ro))	4259	if (unlikely(block_group->ro))
4235	goto loop;	4260	goto loop;
4236		4261
@@ -4410,9 +4435,11 @@ loop:
4410	}	4435	}
4411	up_read(&space_info->groups_sem);	4436	up_read(&space_info->groups_sem);
4412		4437
4413	/* LOOP_CACHED_ONLY, only search fully cached block groups	4438	/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
4414	* LOOP_CACHING_NOWAIT, search partially cached block groups, but	4439	* them to make caching progress. Also
4415	* dont wait foR them to finish caching	4440	* determine the best possible bg to cache
		4441	* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
		4442	* caching kthreads as we move along
4416	* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching	4443	* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
4417	* LOOP_ALLOC_CHUNK, force a chunk allocation and try again	4444	* LOOP_ALLOC_CHUNK, force a chunk allocation and try again
4418	* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try	4445	* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
@@ -4421,12 +4448,47 @@ loop:
4421	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&	4448	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
4422	(found_uncached_bg \|\| empty_size \|\| empty_cluster \|\|	4449	(found_uncached_bg \|\| empty_size \|\| empty_cluster \|\|
4423	allowed_chunk_alloc)) {	4450	allowed_chunk_alloc)) {
4424	if (found_uncached_bg) {	4451	if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
4425	found_uncached_bg = false;	4452	found_uncached_bg = false;
4426	if (loop < LOOP_CACHING_WAIT) {	4453	loop++;
4427	loop++;	4454	if (!ideal_cache_percent &&
		4455	atomic_read(&space_info->caching_threads))
4428	goto search;	4456	goto search;
4429	}	4457
		4458	/*
		4459	* One of the following two things has happened so far
		4460	*
		4461	* 1) We found an ideal block group for caching that
		4462	* is mostly full and will cache quickly, so we might
		4463	* as well wait for it.
		4464	*
		4465	* 2) We searched for cached only and we didn't find
		4466	* anything, and we didn't start any caching kthreads
		4467	* either, so chances are we will loop through and
		4468	* start a couple caching kthreads, and then come back
		4469	* around and just wait for them. This will be slower
		4470	* because we will have 2 caching kthreads reading at
		4471	* the same time when we could have just started one
		4472	* and waited for it to get far enough to give us an
		4473	* allocation, so go ahead and go to the wait caching
		4474	* loop.
		4475	*/
		4476	loop = LOOP_CACHING_WAIT;
		4477	search_start = ideal_cache_offset;
		4478	ideal_cache_percent = 0;
		4479	goto ideal_cache;
		4480	} else if (loop == LOOP_FIND_IDEAL) {
		4481	/*
		4482	* Didn't find an uncached bg, wait on anything we find
		4483	* next.
		4484	*/
		4485	loop = LOOP_CACHING_WAIT;
		4486	goto search;
		4487	}
		4488
		4489	if (loop < LOOP_CACHING_WAIT) {
		4490	loop++;
		4491	goto search;
4430	}	4492	}
4431		4493
4432	if (loop == LOOP_ALLOC_CHUNK) {	4494	if (loop == LOOP_ALLOC_CHUNK) {
@@ -4438,7 +4500,8 @@ loop:
4438	ret = do_chunk_alloc(trans, root, num_bytes +	4500	ret = do_chunk_alloc(trans, root, num_bytes +
4439	2 * 1024 * 1024, data, 1);	4501	2 * 1024 * 1024, data, 1);
4440	allowed_chunk_alloc = 0;	4502	allowed_chunk_alloc = 0;
4441	} else {	4503	done_chunk_alloc = 1;
		4504	} else if (!done_chunk_alloc) {
4442	space_info->force_alloc = 1;	4505	space_info->force_alloc = 1;
4443	}	4506	}
4444		4507