aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c533
1 files changed, 405 insertions, 128 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a5aca3997d42..535f85ba104f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h>
24#include "compat.h" 25#include "compat.h"
25#include "hash.h" 26#include "hash.h"
26#include "ctree.h" 27#include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 struct btrfs_root *extent_root, u64 alloc_bytes,
62 u64 flags, int force); 63 u64 flags, int force);
63 64
65static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache)
67{
68 smp_mb();
69 return cache->cached == BTRFS_CACHE_FINISHED;
70}
71
64static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 72static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
65{ 73{
66 return (cache->flags & bits) == bits; 74 return (cache->flags & bits) == bits;
@@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
146} 154}
147 155
148/* 156/*
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{
164 u64 start, end, last = 0;
165 int ret;
166
167 while (1) {
168 ret = find_first_extent_bit(&info->pinned_extents, last,
169 &start, &end,
170 EXTENT_LOCKED|EXTENT_DIRTY);
171 if (ret)
172 break;
173
174 clear_extent_bits(&info->pinned_extents, start, end,
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
176 last = end+1;
177 }
178}
179
180static int remove_sb_from_cache(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache)
182{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr;
185 u64 *logical;
186 int stripe_len;
187 int i, nr, ret;
188
189 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
190 bytenr = btrfs_sb_offset(i);
191 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
192 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret);
195 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents,
197 logical[nr],
198 logical[nr] + stripe_len - 1, GFP_NOFS);
199 }
200 kfree(logical);
201 }
202
203 return 0;
204}
205
206/*
149 * this is only called by cache_block_group, since we could have freed extents 207 * this is only called by cache_block_group, since we could have freed extents
150 * we need to check the pinned_extents for any extents that can't be used yet 208 * we need to check the pinned_extents for any extents that can't be used yet
151 * since their free space will be released as soon as the transaction commits. 209 * since their free space will be released as soon as the transaction commits.
152 */ 210 */
153static int add_new_free_space(struct btrfs_block_group_cache *block_group, 211static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
154 struct btrfs_fs_info *info, u64 start, u64 end) 212 struct btrfs_fs_info *info, u64 start, u64 end)
155{ 213{
156 u64 extent_start, extent_end, size; 214 u64 extent_start, extent_end, size, total_added = 0;
157 int ret; 215 int ret;
158 216
159 while (start < end) { 217 while (start < end) {
160 ret = find_first_extent_bit(&info->pinned_extents, start, 218 ret = find_first_extent_bit(&info->pinned_extents, start,
161 &extent_start, &extent_end, 219 &extent_start, &extent_end,
162 EXTENT_DIRTY); 220 EXTENT_DIRTY|EXTENT_LOCKED);
163 if (ret) 221 if (ret)
164 break; 222 break;
165 223
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
167 start = extent_end + 1; 225 start = extent_end + 1;
168 } else if (extent_start > start && extent_start < end) { 226 } else if (extent_start > start && extent_start < end) {
169 size = extent_start - start; 227 size = extent_start - start;
228 total_added += size;
170 ret = btrfs_add_free_space(block_group, start, 229 ret = btrfs_add_free_space(block_group, start,
171 size); 230 size);
172 BUG_ON(ret); 231 BUG_ON(ret);
@@ -178,84 +237,93 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
178 237
179 if (start < end) { 238 if (start < end) {
180 size = end - start; 239 size = end - start;
240 total_added += size;
181 ret = btrfs_add_free_space(block_group, start, size); 241 ret = btrfs_add_free_space(block_group, start, size);
182 BUG_ON(ret); 242 BUG_ON(ret);
183 } 243 }
184 244
185 return 0; 245 return total_added;
186}
187
188static int remove_sb_from_cache(struct btrfs_root *root,
189 struct btrfs_block_group_cache *cache)
190{
191 u64 bytenr;
192 u64 *logical;
193 int stripe_len;
194 int i, nr, ret;
195
196 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
197 bytenr = btrfs_sb_offset(i);
198 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
199 cache->key.objectid, bytenr, 0,
200 &logical, &nr, &stripe_len);
201 BUG_ON(ret);
202 while (nr--) {
203 btrfs_remove_free_space(cache, logical[nr],
204 stripe_len);
205 }
206 kfree(logical);
207 }
208 return 0;
209} 246}
210 247
211static int cache_block_group(struct btrfs_root *root, 248static int caching_kthread(void *data)
212 struct btrfs_block_group_cache *block_group)
213{ 249{
250 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0;
214 struct btrfs_path *path; 253 struct btrfs_path *path;
215 int ret = 0; 254 int ret = 0;
216 struct btrfs_key key; 255 struct btrfs_key key;
217 struct extent_buffer *leaf; 256 struct extent_buffer *leaf;
218 int slot; 257 int slot;
219 u64 last; 258 u64 total_found = 0;
220
221 if (!block_group)
222 return 0;
223
224 root = root->fs_info->extent_root;
225 259
226 if (block_group->cached) 260 BUG_ON(!fs_info);
227 return 0;
228 261
229 path = btrfs_alloc_path(); 262 path = btrfs_alloc_path();
230 if (!path) 263 if (!path)
231 return -ENOMEM; 264 return -ENOMEM;
232 265
233 path->reada = 2; 266 atomic_inc(&block_group->space_info->caching_threads);
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
234 /* 268 /*
235 * we get into deadlocks with paths held by callers of this function. 269 * We don't want to deadlock with somebody trying to allocate a new
236 * since the alloc_mutex is protecting things right now, just 270 * extent for the extent root while also trying to search the extent
237 * skip the locking here 271 * root to add free space. So we skip locking and search the commit
272 * root, since its read-only
238 */ 273 */
239 path->skip_locking = 1; 274 path->skip_locking = 1;
240 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 275 path->search_commit_root = 1;
276 path->reada = 2;
277
241 key.objectid = last; 278 key.objectid = last;
242 key.offset = 0; 279 key.offset = 0;
243 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 281again:
282 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem);
284
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
245 if (ret < 0) 286 if (ret < 0)
246 goto err; 287 goto err;
247 288
248 while (1) { 289 while (1) {
290 smp_mb();
291 if (block_group->fs_info->closing > 1) {
292 last = (u64)-1;
293 break;
294 }
295
249 leaf = path->nodes[0]; 296 leaf = path->nodes[0];
250 slot = path->slots[0]; 297 slot = path->slots[0];
251 if (slot >= btrfs_header_nritems(leaf)) { 298 if (slot >= btrfs_header_nritems(leaf)) {
252 ret = btrfs_next_leaf(root, path); 299 ret = btrfs_next_leaf(fs_info->extent_root, path);
253 if (ret < 0) 300 if (ret < 0)
254 goto err; 301 goto err;
255 if (ret == 0) 302 else if (ret)
256 continue;
257 else
258 break; 303 break;
304
305 if (need_resched() ||
306 btrfs_transaction_in_commit(fs_info)) {
307 leaf = path->nodes[0];
308
309 /* this shouldn't happen, but if the
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1);
323 goto again;
324 }
325
326 continue;
259 } 327 }
260 btrfs_item_key_to_cpu(leaf, &key, slot); 328 btrfs_item_key_to_cpu(leaf, &key, slot);
261 if (key.objectid < block_group->key.objectid) 329 if (key.objectid < block_group->key.objectid)
@@ -266,24 +334,59 @@ static int cache_block_group(struct btrfs_root *root,
266 break; 334 break;
267 335
268 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
269 add_new_free_space(block_group, root->fs_info, last, 337 total_found += add_new_free_space(block_group,
270 key.objectid); 338 fs_info, last,
271 339 key.objectid);
272 last = key.objectid + key.offset; 340 last = key.objectid + key.offset;
273 } 341 }
342
343 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0;
345 wake_up(&block_group->caching_q);
346 }
274next: 347next:
275 path->slots[0]++; 348 path->slots[0]++;
276 } 349 }
350 ret = 0;
277 351
278 add_new_free_space(block_group, root->fs_info, last, 352 total_found += add_new_free_space(block_group, fs_info, last,
279 block_group->key.objectid + 353 block_group->key.objectid +
280 block_group->key.offset); 354 block_group->key.offset);
355
356 spin_lock(&block_group->lock);
357 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock);
281 359
282 block_group->cached = 1;
283 remove_sb_from_cache(root, block_group);
284 ret = 0;
285err: 360err:
286 btrfs_free_path(path); 361 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365
366 return 0;
367}
368
369static int cache_block_group(struct btrfs_block_group_cache *cache)
370{
371 struct task_struct *tsk;
372 int ret = 0;
373
374 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock);
377 return ret;
378 }
379 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock);
381
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid);
384 if (IS_ERR(tsk)) {
385 ret = PTR_ERR(tsk);
386 printk(KERN_ERR "error running thread %d\n", ret);
387 BUG();
388 }
389
287 return ret; 390 return ret;
288} 391}
289 392
@@ -1408,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1408static void btrfs_issue_discard(struct block_device *bdev, 1511static void btrfs_issue_discard(struct block_device *bdev,
1409 u64 start, u64 len) 1512 u64 start, u64 len)
1410{ 1513{
1411 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1515 DISCARD_FL_BARRIER);
1412} 1516}
1413#endif 1517#endif
1414 1518
@@ -2387,13 +2491,29 @@ fail:
2387 2491
2388} 2492}
2389 2493
2494static struct btrfs_block_group_cache *
2495next_block_group(struct btrfs_root *root,
2496 struct btrfs_block_group_cache *cache)
2497{
2498 struct rb_node *node;
2499 spin_lock(&root->fs_info->block_group_cache_lock);
2500 node = rb_next(&cache->cache_node);
2501 btrfs_put_block_group(cache);
2502 if (node) {
2503 cache = rb_entry(node, struct btrfs_block_group_cache,
2504 cache_node);
2505 atomic_inc(&cache->count);
2506 } else
2507 cache = NULL;
2508 spin_unlock(&root->fs_info->block_group_cache_lock);
2509 return cache;
2510}
2511
2390int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2512int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2391 struct btrfs_root *root) 2513 struct btrfs_root *root)
2392{ 2514{
2393 struct btrfs_block_group_cache *cache, *entry; 2515 struct btrfs_block_group_cache *cache;
2394 struct rb_node *n;
2395 int err = 0; 2516 int err = 0;
2396 int werr = 0;
2397 struct btrfs_path *path; 2517 struct btrfs_path *path;
2398 u64 last = 0; 2518 u64 last = 0;
2399 2519
@@ -2402,39 +2522,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2402 return -ENOMEM; 2522 return -ENOMEM;
2403 2523
2404 while (1) { 2524 while (1) {
2405 cache = NULL; 2525 if (last == 0) {
2406 spin_lock(&root->fs_info->block_group_cache_lock); 2526 err = btrfs_run_delayed_refs(trans, root,
2407 for (n = rb_first(&root->fs_info->block_group_cache_tree); 2527 (unsigned long)-1);
2408 n; n = rb_next(n)) { 2528 BUG_ON(err);
2409 entry = rb_entry(n, struct btrfs_block_group_cache,
2410 cache_node);
2411 if (entry->dirty) {
2412 cache = entry;
2413 break;
2414 }
2415 } 2529 }
2416 spin_unlock(&root->fs_info->block_group_cache_lock);
2417 2530
2418 if (!cache) 2531 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2419 break; 2532 while (cache) {
2533 if (cache->dirty)
2534 break;
2535 cache = next_block_group(root, cache);
2536 }
2537 if (!cache) {
2538 if (last == 0)
2539 break;
2540 last = 0;
2541 continue;
2542 }
2420 2543
2421 cache->dirty = 0; 2544 cache->dirty = 0;
2422 last += cache->key.offset; 2545 last = cache->key.objectid + cache->key.offset;
2423 2546
2424 err = write_one_cache_group(trans, root, 2547 err = write_one_cache_group(trans, root, path, cache);
2425 path, cache); 2548 BUG_ON(err);
2426 /* 2549 btrfs_put_block_group(cache);
2427 * if we fail to write the cache group, we want
2428 * to keep it marked dirty in hopes that a later
2429 * write will work
2430 */
2431 if (err) {
2432 werr = err;
2433 continue;
2434 }
2435 } 2550 }
2551
2436 btrfs_free_path(path); 2552 btrfs_free_path(path);
2437 return werr; 2553 return 0;
2438} 2554}
2439 2555
2440int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2556int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2484,6 +2600,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2484 found->force_alloc = 0; 2600 found->force_alloc = 0;
2485 *space_info = found; 2601 *space_info = found;
2486 list_add_rcu(&found->list, &info->space_info); 2602 list_add_rcu(&found->list, &info->space_info);
2603 atomic_set(&found->caching_threads, 0);
2487 return 0; 2604 return 0;
2488} 2605}
2489 2606
@@ -2947,13 +3064,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2947 struct btrfs_block_group_cache *cache; 3064 struct btrfs_block_group_cache *cache;
2948 struct btrfs_fs_info *fs_info = root->fs_info; 3065 struct btrfs_fs_info *fs_info = root->fs_info;
2949 3066
2950 if (pin) { 3067 if (pin)
2951 set_extent_dirty(&fs_info->pinned_extents, 3068 set_extent_dirty(&fs_info->pinned_extents,
2952 bytenr, bytenr + num - 1, GFP_NOFS); 3069 bytenr, bytenr + num - 1, GFP_NOFS);
2953 } else {
2954 clear_extent_dirty(&fs_info->pinned_extents,
2955 bytenr, bytenr + num - 1, GFP_NOFS);
2956 }
2957 3070
2958 while (num > 0) { 3071 while (num > 0) {
2959 cache = btrfs_lookup_block_group(fs_info, bytenr); 3072 cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2969,14 +3082,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2969 spin_unlock(&cache->space_info->lock); 3082 spin_unlock(&cache->space_info->lock);
2970 fs_info->total_pinned += len; 3083 fs_info->total_pinned += len;
2971 } else { 3084 } else {
3085 int unpin = 0;
3086
3087 /*
3088 * in order to not race with the block group caching, we
3089 * only want to unpin the extent if we are cached. If
3090 * we aren't cached, we want to start async caching this
3091 * block group so we can free the extent the next time
3092 * around.
3093 */
2972 spin_lock(&cache->space_info->lock); 3094 spin_lock(&cache->space_info->lock);
2973 spin_lock(&cache->lock); 3095 spin_lock(&cache->lock);
2974 cache->pinned -= len; 3096 unpin = (cache->cached == BTRFS_CACHE_FINISHED);
2975 cache->space_info->bytes_pinned -= len; 3097 if (likely(unpin)) {
3098 cache->pinned -= len;
3099 cache->space_info->bytes_pinned -= len;
3100 fs_info->total_pinned -= len;
3101 }
2976 spin_unlock(&cache->lock); 3102 spin_unlock(&cache->lock);
2977 spin_unlock(&cache->space_info->lock); 3103 spin_unlock(&cache->space_info->lock);
2978 fs_info->total_pinned -= len; 3104
2979 if (cache->cached) 3105 if (likely(unpin))
3106 clear_extent_dirty(&fs_info->pinned_extents,
3107 bytenr, bytenr + len -1,
3108 GFP_NOFS);
3109 else
3110 cache_block_group(cache);
3111
3112 if (unpin)
2980 btrfs_add_free_space(cache, bytenr, len); 3113 btrfs_add_free_space(cache, bytenr, len);
2981 } 3114 }
2982 btrfs_put_block_group(cache); 3115 btrfs_put_block_group(cache);
@@ -3030,6 +3163,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
3030 &start, &end, EXTENT_DIRTY); 3163 &start, &end, EXTENT_DIRTY);
3031 if (ret) 3164 if (ret)
3032 break; 3165 break;
3166
3033 set_extent_dirty(copy, start, end, GFP_NOFS); 3167 set_extent_dirty(copy, start, end, GFP_NOFS);
3034 last = end + 1; 3168 last = end + 1;
3035 } 3169 }
@@ -3058,6 +3192,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3058 3192
3059 cond_resched(); 3193 cond_resched();
3060 } 3194 }
3195
3061 return ret; 3196 return ret;
3062} 3197}
3063 3198
@@ -3436,6 +3571,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
3436} 3571}
3437 3572
3438/* 3573/*
3574 * when we wait for progress in the block group caching, its because
3575 * our allocation attempt failed at least once. So, we must sleep
3576 * and let some progress happen before we try again.
3577 *
3578 * This function will sleep at least once waiting for new free space to
3579 * show up, and then it will check the block group free space numbers
3580 * for our min num_bytes. Another option is to have it go ahead
3581 * and look in the rbtree for a free extent of a given size, but this
3582 * is a good start.
3583 */
3584static noinline int
3585wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3586 u64 num_bytes)
3587{
3588 DEFINE_WAIT(wait);
3589
3590 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
3591
3592 if (block_group_cache_done(cache)) {
3593 finish_wait(&cache->caching_q, &wait);
3594 return 0;
3595 }
3596 schedule();
3597 finish_wait(&cache->caching_q, &wait);
3598
3599 wait_event(cache->caching_q, block_group_cache_done(cache) ||
3600 (cache->free_space >= num_bytes));
3601 return 0;
3602}
3603
3604enum btrfs_loop_type {
3605 LOOP_CACHED_ONLY = 0,
3606 LOOP_CACHING_NOWAIT = 1,
3607 LOOP_CACHING_WAIT = 2,
3608 LOOP_ALLOC_CHUNK = 3,
3609 LOOP_NO_EMPTY_SIZE = 4,
3610};
3611
3612/*
3439 * walks the btree of allocated extents and find a hole of a given size. 3613 * walks the btree of allocated extents and find a hole of a given size.
3440 * The key ins is changed to record the hole: 3614 * The key ins is changed to record the hole:
3441 * ins->objectid == block start 3615 * ins->objectid == block start
@@ -3460,6 +3634,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3460 struct btrfs_space_info *space_info; 3634 struct btrfs_space_info *space_info;
3461 int last_ptr_loop = 0; 3635 int last_ptr_loop = 0;
3462 int loop = 0; 3636 int loop = 0;
3637 bool found_uncached_bg = false;
3463 3638
3464 WARN_ON(num_bytes < root->sectorsize); 3639 WARN_ON(num_bytes < root->sectorsize);
3465 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3640 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3491,15 +3666,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3491 search_start = max(search_start, first_logical_byte(root, 0)); 3666 search_start = max(search_start, first_logical_byte(root, 0));
3492 search_start = max(search_start, hint_byte); 3667 search_start = max(search_start, hint_byte);
3493 3668
3494 if (!last_ptr) { 3669 if (!last_ptr)
3495 empty_cluster = 0; 3670 empty_cluster = 0;
3496 loop = 1;
3497 }
3498 3671
3499 if (search_start == hint_byte) { 3672 if (search_start == hint_byte) {
3500 block_group = btrfs_lookup_block_group(root->fs_info, 3673 block_group = btrfs_lookup_block_group(root->fs_info,
3501 search_start); 3674 search_start);
3502 if (block_group && block_group_bits(block_group, data)) { 3675 /*
3676 * we don't want to use the block group if it doesn't match our
3677 * allocation bits, or if its not cached.
3678 */
3679 if (block_group && block_group_bits(block_group, data) &&
3680 block_group_cache_done(block_group)) {
3503 down_read(&space_info->groups_sem); 3681 down_read(&space_info->groups_sem);
3504 if (list_empty(&block_group->list) || 3682 if (list_empty(&block_group->list) ||
3505 block_group->ro) { 3683 block_group->ro) {
@@ -3522,21 +3700,35 @@ search:
3522 down_read(&space_info->groups_sem); 3700 down_read(&space_info->groups_sem);
3523 list_for_each_entry(block_group, &space_info->block_groups, list) { 3701 list_for_each_entry(block_group, &space_info->block_groups, list) {
3524 u64 offset; 3702 u64 offset;
3703 int cached;
3525 3704
3526 atomic_inc(&block_group->count); 3705 atomic_inc(&block_group->count);
3527 search_start = block_group->key.objectid; 3706 search_start = block_group->key.objectid;
3528 3707
3529have_block_group: 3708have_block_group:
3530 if (unlikely(!block_group->cached)) { 3709 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
3531 mutex_lock(&block_group->cache_mutex); 3710 /*
3532 ret = cache_block_group(root, block_group); 3711 * we want to start caching kthreads, but not too many
3533 mutex_unlock(&block_group->cache_mutex); 3712 * right off the bat so we don't overwhelm the system,
3534 if (ret) { 3713 * so only start them if there are less than 2 and we're
3535 btrfs_put_block_group(block_group); 3714 * in the initial allocation phase.
3536 break; 3715 */
3716 if (loop > LOOP_CACHING_NOWAIT ||
3717 atomic_read(&space_info->caching_threads) < 2) {
3718 ret = cache_block_group(block_group);
3719 BUG_ON(ret);
3537 } 3720 }
3538 } 3721 }
3539 3722
3723 cached = block_group_cache_done(block_group);
3724 if (unlikely(!cached)) {
3725 found_uncached_bg = true;
3726
3727 /* if we only want cached bgs, loop */
3728 if (loop == LOOP_CACHED_ONLY)
3729 goto loop;
3730 }
3731
3540 if (unlikely(block_group->ro)) 3732 if (unlikely(block_group->ro))
3541 goto loop; 3733 goto loop;
3542 3734
@@ -3615,14 +3807,21 @@ refill_cluster:
3615 spin_unlock(&last_ptr->refill_lock); 3807 spin_unlock(&last_ptr->refill_lock);
3616 goto checks; 3808 goto checks;
3617 } 3809 }
3810 } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
3811 spin_unlock(&last_ptr->refill_lock);
3812
3813 wait_block_group_cache_progress(block_group,
3814 num_bytes + empty_cluster + empty_size);
3815 goto have_block_group;
3618 } 3816 }
3817
3619 /* 3818 /*
3620 * at this point we either didn't find a cluster 3819 * at this point we either didn't find a cluster
3621 * or we weren't able to allocate a block from our 3820 * or we weren't able to allocate a block from our
3622 * cluster. Free the cluster we've been trying 3821 * cluster. Free the cluster we've been trying
3623 * to use, and go to the next block group 3822 * to use, and go to the next block group
3624 */ 3823 */
3625 if (loop < 2) { 3824 if (loop < LOOP_NO_EMPTY_SIZE) {
3626 btrfs_return_cluster_to_free_space(NULL, 3825 btrfs_return_cluster_to_free_space(NULL,
3627 last_ptr); 3826 last_ptr);
3628 spin_unlock(&last_ptr->refill_lock); 3827 spin_unlock(&last_ptr->refill_lock);
@@ -3633,11 +3832,17 @@ refill_cluster:
3633 3832
3634 offset = btrfs_find_space_for_alloc(block_group, search_start, 3833 offset = btrfs_find_space_for_alloc(block_group, search_start,
3635 num_bytes, empty_size); 3834 num_bytes, empty_size);
3636 if (!offset) 3835 if (!offset && (cached || (!cached &&
3836 loop == LOOP_CACHING_NOWAIT))) {
3637 goto loop; 3837 goto loop;
3838 } else if (!offset && (!cached &&
3839 loop > LOOP_CACHING_NOWAIT)) {
3840 wait_block_group_cache_progress(block_group,
3841 num_bytes + empty_size);
3842 goto have_block_group;
3843 }
3638checks: 3844checks:
3639 search_start = stripe_align(root, offset); 3845 search_start = stripe_align(root, offset);
3640
3641 /* move on to the next group */ 3846 /* move on to the next group */
3642 if (search_start + num_bytes >= search_end) { 3847 if (search_start + num_bytes >= search_end) {
3643 btrfs_add_free_space(block_group, offset, num_bytes); 3848 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3683,13 +3888,26 @@ loop:
3683 } 3888 }
3684 up_read(&space_info->groups_sem); 3889 up_read(&space_info->groups_sem);
3685 3890
3686 /* loop == 0, try to find a clustered alloc in every block group 3891 /* LOOP_CACHED_ONLY, only search fully cached block groups
3687 * loop == 1, try again after forcing a chunk allocation 3892 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
3688 * loop == 2, set empty_size and empty_cluster to 0 and try again 3893 * dont wait foR them to finish caching
3894 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
3895 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
3896 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
3897 * again
3689 */ 3898 */
3690 if (!ins->objectid && loop < 3 && 3899 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
3691 (empty_size || empty_cluster || allowed_chunk_alloc)) { 3900 (found_uncached_bg || empty_size || empty_cluster ||
3692 if (loop >= 2) { 3901 allowed_chunk_alloc)) {
3902 if (found_uncached_bg) {
3903 found_uncached_bg = false;
3904 if (loop < LOOP_CACHING_WAIT) {
3905 loop++;
3906 goto search;
3907 }
3908 }
3909
3910 if (loop == LOOP_ALLOC_CHUNK) {
3693 empty_size = 0; 3911 empty_size = 0;
3694 empty_cluster = 0; 3912 empty_cluster = 0;
3695 } 3913 }
@@ -3702,7 +3920,7 @@ loop:
3702 space_info->force_alloc = 1; 3920 space_info->force_alloc = 1;
3703 } 3921 }
3704 3922
3705 if (loop < 3) { 3923 if (loop < LOOP_NO_EMPTY_SIZE) {
3706 loop++; 3924 loop++;
3707 goto search; 3925 goto search;
3708 } 3926 }
@@ -3798,7 +4016,7 @@ again:
3798 num_bytes, data, 1); 4016 num_bytes, data, 1);
3799 goto again; 4017 goto again;
3800 } 4018 }
3801 if (ret) { 4019 if (ret == -ENOSPC) {
3802 struct btrfs_space_info *sinfo; 4020 struct btrfs_space_info *sinfo;
3803 4021
3804 sinfo = __find_space_info(root->fs_info, data); 4022 sinfo = __find_space_info(root->fs_info, data);
@@ -3806,7 +4024,6 @@ again:
3806 "wanted %llu\n", (unsigned long long)data, 4024 "wanted %llu\n", (unsigned long long)data,
3807 (unsigned long long)num_bytes); 4025 (unsigned long long)num_bytes);
3808 dump_space_info(sinfo, num_bytes); 4026 dump_space_info(sinfo, num_bytes);
3809 BUG();
3810 } 4027 }
3811 4028
3812 return ret; 4029 return ret;
@@ -3844,7 +4061,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3844 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4061 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3845 empty_size, hint_byte, search_end, ins, 4062 empty_size, hint_byte, search_end, ins,
3846 data); 4063 data);
3847 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4064 if (!ret)
4065 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4066
3848 return ret; 4067 return ret;
3849} 4068}
3850 4069
@@ -4006,9 +4225,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4006 struct btrfs_block_group_cache *block_group; 4225 struct btrfs_block_group_cache *block_group;
4007 4226
4008 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4227 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4009 mutex_lock(&block_group->cache_mutex); 4228 cache_block_group(block_group);
4010 cache_block_group(root, block_group); 4229 wait_event(block_group->caching_q,
4011 mutex_unlock(&block_group->cache_mutex); 4230 block_group_cache_done(block_group));
4012 4231
4013 ret = btrfs_remove_free_space(block_group, ins->objectid, 4232 ret = btrfs_remove_free_space(block_group, ins->objectid,
4014 ins->offset); 4233 ins->offset);
@@ -4039,7 +4258,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4039 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4258 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4040 empty_size, hint_byte, search_end, 4259 empty_size, hint_byte, search_end,
4041 ins, 0); 4260 ins, 0);
4042 BUG_ON(ret); 4261 if (ret)
4262 return ret;
4043 4263
4044 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4264 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4045 if (parent == 0) 4265 if (parent == 0)
@@ -6955,11 +7175,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6955 &info->block_group_cache_tree); 7175 &info->block_group_cache_tree);
6956 spin_unlock(&info->block_group_cache_lock); 7176 spin_unlock(&info->block_group_cache_lock);
6957 7177
6958 btrfs_remove_free_space_cache(block_group);
6959 down_write(&block_group->space_info->groups_sem); 7178 down_write(&block_group->space_info->groups_sem);
6960 list_del(&block_group->list); 7179 list_del(&block_group->list);
6961 up_write(&block_group->space_info->groups_sem); 7180 up_write(&block_group->space_info->groups_sem);
6962 7181
7182 if (block_group->cached == BTRFS_CACHE_STARTED)
7183 wait_event(block_group->caching_q,
7184 block_group_cache_done(block_group));
7185
7186 btrfs_remove_free_space_cache(block_group);
7187
6963 WARN_ON(atomic_read(&block_group->count) != 1); 7188 WARN_ON(atomic_read(&block_group->count) != 1);
6964 kfree(block_group); 7189 kfree(block_group);
6965 7190
@@ -7025,9 +7250,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7025 atomic_set(&cache->count, 1); 7250 atomic_set(&cache->count, 1);
7026 spin_lock_init(&cache->lock); 7251 spin_lock_init(&cache->lock);
7027 spin_lock_init(&cache->tree_lock); 7252 spin_lock_init(&cache->tree_lock);
7028 mutex_init(&cache->cache_mutex); 7253 cache->fs_info = info;
7254 init_waitqueue_head(&cache->caching_q);
7029 INIT_LIST_HEAD(&cache->list); 7255 INIT_LIST_HEAD(&cache->list);
7030 INIT_LIST_HEAD(&cache->cluster_list); 7256 INIT_LIST_HEAD(&cache->cluster_list);
7257
7258 /*
7259 * we only want to have 32k of ram per block group for keeping
7260 * track of free space, and if we pass 1/2 of that we want to
7261 * start converting things over to using bitmaps
7262 */
7263 cache->extents_thresh = ((1024 * 32) / 2) /
7264 sizeof(struct btrfs_free_space);
7265
7031 read_extent_buffer(leaf, &cache->item, 7266 read_extent_buffer(leaf, &cache->item,
7032 btrfs_item_ptr_offset(leaf, path->slots[0]), 7267 btrfs_item_ptr_offset(leaf, path->slots[0]),
7033 sizeof(cache->item)); 7268 sizeof(cache->item));
@@ -7036,6 +7271,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7036 key.objectid = found_key.objectid + found_key.offset; 7271 key.objectid = found_key.objectid + found_key.offset;
7037 btrfs_release_path(root, path); 7272 btrfs_release_path(root, path);
7038 cache->flags = btrfs_block_group_flags(&cache->item); 7273 cache->flags = btrfs_block_group_flags(&cache->item);
7274 cache->sectorsize = root->sectorsize;
7275
7276 remove_sb_from_cache(root, cache);
7277
7278 /*
7279 * check for two cases, either we are full, and therefore
7280 * don't need to bother with the caching work since we won't
7281 * find any space, or we are empty, and we can just add all
7282 * the space in and be done with it. This saves us _alot_ of
7283 * time, particularly in the full case.
7284 */
7285 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7286 cache->cached = BTRFS_CACHE_FINISHED;
7287 } else if (btrfs_block_group_used(&cache->item) == 0) {
7288 cache->cached = BTRFS_CACHE_FINISHED;
7289 add_new_free_space(cache, root->fs_info,
7290 found_key.objectid,
7291 found_key.objectid +
7292 found_key.offset);
7293 }
7039 7294
7040 ret = update_space_info(info, cache->flags, found_key.offset, 7295 ret = update_space_info(info, cache->flags, found_key.offset,
7041 btrfs_block_group_used(&cache->item), 7296 btrfs_block_group_used(&cache->item),
@@ -7079,10 +7334,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7079 cache->key.objectid = chunk_offset; 7334 cache->key.objectid = chunk_offset;
7080 cache->key.offset = size; 7335 cache->key.offset = size;
7081 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7336 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
7337 cache->sectorsize = root->sectorsize;
7338
7339 /*
7340 * we only want to have 32k of ram per block group for keeping track
7341 * of free space, and if we pass 1/2 of that we want to start
7342 * converting things over to using bitmaps
7343 */
7344 cache->extents_thresh = ((1024 * 32) / 2) /
7345 sizeof(struct btrfs_free_space);
7082 atomic_set(&cache->count, 1); 7346 atomic_set(&cache->count, 1);
7083 spin_lock_init(&cache->lock); 7347 spin_lock_init(&cache->lock);
7084 spin_lock_init(&cache->tree_lock); 7348 spin_lock_init(&cache->tree_lock);
7085 mutex_init(&cache->cache_mutex); 7349 init_waitqueue_head(&cache->caching_q);
7086 INIT_LIST_HEAD(&cache->list); 7350 INIT_LIST_HEAD(&cache->list);
7087 INIT_LIST_HEAD(&cache->cluster_list); 7351 INIT_LIST_HEAD(&cache->cluster_list);
7088 7352
@@ -7091,6 +7355,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7091 cache->flags = type; 7355 cache->flags = type;
7092 btrfs_set_block_group_flags(&cache->item, type); 7356 btrfs_set_block_group_flags(&cache->item, type);
7093 7357
7358 cache->cached = BTRFS_CACHE_FINISHED;
7359 remove_sb_from_cache(root, cache);
7360
7361 add_new_free_space(cache, root->fs_info, chunk_offset,
7362 chunk_offset + size);
7363
7094 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7364 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7095 &cache->space_info); 7365 &cache->space_info);
7096 BUG_ON(ret); 7366 BUG_ON(ret);
@@ -7149,7 +7419,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7149 rb_erase(&block_group->cache_node, 7419 rb_erase(&block_group->cache_node,
7150 &root->fs_info->block_group_cache_tree); 7420 &root->fs_info->block_group_cache_tree);
7151 spin_unlock(&root->fs_info->block_group_cache_lock); 7421 spin_unlock(&root->fs_info->block_group_cache_lock);
7152 btrfs_remove_free_space_cache(block_group); 7422
7153 down_write(&block_group->space_info->groups_sem); 7423 down_write(&block_group->space_info->groups_sem);
7154 /* 7424 /*
7155 * we must use list_del_init so people can check to see if they 7425 * we must use list_del_init so people can check to see if they
@@ -7158,11 +7428,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7158 list_del_init(&block_group->list); 7428 list_del_init(&block_group->list);
7159 up_write(&block_group->space_info->groups_sem); 7429 up_write(&block_group->space_info->groups_sem);
7160 7430
7431 if (block_group->cached == BTRFS_CACHE_STARTED)
7432 wait_event(block_group->caching_q,
7433 block_group_cache_done(block_group));
7434
7435 btrfs_remove_free_space_cache(block_group);
7436
7161 spin_lock(&block_group->space_info->lock); 7437 spin_lock(&block_group->space_info->lock);
7162 block_group->space_info->total_bytes -= block_group->key.offset; 7438 block_group->space_info->total_bytes -= block_group->key.offset;
7163 block_group->space_info->bytes_readonly -= block_group->key.offset; 7439 block_group->space_info->bytes_readonly -= block_group->key.offset;
7164 spin_unlock(&block_group->space_info->lock); 7440 spin_unlock(&block_group->space_info->lock);
7165 block_group->space_info->full = 0; 7441
7442 btrfs_clear_space_info_full(root->fs_info);
7166 7443
7167 btrfs_put_block_group(block_group); 7444 btrfs_put_block_group(block_group);
7168 btrfs_put_block_group(block_group); 7445 btrfs_put_block_group(block_group);