aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c1662
1 files changed, 683 insertions, 979 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 535f85ba104f..993f93ff7ba6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
32#include "locking.h" 32#include "locking.h"
33#include "free-space-cache.h" 33#include "free-space-cache.h"
34 34
35static int update_reserved_extents(struct btrfs_root *root,
36 u64 bytenr, u64 num, int reserve);
37static int update_block_group(struct btrfs_trans_handle *trans, 35static int update_block_group(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 36 struct btrfs_root *root,
39 u64 bytenr, u64 num_bytes, int alloc, 37 u64 bytenr, u64 num_bytes, int alloc,
40 int mark_free); 38 int mark_free);
39static int update_reserved_extents(struct btrfs_block_group_cache *cache,
40 u64 num_bytes, int reserve);
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, 42 struct btrfs_root *root,
43 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
57 u64 parent, u64 root_objectid, 57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key, 58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins); 59 int level, struct btrfs_key *ins);
60
61static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force); 62 u64 flags, int force);
63static int pin_down_bytes(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root,
65 struct btrfs_path *path,
66 u64 bytenr, u64 num_bytes,
67 int is_data, int reserved,
68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key);
64 71
65static noinline int 72static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache) 73block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
153 return ret; 160 return ret;
154} 161}
155 162
156/* 163static int add_excluded_extent(struct btrfs_root *root,
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't 164 u64 start, u64 num_bytes)
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{ 165{
164 u64 start, end, last = 0; 166 u64 end = start + num_bytes - 1;
165 int ret; 167 set_extent_bits(&root->fs_info->freed_extents[0],
168 start, end, EXTENT_UPTODATE, GFP_NOFS);
169 set_extent_bits(&root->fs_info->freed_extents[1],
170 start, end, EXTENT_UPTODATE, GFP_NOFS);
171 return 0;
172}
166 173
167 while (1) { 174static void free_excluded_extents(struct btrfs_root *root,
168 ret = find_first_extent_bit(&info->pinned_extents, last, 175 struct btrfs_block_group_cache *cache)
169 &start, &end, 176{
170 EXTENT_LOCKED|EXTENT_DIRTY); 177 u64 start, end;
171 if (ret)
172 break;
173 178
174 clear_extent_bits(&info->pinned_extents, start, end, 179 start = cache->key.objectid;
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); 180 end = start + cache->key.offset - 1;
176 last = end+1; 181
177 } 182 clear_extent_bits(&root->fs_info->freed_extents[0],
183 start, end, EXTENT_UPTODATE, GFP_NOFS);
184 clear_extent_bits(&root->fs_info->freed_extents[1],
185 start, end, EXTENT_UPTODATE, GFP_NOFS);
178} 186}
179 187
180static int remove_sb_from_cache(struct btrfs_root *root, 188static int exclude_super_stripes(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache) 189 struct btrfs_block_group_cache *cache)
182{ 190{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr; 191 u64 bytenr;
185 u64 *logical; 192 u64 *logical;
186 int stripe_len; 193 int stripe_len;
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
192 cache->key.objectid, bytenr, 199 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len); 200 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret); 201 BUG_ON(ret);
202
195 while (nr--) { 203 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents, 204 cache->bytes_super += stripe_len;
197 logical[nr], 205 ret = add_excluded_extent(root, logical[nr],
198 logical[nr] + stripe_len - 1, GFP_NOFS); 206 stripe_len);
207 BUG_ON(ret);
199 } 208 }
209
200 kfree(logical); 210 kfree(logical);
201 } 211 }
202
203 return 0; 212 return 0;
204} 213}
205 214
215static struct btrfs_caching_control *
216get_caching_control(struct btrfs_block_group_cache *cache)
217{
218 struct btrfs_caching_control *ctl;
219
220 spin_lock(&cache->lock);
221 if (cache->cached != BTRFS_CACHE_STARTED) {
222 spin_unlock(&cache->lock);
223 return NULL;
224 }
225
226 ctl = cache->caching_ctl;
227 atomic_inc(&ctl->count);
228 spin_unlock(&cache->lock);
229 return ctl;
230}
231
232static void put_caching_control(struct btrfs_caching_control *ctl)
233{
234 if (atomic_dec_and_test(&ctl->count))
235 kfree(ctl);
236}
237
206/* 238/*
207 * this is only called by cache_block_group, since we could have freed extents 239 * this is only called by cache_block_group, since we could have freed extents
208 * we need to check the pinned_extents for any extents that can't be used yet 240 * we need to check the pinned_extents for any extents that can't be used yet
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
215 int ret; 247 int ret;
216 248
217 while (start < end) { 249 while (start < end) {
218 ret = find_first_extent_bit(&info->pinned_extents, start, 250 ret = find_first_extent_bit(info->pinned_extents, start,
219 &extent_start, &extent_end, 251 &extent_start, &extent_end,
220 EXTENT_DIRTY|EXTENT_LOCKED); 252 EXTENT_DIRTY | EXTENT_UPTODATE);
221 if (ret) 253 if (ret)
222 break; 254 break;
223 255
@@ -249,22 +281,27 @@ static int caching_kthread(void *data)
249{ 281{
250 struct btrfs_block_group_cache *block_group = data; 282 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info; 283 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0; 284 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
285 struct btrfs_root *extent_root = fs_info->extent_root;
253 struct btrfs_path *path; 286 struct btrfs_path *path;
254 int ret = 0;
255 struct btrfs_key key;
256 struct extent_buffer *leaf; 287 struct extent_buffer *leaf;
257 int slot; 288 struct btrfs_key key;
258 u64 total_found = 0; 289 u64 total_found = 0;
259 290 u64 last = 0;
260 BUG_ON(!fs_info); 291 u32 nritems;
292 int ret = 0;
261 293
262 path = btrfs_alloc_path(); 294 path = btrfs_alloc_path();
263 if (!path) 295 if (!path)
264 return -ENOMEM; 296 return -ENOMEM;
265 297
266 atomic_inc(&block_group->space_info->caching_threads); 298 exclude_super_stripes(extent_root, block_group);
299 spin_lock(&block_group->space_info->lock);
300 block_group->space_info->bytes_super += block_group->bytes_super;
301 spin_unlock(&block_group->space_info->lock);
302
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 303 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
304
268 /* 305 /*
269 * We don't want to deadlock with somebody trying to allocate a new 306 * We don't want to deadlock with somebody trying to allocate a new
270 * extent for the extent root while also trying to search the extent 307 * extent for the extent root while also trying to search the extent
@@ -277,74 +314,64 @@ static int caching_kthread(void *data)
277 314
278 key.objectid = last; 315 key.objectid = last;
279 key.offset = 0; 316 key.offset = 0;
280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 317 key.type = BTRFS_EXTENT_ITEM_KEY;
281again: 318again:
319 mutex_lock(&caching_ctl->mutex);
282 /* need to make sure the commit_root doesn't disappear */ 320 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem); 321 down_read(&fs_info->extent_commit_sem);
284 322
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 323 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
286 if (ret < 0) 324 if (ret < 0)
287 goto err; 325 goto err;
288 326
327 leaf = path->nodes[0];
328 nritems = btrfs_header_nritems(leaf);
329
289 while (1) { 330 while (1) {
290 smp_mb(); 331 smp_mb();
291 if (block_group->fs_info->closing > 1) { 332 if (fs_info->closing > 1) {
292 last = (u64)-1; 333 last = (u64)-1;
293 break; 334 break;
294 } 335 }
295 336
296 leaf = path->nodes[0]; 337 if (path->slots[0] < nritems) {
297 slot = path->slots[0]; 338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
298 if (slot >= btrfs_header_nritems(leaf)) { 339 } else {
299 ret = btrfs_next_leaf(fs_info->extent_root, path); 340 ret = find_next_key(path, 0, &key);
300 if (ret < 0) 341 if (ret)
301 goto err;
302 else if (ret)
303 break; 342 break;
304 343
305 if (need_resched() || 344 caching_ctl->progress = last;
306 btrfs_transaction_in_commit(fs_info)) { 345 btrfs_release_path(extent_root, path);
307 leaf = path->nodes[0]; 346 up_read(&fs_info->extent_commit_sem);
308 347 mutex_unlock(&caching_ctl->mutex);
309 /* this shouldn't happen, but if the 348 if (btrfs_transaction_in_commit(fs_info))
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1); 349 schedule_timeout(1);
323 goto again; 350 else
324 } 351 cond_resched();
352 goto again;
353 }
325 354
355 if (key.objectid < block_group->key.objectid) {
356 path->slots[0]++;
326 continue; 357 continue;
327 } 358 }
328 btrfs_item_key_to_cpu(leaf, &key, slot);
329 if (key.objectid < block_group->key.objectid)
330 goto next;
331 359
332 if (key.objectid >= block_group->key.objectid + 360 if (key.objectid >= block_group->key.objectid +
333 block_group->key.offset) 361 block_group->key.offset)
334 break; 362 break;
335 363
336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 364 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
337 total_found += add_new_free_space(block_group, 365 total_found += add_new_free_space(block_group,
338 fs_info, last, 366 fs_info, last,
339 key.objectid); 367 key.objectid);
340 last = key.objectid + key.offset; 368 last = key.objectid + key.offset;
341 }
342 369
343 if (total_found > (1024 * 1024 * 2)) { 370 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0; 371 total_found = 0;
345 wake_up(&block_group->caching_q); 372 wake_up(&caching_ctl->wait);
373 }
346 } 374 }
347next:
348 path->slots[0]++; 375 path->slots[0]++;
349 } 376 }
350 ret = 0; 377 ret = 0;
@@ -352,33 +379,65 @@ next:
352 total_found += add_new_free_space(block_group, fs_info, last, 379 total_found += add_new_free_space(block_group, fs_info, last,
353 block_group->key.objectid + 380 block_group->key.objectid +
354 block_group->key.offset); 381 block_group->key.offset);
382 caching_ctl->progress = (u64)-1;
355 383
356 spin_lock(&block_group->lock); 384 spin_lock(&block_group->lock);
385 block_group->caching_ctl = NULL;
357 block_group->cached = BTRFS_CACHE_FINISHED; 386 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock); 387 spin_unlock(&block_group->lock);
359 388
360err: 389err:
361 btrfs_free_path(path); 390 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem); 391 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365 392
393 free_excluded_extents(extent_root, block_group);
394
395 mutex_unlock(&caching_ctl->mutex);
396 wake_up(&caching_ctl->wait);
397
398 put_caching_control(caching_ctl);
399 atomic_dec(&block_group->space_info->caching_threads);
366 return 0; 400 return 0;
367} 401}
368 402
369static int cache_block_group(struct btrfs_block_group_cache *cache) 403static int cache_block_group(struct btrfs_block_group_cache *cache)
370{ 404{
405 struct btrfs_fs_info *fs_info = cache->fs_info;
406 struct btrfs_caching_control *caching_ctl;
371 struct task_struct *tsk; 407 struct task_struct *tsk;
372 int ret = 0; 408 int ret = 0;
373 409
410 smp_mb();
411 if (cache->cached != BTRFS_CACHE_NO)
412 return 0;
413
414 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
415 BUG_ON(!caching_ctl);
416
417 INIT_LIST_HEAD(&caching_ctl->list);
418 mutex_init(&caching_ctl->mutex);
419 init_waitqueue_head(&caching_ctl->wait);
420 caching_ctl->block_group = cache;
421 caching_ctl->progress = cache->key.objectid;
422 /* one for caching kthread, one for caching block group list */
423 atomic_set(&caching_ctl->count, 2);
424
374 spin_lock(&cache->lock); 425 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) { 426 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock); 427 spin_unlock(&cache->lock);
377 return ret; 428 kfree(caching_ctl);
429 return 0;
378 } 430 }
431 cache->caching_ctl = caching_ctl;
379 cache->cached = BTRFS_CACHE_STARTED; 432 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock); 433 spin_unlock(&cache->lock);
381 434
435 down_write(&fs_info->extent_commit_sem);
436 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
437 up_write(&fs_info->extent_commit_sem);
438
439 atomic_inc(&cache->space_info->caching_threads);
440
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 441 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid); 442 cache->key.objectid);
384 if (IS_ERR(tsk)) { 443 if (IS_ERR(tsk)) {
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1657 parent, ref_root, flags, 1716 parent, ref_root, flags,
1658 ref->objectid, ref->offset, 1717 ref->objectid, ref->offset,
1659 &ins, node->ref_mod); 1718 &ins, node->ref_mod);
1660 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1661 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1719 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1662 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1720 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1663 node->num_bytes, parent, 1721 node->num_bytes, parent,
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1783 extent_op->flags_to_set, 1841 extent_op->flags_to_set,
1784 &extent_op->key, 1842 &extent_op->key,
1785 ref->level, &ins); 1843 ref->level, &ins);
1786 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1787 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1844 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1788 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1845 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1789 node->num_bytes, parent, ref_root, 1846 node->num_bytes, parent, ref_root,
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1818 BUG_ON(extent_op); 1875 BUG_ON(extent_op);
1819 head = btrfs_delayed_node_to_head(node); 1876 head = btrfs_delayed_node_to_head(node);
1820 if (insert_reserved) { 1877 if (insert_reserved) {
1878 int mark_free = 0;
1879 struct extent_buffer *must_clean = NULL;
1880
1881 ret = pin_down_bytes(trans, root, NULL,
1882 node->bytenr, node->num_bytes,
1883 head->is_data, 1, &must_clean);
1884 if (ret > 0)
1885 mark_free = 1;
1886
1887 if (must_clean) {
1888 clean_tree_block(NULL, root, must_clean);
1889 btrfs_tree_unlock(must_clean);
1890 free_extent_buffer(must_clean);
1891 }
1821 if (head->is_data) { 1892 if (head->is_data) {
1822 ret = btrfs_del_csums(trans, root, 1893 ret = btrfs_del_csums(trans, root,
1823 node->bytenr, 1894 node->bytenr,
1824 node->num_bytes); 1895 node->num_bytes);
1825 BUG_ON(ret); 1896 BUG_ON(ret);
1826 } 1897 }
1827 btrfs_update_pinned_extents(root, node->bytenr, 1898 if (mark_free) {
1828 node->num_bytes, 1); 1899 ret = btrfs_free_reserved_extent(root,
1829 update_reserved_extents(root, node->bytenr, 1900 node->bytenr,
1830 node->num_bytes, 0); 1901 node->num_bytes);
1902 BUG_ON(ret);
1903 }
1831 } 1904 }
1832 mutex_unlock(&head->mutex); 1905 mutex_unlock(&head->mutex);
1833 return 0; 1906 return 0;
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root)
2706 /* get the space info for where the metadata will live */ 2779 /* get the space info for where the metadata will live */
2707 alloc_target = btrfs_get_alloc_profile(root, 0); 2780 alloc_target = btrfs_get_alloc_profile(root, 0);
2708 meta_sinfo = __find_space_info(info, alloc_target); 2781 meta_sinfo = __find_space_info(info, alloc_target);
2782 if (!meta_sinfo)
2783 goto alloc;
2709 2784
2710again: 2785again:
2711 spin_lock(&meta_sinfo->lock); 2786 spin_lock(&meta_sinfo->lock);
@@ -2717,12 +2792,13 @@ again:
2717 do_div(thresh, 100); 2792 do_div(thresh, 100);
2718 2793
2719 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2794 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2720 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2795 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2796 meta_sinfo->bytes_super > thresh) {
2721 struct btrfs_trans_handle *trans; 2797 struct btrfs_trans_handle *trans;
2722 if (!meta_sinfo->full) { 2798 if (!meta_sinfo->full) {
2723 meta_sinfo->force_alloc = 1; 2799 meta_sinfo->force_alloc = 1;
2724 spin_unlock(&meta_sinfo->lock); 2800 spin_unlock(&meta_sinfo->lock);
2725 2801alloc:
2726 trans = btrfs_start_transaction(root, 1); 2802 trans = btrfs_start_transaction(root, 1);
2727 if (!trans) 2803 if (!trans)
2728 return -ENOMEM; 2804 return -ENOMEM;
@@ -2730,6 +2806,10 @@ again:
2730 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2806 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2731 2 * 1024 * 1024, alloc_target, 0); 2807 2 * 1024 * 1024, alloc_target, 0);
2732 btrfs_end_transaction(trans, root); 2808 btrfs_end_transaction(trans, root);
2809 if (!meta_sinfo) {
2810 meta_sinfo = __find_space_info(info,
2811 alloc_target);
2812 }
2733 goto again; 2813 goto again;
2734 } 2814 }
2735 spin_unlock(&meta_sinfo->lock); 2815 spin_unlock(&meta_sinfo->lock);
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2765 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2845 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2766 2846
2767 data_sinfo = BTRFS_I(inode)->space_info; 2847 data_sinfo = BTRFS_I(inode)->space_info;
2848 if (!data_sinfo)
2849 goto alloc;
2850
2768again: 2851again:
2769 /* make sure we have enough space to handle the data first */ 2852 /* make sure we have enough space to handle the data first */
2770 spin_lock(&data_sinfo->lock); 2853 spin_lock(&data_sinfo->lock);
2771 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2854 if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2772 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2855 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2773 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2856 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2774 data_sinfo->bytes_may_use < bytes) { 2857 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
2775 struct btrfs_trans_handle *trans; 2858 struct btrfs_trans_handle *trans;
2776 2859
2777 /* 2860 /*
@@ -2783,7 +2866,7 @@ again:
2783 2866
2784 data_sinfo->force_alloc = 1; 2867 data_sinfo->force_alloc = 1;
2785 spin_unlock(&data_sinfo->lock); 2868 spin_unlock(&data_sinfo->lock);
2786 2869alloc:
2787 alloc_target = btrfs_get_alloc_profile(root, 1); 2870 alloc_target = btrfs_get_alloc_profile(root, 1);
2788 trans = btrfs_start_transaction(root, 1); 2871 trans = btrfs_start_transaction(root, 1);
2789 if (!trans) 2872 if (!trans)
@@ -2795,6 +2878,11 @@ again:
2795 btrfs_end_transaction(trans, root); 2878 btrfs_end_transaction(trans, root);
2796 if (ret) 2879 if (ret)
2797 return ret; 2880 return ret;
2881
2882 if (!data_sinfo) {
2883 btrfs_set_inode_space_info(root, inode);
2884 data_sinfo = BTRFS_I(inode)->space_info;
2885 }
2798 goto again; 2886 goto again;
2799 } 2887 }
2800 spin_unlock(&data_sinfo->lock); 2888 spin_unlock(&data_sinfo->lock);
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3009 num_bytes = min(total, cache->key.offset - byte_in_group); 3097 num_bytes = min(total, cache->key.offset - byte_in_group);
3010 if (alloc) { 3098 if (alloc) {
3011 old_val += num_bytes; 3099 old_val += num_bytes;
3100 btrfs_set_block_group_used(&cache->item, old_val);
3101 cache->reserved -= num_bytes;
3012 cache->space_info->bytes_used += num_bytes; 3102 cache->space_info->bytes_used += num_bytes;
3103 cache->space_info->bytes_reserved -= num_bytes;
3013 if (cache->ro) 3104 if (cache->ro)
3014 cache->space_info->bytes_readonly -= num_bytes; 3105 cache->space_info->bytes_readonly -= num_bytes;
3015 btrfs_set_block_group_used(&cache->item, old_val);
3016 spin_unlock(&cache->lock); 3106 spin_unlock(&cache->lock);
3017 spin_unlock(&cache->space_info->lock); 3107 spin_unlock(&cache->space_info->lock);
3018 } else { 3108 } else {
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3057 return bytenr; 3147 return bytenr;
3058} 3148}
3059 3149
3060int btrfs_update_pinned_extents(struct btrfs_root *root, 3150/*
3061 u64 bytenr, u64 num, int pin) 3151 * this function must be called within transaction
3152 */
3153int btrfs_pin_extent(struct btrfs_root *root,
3154 u64 bytenr, u64 num_bytes, int reserved)
3062{ 3155{
3063 u64 len;
3064 struct btrfs_block_group_cache *cache;
3065 struct btrfs_fs_info *fs_info = root->fs_info; 3156 struct btrfs_fs_info *fs_info = root->fs_info;
3157 struct btrfs_block_group_cache *cache;
3066 3158
3067 if (pin) 3159 cache = btrfs_lookup_block_group(fs_info, bytenr);
3068 set_extent_dirty(&fs_info->pinned_extents, 3160 BUG_ON(!cache);
3069 bytenr, bytenr + num - 1, GFP_NOFS);
3070
3071 while (num > 0) {
3072 cache = btrfs_lookup_block_group(fs_info, bytenr);
3073 BUG_ON(!cache);
3074 len = min(num, cache->key.offset -
3075 (bytenr - cache->key.objectid));
3076 if (pin) {
3077 spin_lock(&cache->space_info->lock);
3078 spin_lock(&cache->lock);
3079 cache->pinned += len;
3080 cache->space_info->bytes_pinned += len;
3081 spin_unlock(&cache->lock);
3082 spin_unlock(&cache->space_info->lock);
3083 fs_info->total_pinned += len;
3084 } else {
3085 int unpin = 0;
3086 3161
3087 /* 3162 spin_lock(&cache->space_info->lock);
3088 * in order to not race with the block group caching, we 3163 spin_lock(&cache->lock);
3089 * only want to unpin the extent if we are cached. If 3164 cache->pinned += num_bytes;
3090 * we aren't cached, we want to start async caching this 3165 cache->space_info->bytes_pinned += num_bytes;
3091 * block group so we can free the extent the next time 3166 if (reserved) {
3092 * around. 3167 cache->reserved -= num_bytes;
3093 */ 3168 cache->space_info->bytes_reserved -= num_bytes;
3094 spin_lock(&cache->space_info->lock); 3169 }
3095 spin_lock(&cache->lock); 3170 spin_unlock(&cache->lock);
3096 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3171 spin_unlock(&cache->space_info->lock);
3097 if (likely(unpin)) {
3098 cache->pinned -= len;
3099 cache->space_info->bytes_pinned -= len;
3100 fs_info->total_pinned -= len;
3101 }
3102 spin_unlock(&cache->lock);
3103 spin_unlock(&cache->space_info->lock);
3104 3172
3105 if (likely(unpin)) 3173 btrfs_put_block_group(cache);
3106 clear_extent_dirty(&fs_info->pinned_extents,
3107 bytenr, bytenr + len -1,
3108 GFP_NOFS);
3109 else
3110 cache_block_group(cache);
3111 3174
3112 if (unpin) 3175 set_extent_dirty(fs_info->pinned_extents,
3113 btrfs_add_free_space(cache, bytenr, len); 3176 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
3114 } 3177 return 0;
3115 btrfs_put_block_group(cache); 3178}
3116 bytenr += len; 3179
3117 num -= len; 3180static int update_reserved_extents(struct btrfs_block_group_cache *cache,
3181 u64 num_bytes, int reserve)
3182{
3183 spin_lock(&cache->space_info->lock);
3184 spin_lock(&cache->lock);
3185 if (reserve) {
3186 cache->reserved += num_bytes;
3187 cache->space_info->bytes_reserved += num_bytes;
3188 } else {
3189 cache->reserved -= num_bytes;
3190 cache->space_info->bytes_reserved -= num_bytes;
3118 } 3191 }
3192 spin_unlock(&cache->lock);
3193 spin_unlock(&cache->space_info->lock);
3119 return 0; 3194 return 0;
3120} 3195}
3121 3196
3122static int update_reserved_extents(struct btrfs_root *root, 3197int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3123 u64 bytenr, u64 num, int reserve) 3198 struct btrfs_root *root)
3124{ 3199{
3125 u64 len;
3126 struct btrfs_block_group_cache *cache;
3127 struct btrfs_fs_info *fs_info = root->fs_info; 3200 struct btrfs_fs_info *fs_info = root->fs_info;
3201 struct btrfs_caching_control *next;
3202 struct btrfs_caching_control *caching_ctl;
3203 struct btrfs_block_group_cache *cache;
3128 3204
3129 while (num > 0) { 3205 down_write(&fs_info->extent_commit_sem);
3130 cache = btrfs_lookup_block_group(fs_info, bytenr);
3131 BUG_ON(!cache);
3132 len = min(num, cache->key.offset -
3133 (bytenr - cache->key.objectid));
3134 3206
3135 spin_lock(&cache->space_info->lock); 3207 list_for_each_entry_safe(caching_ctl, next,
3136 spin_lock(&cache->lock); 3208 &fs_info->caching_block_groups, list) {
3137 if (reserve) { 3209 cache = caching_ctl->block_group;
3138 cache->reserved += len; 3210 if (block_group_cache_done(cache)) {
3139 cache->space_info->bytes_reserved += len; 3211 cache->last_byte_to_unpin = (u64)-1;
3212 list_del_init(&caching_ctl->list);
3213 put_caching_control(caching_ctl);
3140 } else { 3214 } else {
3141 cache->reserved -= len; 3215 cache->last_byte_to_unpin = caching_ctl->progress;
3142 cache->space_info->bytes_reserved -= len;
3143 } 3216 }
3144 spin_unlock(&cache->lock);
3145 spin_unlock(&cache->space_info->lock);
3146 btrfs_put_block_group(cache);
3147 bytenr += len;
3148 num -= len;
3149 } 3217 }
3218
3219 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3220 fs_info->pinned_extents = &fs_info->freed_extents[1];
3221 else
3222 fs_info->pinned_extents = &fs_info->freed_extents[0];
3223
3224 up_write(&fs_info->extent_commit_sem);
3150 return 0; 3225 return 0;
3151} 3226}
3152 3227
3153int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3228static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3154{ 3229{
3155 u64 last = 0; 3230 struct btrfs_fs_info *fs_info = root->fs_info;
3156 u64 start; 3231 struct btrfs_block_group_cache *cache = NULL;
3157 u64 end; 3232 u64 len;
3158 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3159 int ret;
3160 3233
3161 while (1) { 3234 while (start <= end) {
3162 ret = find_first_extent_bit(pinned_extents, last, 3235 if (!cache ||
3163 &start, &end, EXTENT_DIRTY); 3236 start >= cache->key.objectid + cache->key.offset) {
3164 if (ret) 3237 if (cache)
3165 break; 3238 btrfs_put_block_group(cache);
3239 cache = btrfs_lookup_block_group(fs_info, start);
3240 BUG_ON(!cache);
3241 }
3242
3243 len = cache->key.objectid + cache->key.offset - start;
3244 len = min(len, end + 1 - start);
3166 3245
3167 set_extent_dirty(copy, start, end, GFP_NOFS); 3246 if (start < cache->last_byte_to_unpin) {
3168 last = end + 1; 3247 len = min(len, cache->last_byte_to_unpin - start);
3248 btrfs_add_free_space(cache, start, len);
3249 }
3250
3251 spin_lock(&cache->space_info->lock);
3252 spin_lock(&cache->lock);
3253 cache->pinned -= len;
3254 cache->space_info->bytes_pinned -= len;
3255 spin_unlock(&cache->lock);
3256 spin_unlock(&cache->space_info->lock);
3257
3258 start += len;
3169 } 3259 }
3260
3261 if (cache)
3262 btrfs_put_block_group(cache);
3170 return 0; 3263 return 0;
3171} 3264}
3172 3265
3173int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3266int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3174 struct btrfs_root *root, 3267 struct btrfs_root *root)
3175 struct extent_io_tree *unpin)
3176{ 3268{
3269 struct btrfs_fs_info *fs_info = root->fs_info;
3270 struct extent_io_tree *unpin;
3177 u64 start; 3271 u64 start;
3178 u64 end; 3272 u64 end;
3179 int ret; 3273 int ret;
3180 3274
3275 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3276 unpin = &fs_info->freed_extents[1];
3277 else
3278 unpin = &fs_info->freed_extents[0];
3279
3181 while (1) { 3280 while (1) {
3182 ret = find_first_extent_bit(unpin, 0, &start, &end, 3281 ret = find_first_extent_bit(unpin, 0, &start, &end,
3183 EXTENT_DIRTY); 3282 EXTENT_DIRTY);
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3186 3285
3187 ret = btrfs_discard_extent(root, start, end + 1 - start); 3286 ret = btrfs_discard_extent(root, start, end + 1 - start);
3188 3287
3189 /* unlocks the pinned mutex */
3190 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3191 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3288 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3192 3289 unpin_extent_range(root, start, end);
3193 cond_resched(); 3290 cond_resched();
3194 } 3291 }
3195 3292
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3199static int pin_down_bytes(struct btrfs_trans_handle *trans, 3296static int pin_down_bytes(struct btrfs_trans_handle *trans,
3200 struct btrfs_root *root, 3297 struct btrfs_root *root,
3201 struct btrfs_path *path, 3298 struct btrfs_path *path,
3202 u64 bytenr, u64 num_bytes, int is_data, 3299 u64 bytenr, u64 num_bytes,
3300 int is_data, int reserved,
3203 struct extent_buffer **must_clean) 3301 struct extent_buffer **must_clean)
3204{ 3302{
3205 int err = 0; 3303 int err = 0;
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3231 } 3329 }
3232 free_extent_buffer(buf); 3330 free_extent_buffer(buf);
3233pinit: 3331pinit:
3234 btrfs_set_path_blocking(path); 3332 if (path)
3333 btrfs_set_path_blocking(path);
3235 /* unlocks the pinned mutex */ 3334 /* unlocks the pinned mutex */
3236 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3335 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3237 3336
3238 BUG_ON(err < 0); 3337 BUG_ON(err < 0);
3239 return 0; 3338 return 0;
3240} 3339}
3241 3340
3242
3243static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3341static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3244 struct btrfs_root *root, 3342 struct btrfs_root *root,
3245 u64 bytenr, u64 num_bytes, u64 parent, 3343 u64 bytenr, u64 num_bytes, u64 parent,
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3413 } 3511 }
3414 3512
3415 ret = pin_down_bytes(trans, root, path, bytenr, 3513 ret = pin_down_bytes(trans, root, path, bytenr,
3416 num_bytes, is_data, &must_clean); 3514 num_bytes, is_data, 0, &must_clean);
3417 if (ret > 0) 3515 if (ret > 0)
3418 mark_free = 1; 3516 mark_free = 1;
3419 BUG_ON(ret < 0); 3517 BUG_ON(ret < 0);
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3544 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3642 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3545 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3643 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3546 /* unlocks the pinned mutex */ 3644 /* unlocks the pinned mutex */
3547 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3645 btrfs_pin_extent(root, bytenr, num_bytes, 1);
3548 update_reserved_extents(root, bytenr, num_bytes, 0);
3549 ret = 0; 3646 ret = 0;
3550 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3647 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
3551 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3648 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3585,19 +3682,33 @@ static noinline int
3585wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3682wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3586 u64 num_bytes) 3683 u64 num_bytes)
3587{ 3684{
3685 struct btrfs_caching_control *caching_ctl;
3588 DEFINE_WAIT(wait); 3686 DEFINE_WAIT(wait);
3589 3687
3590 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3688 caching_ctl = get_caching_control(cache);
3591 3689 if (!caching_ctl)
3592 if (block_group_cache_done(cache)) {
3593 finish_wait(&cache->caching_q, &wait);
3594 return 0; 3690 return 0;
3595 }
3596 schedule();
3597 finish_wait(&cache->caching_q, &wait);
3598 3691
3599 wait_event(cache->caching_q, block_group_cache_done(cache) || 3692 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
3600 (cache->free_space >= num_bytes)); 3693 (cache->free_space >= num_bytes));
3694
3695 put_caching_control(caching_ctl);
3696 return 0;
3697}
3698
3699static noinline int
3700wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
3701{
3702 struct btrfs_caching_control *caching_ctl;
3703 DEFINE_WAIT(wait);
3704
3705 caching_ctl = get_caching_control(cache);
3706 if (!caching_ctl)
3707 return 0;
3708
3709 wait_event(caching_ctl->wait, block_group_cache_done(cache));
3710
3711 put_caching_control(caching_ctl);
3601 return 0; 3712 return 0;
3602} 3713}
3603 3714
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3635 int last_ptr_loop = 0; 3746 int last_ptr_loop = 0;
3636 int loop = 0; 3747 int loop = 0;
3637 bool found_uncached_bg = false; 3748 bool found_uncached_bg = false;
3749 bool failed_cluster_refill = false;
3638 3750
3639 WARN_ON(num_bytes < root->sectorsize); 3751 WARN_ON(num_bytes < root->sectorsize);
3640 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3752 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3732,7 +3844,16 @@ have_block_group:
3732 if (unlikely(block_group->ro)) 3844 if (unlikely(block_group->ro))
3733 goto loop; 3845 goto loop;
3734 3846
3735 if (last_ptr) { 3847 /*
3848 * Ok we want to try and use the cluster allocator, so let's look
3849 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
3850 * have tried the cluster allocator plenty of times at this
3851 * point and not have found anything, so we are likely way too
3852 * fragmented for the clustering stuff to find anything, so let's
3853 * just skip it and let the allocator find whatever block it can
3854 * find
3855 */
3856 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
3736 /* 3857 /*
3737 * the refill lock keeps out other 3858 * the refill lock keeps out other
3738 * people trying to start a new cluster 3859 * people trying to start a new cluster
@@ -3807,9 +3928,11 @@ refill_cluster:
3807 spin_unlock(&last_ptr->refill_lock); 3928 spin_unlock(&last_ptr->refill_lock);
3808 goto checks; 3929 goto checks;
3809 } 3930 }
3810 } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 3931 } else if (!cached && loop > LOOP_CACHING_NOWAIT
3932 && !failed_cluster_refill) {
3811 spin_unlock(&last_ptr->refill_lock); 3933 spin_unlock(&last_ptr->refill_lock);
3812 3934
3935 failed_cluster_refill = true;
3813 wait_block_group_cache_progress(block_group, 3936 wait_block_group_cache_progress(block_group,
3814 num_bytes + empty_cluster + empty_size); 3937 num_bytes + empty_cluster + empty_size);
3815 goto have_block_group; 3938 goto have_block_group;
@@ -3821,13 +3944,9 @@ refill_cluster:
3821 * cluster. Free the cluster we've been trying 3944 * cluster. Free the cluster we've been trying
3822 * to use, and go to the next block group 3945 * to use, and go to the next block group
3823 */ 3946 */
3824 if (loop < LOOP_NO_EMPTY_SIZE) { 3947 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3825 btrfs_return_cluster_to_free_space(NULL,
3826 last_ptr);
3827 spin_unlock(&last_ptr->refill_lock);
3828 goto loop;
3829 }
3830 spin_unlock(&last_ptr->refill_lock); 3948 spin_unlock(&last_ptr->refill_lock);
3949 goto loop;
3831 } 3950 }
3832 3951
3833 offset = btrfs_find_space_for_alloc(block_group, search_start, 3952 offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3881,9 +4000,12 @@ checks:
3881 search_start - offset); 4000 search_start - offset);
3882 BUG_ON(offset > search_start); 4001 BUG_ON(offset > search_start);
3883 4002
4003 update_reserved_extents(block_group, num_bytes, 1);
4004
3884 /* we are all good, lets return */ 4005 /* we are all good, lets return */
3885 break; 4006 break;
3886loop: 4007loop:
4008 failed_cluster_refill = false;
3887 btrfs_put_block_group(block_group); 4009 btrfs_put_block_group(block_group);
3888 } 4010 }
3889 up_read(&space_info->groups_sem); 4011 up_read(&space_info->groups_sem);
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3973 up_read(&info->groups_sem); 4095 up_read(&info->groups_sem);
3974} 4096}
3975 4097
3976static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4098int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3977 struct btrfs_root *root, 4099 struct btrfs_root *root,
3978 u64 num_bytes, u64 min_alloc_size, 4100 u64 num_bytes, u64 min_alloc_size,
3979 u64 empty_size, u64 hint_byte, 4101 u64 empty_size, u64 hint_byte,
3980 u64 search_end, struct btrfs_key *ins, 4102 u64 search_end, struct btrfs_key *ins,
3981 u64 data) 4103 u64 data)
3982{ 4104{
3983 int ret; 4105 int ret;
3984 u64 search_start = 0; 4106 u64 search_start = 0;
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4044 ret = btrfs_discard_extent(root, start, len); 4166 ret = btrfs_discard_extent(root, start, len);
4045 4167
4046 btrfs_add_free_space(cache, start, len); 4168 btrfs_add_free_space(cache, start, len);
4169 update_reserved_extents(cache, len, 0);
4047 btrfs_put_block_group(cache); 4170 btrfs_put_block_group(cache);
4048 update_reserved_extents(root, start, len, 0);
4049
4050 return ret;
4051}
4052
4053int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4054 struct btrfs_root *root,
4055 u64 num_bytes, u64 min_alloc_size,
4056 u64 empty_size, u64 hint_byte,
4057 u64 search_end, struct btrfs_key *ins,
4058 u64 data)
4059{
4060 int ret;
4061 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
4062 empty_size, hint_byte, search_end, ins,
4063 data);
4064 if (!ret)
4065 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4066 4171
4067 return ret; 4172 return ret;
4068} 4173}
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4223{ 4328{
4224 int ret; 4329 int ret;
4225 struct btrfs_block_group_cache *block_group; 4330 struct btrfs_block_group_cache *block_group;
4331 struct btrfs_caching_control *caching_ctl;
4332 u64 start = ins->objectid;
4333 u64 num_bytes = ins->offset;
4226 4334
4227 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4335 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4228 cache_block_group(block_group); 4336 cache_block_group(block_group);
4229 wait_event(block_group->caching_q, 4337 caching_ctl = get_caching_control(block_group);
4230 block_group_cache_done(block_group));
4231 4338
4232 ret = btrfs_remove_free_space(block_group, ins->objectid, 4339 if (!caching_ctl) {
4233 ins->offset); 4340 BUG_ON(!block_group_cache_done(block_group));
4234 BUG_ON(ret); 4341 ret = btrfs_remove_free_space(block_group, start, num_bytes);
4342 BUG_ON(ret);
4343 } else {
4344 mutex_lock(&caching_ctl->mutex);
4345
4346 if (start >= caching_ctl->progress) {
4347 ret = add_excluded_extent(root, start, num_bytes);
4348 BUG_ON(ret);
4349 } else if (start + num_bytes <= caching_ctl->progress) {
4350 ret = btrfs_remove_free_space(block_group,
4351 start, num_bytes);
4352 BUG_ON(ret);
4353 } else {
4354 num_bytes = caching_ctl->progress - start;
4355 ret = btrfs_remove_free_space(block_group,
4356 start, num_bytes);
4357 BUG_ON(ret);
4358
4359 start = caching_ctl->progress;
4360 num_bytes = ins->objectid + ins->offset -
4361 caching_ctl->progress;
4362 ret = add_excluded_extent(root, start, num_bytes);
4363 BUG_ON(ret);
4364 }
4365
4366 mutex_unlock(&caching_ctl->mutex);
4367 put_caching_control(caching_ctl);
4368 }
4369
4370 update_reserved_extents(block_group, ins->offset, 1);
4235 btrfs_put_block_group(block_group); 4371 btrfs_put_block_group(block_group);
4236 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4372 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4237 0, owner, offset, ins, 1); 4373 0, owner, offset, ins, 1);
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4255 int ret; 4391 int ret;
4256 u64 flags = 0; 4392 u64 flags = 0;
4257 4393
4258 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4394 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4259 empty_size, hint_byte, search_end, 4395 empty_size, hint_byte, search_end,
4260 ins, 0); 4396 ins, 0);
4261 if (ret) 4397 if (ret)
4262 return ret; 4398 return ret;
4263 4399
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4268 } else 4404 } else
4269 BUG_ON(parent > 0); 4405 BUG_ON(parent > 0);
4270 4406
4271 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4272 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4407 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4273 struct btrfs_delayed_extent_op *extent_op; 4408 struct btrfs_delayed_extent_op *extent_op;
4274 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4409 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4347 return buf; 4482 return buf;
4348} 4483}
4349 4484
4350#if 0 4485struct walk_control {
4351int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4486 u64 refs[BTRFS_MAX_LEVEL];
4352 struct btrfs_root *root, struct extent_buffer *leaf) 4487 u64 flags[BTRFS_MAX_LEVEL];
4353{ 4488 struct btrfs_key update_progress;
4354 u64 disk_bytenr; 4489 int stage;
4355 u64 num_bytes; 4490 int level;
4356 struct btrfs_key key; 4491 int shared_level;
4357 struct btrfs_file_extent_item *fi; 4492 int update_ref;
4358 u32 nritems; 4493 int keep_locks;
4359 int i; 4494 int reada_slot;
4360 int ret; 4495 int reada_count;
4361 4496};
4362 BUG_ON(!btrfs_is_leaf(leaf));
4363 nritems = btrfs_header_nritems(leaf);
4364
4365 for (i = 0; i < nritems; i++) {
4366 cond_resched();
4367 btrfs_item_key_to_cpu(leaf, &key, i);
4368
4369 /* only extents have references, skip everything else */
4370 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4371 continue;
4372
4373 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4374
4375 /* inline extents live in the btree, they don't have refs */
4376 if (btrfs_file_extent_type(leaf, fi) ==
4377 BTRFS_FILE_EXTENT_INLINE)
4378 continue;
4379
4380 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4381
4382 /* holes don't have refs */
4383 if (disk_bytenr == 0)
4384 continue;
4385
4386 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4387 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
4388 leaf->start, 0, key.objectid, 0);
4389 BUG_ON(ret);
4390 }
4391 return 0;
4392}
4393
4394static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4395 struct btrfs_root *root,
4396 struct btrfs_leaf_ref *ref)
4397{
4398 int i;
4399 int ret;
4400 struct btrfs_extent_info *info;
4401 struct refsort *sorted;
4402
4403 if (ref->nritems == 0)
4404 return 0;
4405
4406 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
4407 for (i = 0; i < ref->nritems; i++) {
4408 sorted[i].bytenr = ref->extents[i].bytenr;
4409 sorted[i].slot = i;
4410 }
4411 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
4412
4413 /*
4414 * the items in the ref were sorted when the ref was inserted
4415 * into the ref cache, so this is already in order
4416 */
4417 for (i = 0; i < ref->nritems; i++) {
4418 info = ref->extents + sorted[i].slot;
4419 ret = btrfs_free_extent(trans, root, info->bytenr,
4420 info->num_bytes, ref->bytenr,
4421 ref->owner, ref->generation,
4422 info->objectid, 0);
4423
4424 atomic_inc(&root->fs_info->throttle_gen);
4425 wake_up(&root->fs_info->transaction_throttle);
4426 cond_resched();
4427
4428 BUG_ON(ret);
4429 info++;
4430 }
4431
4432 kfree(sorted);
4433 return 0;
4434}
4435
4436
4437static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
4438 struct btrfs_root *root, u64 start,
4439 u64 len, u32 *refs)
4440{
4441 int ret;
4442
4443 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
4444 BUG_ON(ret);
4445
4446#if 0 /* some debugging code in case we see problems here */
4447 /* if the refs count is one, it won't get increased again. But
4448 * if the ref count is > 1, someone may be decreasing it at
4449 * the same time we are.
4450 */
4451 if (*refs != 1) {
4452 struct extent_buffer *eb = NULL;
4453 eb = btrfs_find_create_tree_block(root, start, len);
4454 if (eb)
4455 btrfs_tree_lock(eb);
4456
4457 mutex_lock(&root->fs_info->alloc_mutex);
4458 ret = lookup_extent_ref(NULL, root, start, len, refs);
4459 BUG_ON(ret);
4460 mutex_unlock(&root->fs_info->alloc_mutex);
4461
4462 if (eb) {
4463 btrfs_tree_unlock(eb);
4464 free_extent_buffer(eb);
4465 }
4466 if (*refs == 1) {
4467 printk(KERN_ERR "btrfs block %llu went down to one "
4468 "during drop_snap\n", (unsigned long long)start);
4469 }
4470
4471 }
4472#endif
4473
4474 cond_resched();
4475 return ret;
4476}
4477 4497
4498#define DROP_REFERENCE 1
4499#define UPDATE_BACKREF 2
4478 4500
4479/* 4501static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4480 * this is used while deleting old snapshots, and it drops the refs 4502 struct btrfs_root *root,
4481 * on a whole subtree starting from a level 1 node. 4503 struct walk_control *wc,
4482 * 4504 struct btrfs_path *path)
4483 * The idea is to sort all the leaf pointers, and then drop the
4484 * ref on all the leaves in order. Most of the time the leaves
4485 * will have ref cache entries, so no leaf IOs will be required to
4486 * find the extents they have references on.
4487 *
4488 * For each leaf, any references it has are also dropped in order
4489 *
4490 * This ends up dropping the references in something close to optimal
4491 * order for reading and modifying the extent allocation tree.
4492 */
4493static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4494 struct btrfs_root *root,
4495 struct btrfs_path *path)
4496{ 4505{
4497 u64 bytenr; 4506 u64 bytenr;
4498 u64 root_owner; 4507 u64 generation;
4499 u64 root_gen; 4508 u64 refs;
4500 struct extent_buffer *eb = path->nodes[1]; 4509 u64 last = 0;
4501 struct extent_buffer *leaf; 4510 u32 nritems;
4502 struct btrfs_leaf_ref *ref; 4511 u32 blocksize;
4503 struct refsort *sorted = NULL; 4512 struct btrfs_key key;
4504 int nritems = btrfs_header_nritems(eb); 4513 struct extent_buffer *eb;
4505 int ret; 4514 int ret;
4506 int i; 4515 int slot;
4507 int refi = 0; 4516 int nread = 0;
4508 int slot = path->slots[1];
4509 u32 blocksize = btrfs_level_size(root, 0);
4510 u32 refs;
4511
4512 if (nritems == 0)
4513 goto out;
4514
4515 root_owner = btrfs_header_owner(eb);
4516 root_gen = btrfs_header_generation(eb);
4517 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
4518 4517
4519 /* 4518 if (path->slots[wc->level] < wc->reada_slot) {
4520 * step one, sort all the leaf pointers so we don't scribble 4519 wc->reada_count = wc->reada_count * 2 / 3;
4521 * randomly into the extent allocation tree 4520 wc->reada_count = max(wc->reada_count, 2);
4522 */ 4521 } else {
4523 for (i = slot; i < nritems; i++) { 4522 wc->reada_count = wc->reada_count * 3 / 2;
4524 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4523 wc->reada_count = min_t(int, wc->reada_count,
4525 sorted[refi].slot = i; 4524 BTRFS_NODEPTRS_PER_BLOCK(root));
4526 refi++;
4527 } 4525 }
4528 4526
4529 /* 4527 eb = path->nodes[wc->level];
4530 * nritems won't be zero, but if we're picking up drop_snapshot 4528 nritems = btrfs_header_nritems(eb);
4531 * after a crash, slot might be > 0, so double check things 4529 blocksize = btrfs_level_size(root, wc->level - 1);
4532 * just in case.
4533 */
4534 if (refi == 0)
4535 goto out;
4536 4530
4537 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4531 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4532 if (nread >= wc->reada_count)
4533 break;
4538 4534
4539 /* 4535 cond_resched();
4540 * the first loop frees everything the leaves point to 4536 bytenr = btrfs_node_blockptr(eb, slot);
4541 */ 4537 generation = btrfs_node_ptr_generation(eb, slot);
4542 for (i = 0; i < refi; i++) {
4543 u64 ptr_gen;
4544 4538
4545 bytenr = sorted[i].bytenr; 4539 if (slot == path->slots[wc->level])
4540 goto reada;
4546 4541
4547 /* 4542 if (wc->stage == UPDATE_BACKREF &&
4548 * check the reference count on this leaf. If it is > 1 4543 generation <= root->root_key.offset)
4549 * we just decrement it below and don't update any
4550 * of the refs the leaf points to.
4551 */
4552 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4553 blocksize, &refs);
4554 BUG_ON(ret);
4555 if (refs != 1)
4556 continue; 4544 continue;
4557 4545
4558 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4546 if (wc->stage == DROP_REFERENCE) {
4559 4547 ret = btrfs_lookup_extent_info(trans, root,
4560 /* 4548 bytenr, blocksize,
4561 * the leaf only had one reference, which means the 4549 &refs, NULL);
4562 * only thing pointing to this leaf is the snapshot
4563 * we're deleting. It isn't possible for the reference
4564 * count to increase again later
4565 *
4566 * The reference cache is checked for the leaf,
4567 * and if found we'll be able to drop any refs held by
4568 * the leaf without needing to read it in.
4569 */
4570 ref = btrfs_lookup_leaf_ref(root, bytenr);
4571 if (ref && ref->generation != ptr_gen) {
4572 btrfs_free_leaf_ref(root, ref);
4573 ref = NULL;
4574 }
4575 if (ref) {
4576 ret = cache_drop_leaf_ref(trans, root, ref);
4577 BUG_ON(ret);
4578 btrfs_remove_leaf_ref(root, ref);
4579 btrfs_free_leaf_ref(root, ref);
4580 } else {
4581 /*
4582 * the leaf wasn't in the reference cache, so
4583 * we have to read it.
4584 */
4585 leaf = read_tree_block(root, bytenr, blocksize,
4586 ptr_gen);
4587 ret = btrfs_drop_leaf_ref(trans, root, leaf);
4588 BUG_ON(ret); 4550 BUG_ON(ret);
4589 free_extent_buffer(leaf); 4551 BUG_ON(refs == 0);
4590 } 4552 if (refs == 1)
4591 atomic_inc(&root->fs_info->throttle_gen); 4553 goto reada;
4592 wake_up(&root->fs_info->transaction_throttle);
4593 cond_resched();
4594 }
4595
4596 /*
4597 * run through the loop again to free the refs on the leaves.
4598 * This is faster than doing it in the loop above because
4599 * the leaves are likely to be clustered together. We end up
4600 * working in nice chunks on the extent allocation tree.
4601 */
4602 for (i = 0; i < refi; i++) {
4603 bytenr = sorted[i].bytenr;
4604 ret = btrfs_free_extent(trans, root, bytenr,
4605 blocksize, eb->start,
4606 root_owner, root_gen, 0, 1);
4607 BUG_ON(ret);
4608
4609 atomic_inc(&root->fs_info->throttle_gen);
4610 wake_up(&root->fs_info->transaction_throttle);
4611 cond_resched();
4612 }
4613out:
4614 kfree(sorted);
4615
4616 /*
4617 * update the path to show we've processed the entire level 1
4618 * node. This will get saved into the root's drop_snapshot_progress
4619 * field so these drops are not repeated again if this transaction
4620 * commits.
4621 */
4622 path->slots[1] = nritems;
4623 return 0;
4624}
4625
4626/*
4627 * helper function for drop_snapshot, this walks down the tree dropping ref
4628 * counts as it goes.
4629 */
4630static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4631 struct btrfs_root *root,
4632 struct btrfs_path *path, int *level)
4633{
4634 u64 root_owner;
4635 u64 root_gen;
4636 u64 bytenr;
4637 u64 ptr_gen;
4638 struct extent_buffer *next;
4639 struct extent_buffer *cur;
4640 struct extent_buffer *parent;
4641 u32 blocksize;
4642 int ret;
4643 u32 refs;
4644
4645 WARN_ON(*level < 0);
4646 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4647 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4648 path->nodes[*level]->len, &refs);
4649 BUG_ON(ret);
4650 if (refs > 1)
4651 goto out;
4652
4653 /*
4654 * walk down to the last node level and free all the leaves
4655 */
4656 while (*level >= 0) {
4657 WARN_ON(*level < 0);
4658 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4659 cur = path->nodes[*level];
4660
4661 if (btrfs_header_level(cur) != *level)
4662 WARN_ON(1);
4663 4554
4664 if (path->slots[*level] >= 4555 if (!wc->update_ref ||
4665 btrfs_header_nritems(cur)) 4556 generation <= root->root_key.offset)
4666 break; 4557 continue;
4667 4558 btrfs_node_key_to_cpu(eb, &key, slot);
4668 /* the new code goes down to level 1 and does all the 4559 ret = btrfs_comp_cpu_keys(&key,
4669 * leaves pointed to that node in bulk. So, this check 4560 &wc->update_progress);
4670 * for level 0 will always be false. 4561 if (ret < 0)
4671 * 4562 continue;
4672 * But, the disk format allows the drop_snapshot_progress
4673 * field in the root to leave things in a state where
4674 * a leaf will need cleaning up here. If someone crashes
4675 * with the old code and then boots with the new code,
4676 * we might find a leaf here.
4677 */
4678 if (*level == 0) {
4679 ret = btrfs_drop_leaf_ref(trans, root, cur);
4680 BUG_ON(ret);
4681 break;
4682 } 4563 }
4683 4564reada:
4684 /* 4565 ret = readahead_tree_block(root, bytenr, blocksize,
4685 * once we get to level one, process the whole node 4566 generation);
4686 * at once, including everything below it. 4567 if (ret)
4687 */
4688 if (*level == 1) {
4689 ret = drop_level_one_refs(trans, root, path);
4690 BUG_ON(ret);
4691 break; 4568 break;
4692 } 4569 last = bytenr + blocksize;
4693 4570 nread++;
4694 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
4695 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4696 blocksize = btrfs_level_size(root, *level - 1);
4697
4698 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4699 blocksize, &refs);
4700 BUG_ON(ret);
4701
4702 /*
4703 * if there is more than one reference, we don't need
4704 * to read that node to drop any references it has. We
4705 * just drop the ref we hold on that node and move on to the
4706 * next slot in this level.
4707 */
4708 if (refs != 1) {
4709 parent = path->nodes[*level];
4710 root_owner = btrfs_header_owner(parent);
4711 root_gen = btrfs_header_generation(parent);
4712 path->slots[*level]++;
4713
4714 ret = btrfs_free_extent(trans, root, bytenr,
4715 blocksize, parent->start,
4716 root_owner, root_gen,
4717 *level - 1, 1);
4718 BUG_ON(ret);
4719
4720 atomic_inc(&root->fs_info->throttle_gen);
4721 wake_up(&root->fs_info->transaction_throttle);
4722 cond_resched();
4723
4724 continue;
4725 }
4726
4727 /*
4728 * we need to keep freeing things in the next level down.
4729 * read the block and loop around to process it
4730 */
4731 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4732 WARN_ON(*level <= 0);
4733 if (path->nodes[*level-1])
4734 free_extent_buffer(path->nodes[*level-1]);
4735 path->nodes[*level-1] = next;
4736 *level = btrfs_header_level(next);
4737 path->slots[*level] = 0;
4738 cond_resched();
4739 } 4571 }
4740out: 4572 wc->reada_slot = slot;
4741 WARN_ON(*level < 0);
4742 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4743
4744 if (path->nodes[*level] == root->node) {
4745 parent = path->nodes[*level];
4746 bytenr = path->nodes[*level]->start;
4747 } else {
4748 parent = path->nodes[*level + 1];
4749 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
4750 }
4751
4752 blocksize = btrfs_level_size(root, *level);
4753 root_owner = btrfs_header_owner(parent);
4754 root_gen = btrfs_header_generation(parent);
4755
4756 /*
4757 * cleanup and free the reference on the last node
4758 * we processed
4759 */
4760 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4761 parent->start, root_owner, root_gen,
4762 *level, 1);
4763 free_extent_buffer(path->nodes[*level]);
4764 path->nodes[*level] = NULL;
4765
4766 *level += 1;
4767 BUG_ON(ret);
4768
4769 cond_resched();
4770 return 0;
4771} 4573}
4772#endif
4773
4774struct walk_control {
4775 u64 refs[BTRFS_MAX_LEVEL];
4776 u64 flags[BTRFS_MAX_LEVEL];
4777 struct btrfs_key update_progress;
4778 int stage;
4779 int level;
4780 int shared_level;
4781 int update_ref;
4782 int keep_locks;
4783};
4784
4785#define DROP_REFERENCE 1
4786#define UPDATE_BACKREF 2
4787 4574
4788/* 4575/*
4789 * helper to process tree block while walking down the tree. 4576 * helper to process tree block while walking down the tree.
4790 * 4577 *
4791 * when wc->stage == DROP_REFERENCE, this function checks
4792 * reference count of the block. if the block is shared and
4793 * we need update back refs for the subtree rooted at the
4794 * block, this function changes wc->stage to UPDATE_BACKREF
4795 *
4796 * when wc->stage == UPDATE_BACKREF, this function updates 4578 * when wc->stage == UPDATE_BACKREF, this function updates
4797 * back refs for pointers in the block. 4579 * back refs for pointers in the block.
4798 * 4580 *
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4805{ 4587{
4806 int level = wc->level; 4588 int level = wc->level;
4807 struct extent_buffer *eb = path->nodes[level]; 4589 struct extent_buffer *eb = path->nodes[level];
4808 struct btrfs_key key;
4809 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4590 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4810 int ret; 4591 int ret;
4811 4592
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4828 BUG_ON(wc->refs[level] == 0); 4609 BUG_ON(wc->refs[level] == 0);
4829 } 4610 }
4830 4611
4831 if (wc->stage == DROP_REFERENCE &&
4832 wc->update_ref && wc->refs[level] > 1) {
4833 BUG_ON(eb == root->node);
4834 BUG_ON(path->slots[level] > 0);
4835 if (level == 0)
4836 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4837 else
4838 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4839 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4840 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4841 wc->stage = UPDATE_BACKREF;
4842 wc->shared_level = level;
4843 }
4844 }
4845
4846 if (wc->stage == DROP_REFERENCE) { 4612 if (wc->stage == DROP_REFERENCE) {
4847 if (wc->refs[level] > 1) 4613 if (wc->refs[level] > 1)
4848 return 1; 4614 return 1;
@@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4879} 4645}
4880 4646
4881/* 4647/*
4648 * helper to process tree block pointer.
4649 *
4650 * when wc->stage == DROP_REFERENCE, this function checks
4651 * reference count of the block pointed to. if the block
4652 * is shared and we need update back refs for the subtree
4653 * rooted at the block, this function changes wc->stage to
4654 * UPDATE_BACKREF. if the block is shared and there is no
4655 * need to update back refs, this function drops the reference
4656 * to the block.
4657 *
4658 * NOTE: return value 1 means we should stop walking down.
4659 */
4660static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4661 struct btrfs_root *root,
4662 struct btrfs_path *path,
4663 struct walk_control *wc)
4664{
4665 u64 bytenr;
4666 u64 generation;
4667 u64 parent;
4668 u32 blocksize;
4669 struct btrfs_key key;
4670 struct extent_buffer *next;
4671 int level = wc->level;
4672 int reada = 0;
4673 int ret = 0;
4674
4675 generation = btrfs_node_ptr_generation(path->nodes[level],
4676 path->slots[level]);
4677 /*
4678 * if the lower level block was created before the snapshot
4679 * was created, we know there is no need to update back refs
4680 * for the subtree
4681 */
4682 if (wc->stage == UPDATE_BACKREF &&
4683 generation <= root->root_key.offset)
4684 return 1;
4685
4686 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4687 blocksize = btrfs_level_size(root, level - 1);
4688
4689 next = btrfs_find_tree_block(root, bytenr, blocksize);
4690 if (!next) {
4691 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
4692 reada = 1;
4693 }
4694 btrfs_tree_lock(next);
4695 btrfs_set_lock_blocking(next);
4696
4697 if (wc->stage == DROP_REFERENCE) {
4698 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4699 &wc->refs[level - 1],
4700 &wc->flags[level - 1]);
4701 BUG_ON(ret);
4702 BUG_ON(wc->refs[level - 1] == 0);
4703
4704 if (wc->refs[level - 1] > 1) {
4705 if (!wc->update_ref ||
4706 generation <= root->root_key.offset)
4707 goto skip;
4708
4709 btrfs_node_key_to_cpu(path->nodes[level], &key,
4710 path->slots[level]);
4711 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4712 if (ret < 0)
4713 goto skip;
4714
4715 wc->stage = UPDATE_BACKREF;
4716 wc->shared_level = level - 1;
4717 }
4718 }
4719
4720 if (!btrfs_buffer_uptodate(next, generation)) {
4721 btrfs_tree_unlock(next);
4722 free_extent_buffer(next);
4723 next = NULL;
4724 }
4725
4726 if (!next) {
4727 if (reada && level == 1)
4728 reada_walk_down(trans, root, wc, path);
4729 next = read_tree_block(root, bytenr, blocksize, generation);
4730 btrfs_tree_lock(next);
4731 btrfs_set_lock_blocking(next);
4732 }
4733
4734 level--;
4735 BUG_ON(level != btrfs_header_level(next));
4736 path->nodes[level] = next;
4737 path->slots[level] = 0;
4738 path->locks[level] = 1;
4739 wc->level = level;
4740 if (wc->level == 1)
4741 wc->reada_slot = 0;
4742 return 0;
4743skip:
4744 wc->refs[level - 1] = 0;
4745 wc->flags[level - 1] = 0;
4746
4747 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4748 parent = path->nodes[level]->start;
4749 } else {
4750 BUG_ON(root->root_key.objectid !=
4751 btrfs_header_owner(path->nodes[level]));
4752 parent = 0;
4753 }
4754
4755 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
4756 root->root_key.objectid, level - 1, 0);
4757 BUG_ON(ret);
4758
4759 btrfs_tree_unlock(next);
4760 free_extent_buffer(next);
4761 return 1;
4762}
4763
4764/*
4882 * helper to process tree block while walking up the tree. 4765 * helper to process tree block while walking up the tree.
4883 * 4766 *
4884 * when wc->stage == DROP_REFERENCE, this function drops 4767 * when wc->stage == DROP_REFERENCE, this function drops
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4905 if (level < wc->shared_level) 4788 if (level < wc->shared_level)
4906 goto out; 4789 goto out;
4907 4790
4908 BUG_ON(wc->refs[level] <= 1);
4909 ret = find_next_key(path, level + 1, &wc->update_progress); 4791 ret = find_next_key(path, level + 1, &wc->update_progress);
4910 if (ret > 0) 4792 if (ret > 0)
4911 wc->update_ref = 0; 4793 wc->update_ref = 0;
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4936 path->locks[level] = 0; 4818 path->locks[level] = 0;
4937 return 1; 4819 return 1;
4938 } 4820 }
4939 } else {
4940 BUG_ON(level != 0);
4941 } 4821 }
4942 } 4822 }
4943 4823
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4990 struct btrfs_path *path, 4870 struct btrfs_path *path,
4991 struct walk_control *wc) 4871 struct walk_control *wc)
4992{ 4872{
4993 struct extent_buffer *next;
4994 struct extent_buffer *cur;
4995 u64 bytenr;
4996 u64 ptr_gen;
4997 u32 blocksize;
4998 int level = wc->level; 4873 int level = wc->level;
4999 int ret; 4874 int ret;
5000 4875
5001 while (level >= 0) { 4876 while (level >= 0) {
5002 cur = path->nodes[level]; 4877 if (path->slots[level] >=
5003 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 4878 btrfs_header_nritems(path->nodes[level]))
4879 break;
5004 4880
5005 ret = walk_down_proc(trans, root, path, wc); 4881 ret = walk_down_proc(trans, root, path, wc);
5006 if (ret > 0) 4882 if (ret > 0)
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5009 if (level == 0) 4885 if (level == 0)
5010 break; 4886 break;
5011 4887
5012 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 4888 ret = do_walk_down(trans, root, path, wc);
5013 blocksize = btrfs_level_size(root, level - 1); 4889 if (ret > 0) {
5014 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 4890 path->slots[level]++;
5015 4891 continue;
5016 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4892 }
5017 btrfs_tree_lock(next); 4893 level = wc->level;
5018 btrfs_set_lock_blocking(next);
5019
5020 level--;
5021 BUG_ON(level != btrfs_header_level(next));
5022 path->nodes[level] = next;
5023 path->slots[level] = 0;
5024 path->locks[level] = 1;
5025 wc->level = level;
5026 } 4894 }
5027 return 0; 4895 return 0;
5028} 4896}
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5112 err = ret; 4980 err = ret;
5113 goto out; 4981 goto out;
5114 } 4982 }
5115 btrfs_node_key_to_cpu(path->nodes[level], &key, 4983 WARN_ON(ret > 0);
5116 path->slots[level]);
5117 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
5118 4984
5119 /* 4985 /*
5120 * unlock our path, this is safe because only this 4986 * unlock our path, this is safe because only this
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5149 wc->stage = DROP_REFERENCE; 5015 wc->stage = DROP_REFERENCE;
5150 wc->update_ref = update_ref; 5016 wc->update_ref = update_ref;
5151 wc->keep_locks = 0; 5017 wc->keep_locks = 0;
5018 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5152 5019
5153 while (1) { 5020 while (1) {
5154 ret = walk_down_tree(trans, root, path, wc); 5021 ret = walk_down_tree(trans, root, path, wc);
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5201 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5068 ret = btrfs_del_root(trans, tree_root, &root->root_key);
5202 BUG_ON(ret); 5069 BUG_ON(ret);
5203 5070
5204 free_extent_buffer(root->node); 5071 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5205 free_extent_buffer(root->commit_root); 5072 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
5206 kfree(root); 5073 NULL, NULL);
5074 BUG_ON(ret < 0);
5075 if (ret > 0) {
5076 ret = btrfs_del_orphan_item(trans, tree_root,
5077 root->root_key.objectid);
5078 BUG_ON(ret);
5079 }
5080 }
5081
5082 if (root->in_radix) {
5083 btrfs_free_fs_root(tree_root->fs_info, root);
5084 } else {
5085 free_extent_buffer(root->node);
5086 free_extent_buffer(root->commit_root);
5087 kfree(root);
5088 }
5207out: 5089out:
5208 btrfs_end_transaction(trans, tree_root); 5090 btrfs_end_transaction(trans, tree_root);
5209 kfree(wc); 5091 kfree(wc);
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5255 wc->stage = DROP_REFERENCE; 5137 wc->stage = DROP_REFERENCE;
5256 wc->update_ref = 0; 5138 wc->update_ref = 0;
5257 wc->keep_locks = 1; 5139 wc->keep_locks = 1;
5140 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5258 5141
5259 while (1) { 5142 while (1) {
5260 wret = walk_down_tree(trans, root, path, wc); 5143 wret = walk_down_tree(trans, root, path, wc);
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
5397 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5280 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
5398 while (1) { 5281 while (1) {
5399 int ret; 5282 int ret;
5400 spin_lock(&em_tree->lock); 5283 write_lock(&em_tree->lock);
5401 ret = add_extent_mapping(em_tree, em); 5284 ret = add_extent_mapping(em_tree, em);
5402 spin_unlock(&em_tree->lock); 5285 write_unlock(&em_tree->lock);
5403 if (ret != -EEXIST) { 5286 if (ret != -EEXIST) {
5404 free_extent_map(em); 5287 free_extent_map(em);
5405 break; 5288 break;
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6842 return 0; 6725 return 0;
6843} 6726}
6844 6727
6845#if 0 6728/*
6846static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6729 * checks to see if it's even possible to relocate this block group.
6847 struct btrfs_root *root, 6730 *
6848 u64 objectid, u64 size) 6731 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
6849{ 6732 * ok to go ahead and try.
6850 struct btrfs_path *path; 6733 */
6851 struct btrfs_inode_item *item; 6734int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6852 struct extent_buffer *leaf;
6853 int ret;
6854
6855 path = btrfs_alloc_path();
6856 if (!path)
6857 return -ENOMEM;
6858
6859 path->leave_spinning = 1;
6860 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6861 if (ret)
6862 goto out;
6863
6864 leaf = path->nodes[0];
6865 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
6866 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
6867 btrfs_set_inode_generation(leaf, item, 1);
6868 btrfs_set_inode_size(leaf, item, size);
6869 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
6870 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
6871 btrfs_mark_buffer_dirty(leaf);
6872 btrfs_release_path(root, path);
6873out:
6874 btrfs_free_path(path);
6875 return ret;
6876}
6877
6878static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
6879 struct btrfs_block_group_cache *group)
6880{ 6735{
6881 struct inode *inode = NULL; 6736 struct btrfs_block_group_cache *block_group;
6882 struct btrfs_trans_handle *trans; 6737 struct btrfs_space_info *space_info;
6883 struct btrfs_root *root; 6738 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
6884 struct btrfs_key root_key; 6739 struct btrfs_device *device;
6885 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 6740 int full = 0;
6886 int err = 0; 6741 int ret = 0;
6887 6742
6888 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6743 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
6889 root_key.type = BTRFS_ROOT_ITEM_KEY;
6890 root_key.offset = (u64)-1;
6891 root = btrfs_read_fs_root_no_name(fs_info, &root_key);
6892 if (IS_ERR(root))
6893 return ERR_CAST(root);
6894 6744
6895 trans = btrfs_start_transaction(root, 1); 6745 /* odd, couldn't find the block group, leave it alone */
6896 BUG_ON(!trans); 6746 if (!block_group)
6747 return -1;
6897 6748
6898 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 6749 /* no bytes used, we're good */
6899 if (err) 6750 if (!btrfs_block_group_used(&block_group->item))
6900 goto out; 6751 goto out;
6901 6752
6902 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 6753 space_info = block_group->space_info;
6903 BUG_ON(err); 6754 spin_lock(&space_info->lock);
6904
6905 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
6906 group->key.offset, 0, group->key.offset,
6907 0, 0, 0);
6908 BUG_ON(err);
6909
6910 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
6911 if (inode->i_state & I_NEW) {
6912 BTRFS_I(inode)->root = root;
6913 BTRFS_I(inode)->location.objectid = objectid;
6914 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
6915 BTRFS_I(inode)->location.offset = 0;
6916 btrfs_read_locked_inode(inode);
6917 unlock_new_inode(inode);
6918 BUG_ON(is_bad_inode(inode));
6919 } else {
6920 BUG_ON(1);
6921 }
6922 BTRFS_I(inode)->index_cnt = group->key.objectid;
6923
6924 err = btrfs_orphan_add(trans, inode);
6925out:
6926 btrfs_end_transaction(trans, root);
6927 if (err) {
6928 if (inode)
6929 iput(inode);
6930 inode = ERR_PTR(err);
6931 }
6932 return inode;
6933}
6934
6935int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
6936{
6937
6938 struct btrfs_ordered_sum *sums;
6939 struct btrfs_sector_sum *sector_sum;
6940 struct btrfs_ordered_extent *ordered;
6941 struct btrfs_root *root = BTRFS_I(inode)->root;
6942 struct list_head list;
6943 size_t offset;
6944 int ret;
6945 u64 disk_bytenr;
6946
6947 INIT_LIST_HEAD(&list);
6948
6949 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
6950 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
6951
6952 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
6953 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
6954 disk_bytenr + len - 1, &list);
6955
6956 while (!list_empty(&list)) {
6957 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
6958 list_del_init(&sums->list);
6959
6960 sector_sum = sums->sums;
6961 sums->bytenr = ordered->start;
6962 6755
6963 offset = 0; 6756 full = space_info->full;
6964 while (offset < sums->len) {
6965 sector_sum->bytenr += ordered->start - disk_bytenr;
6966 sector_sum++;
6967 offset += root->sectorsize;
6968 }
6969 6757
6970 btrfs_add_ordered_sum(inode, ordered, sums); 6758 /*
6759 * if this is the last block group we have in this space, we can't
6760 * relocate it unless we're able to allocate a new chunk below.
6761 *
6762 * Otherwise, we need to make sure we have room in the space to handle
6763 * all of the extents from this block group. If we can, we're good
6764 */
6765 if ((space_info->total_bytes != block_group->key.offset) &&
6766 (space_info->bytes_used + space_info->bytes_reserved +
6767 space_info->bytes_pinned + space_info->bytes_readonly +
6768 btrfs_block_group_used(&block_group->item) <
6769 space_info->total_bytes)) {
6770 spin_unlock(&space_info->lock);
6771 goto out;
6971 } 6772 }
6972 btrfs_put_ordered_extent(ordered); 6773 spin_unlock(&space_info->lock);
6973 return 0;
6974}
6975
6976int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
6977{
6978 struct btrfs_trans_handle *trans;
6979 struct btrfs_path *path;
6980 struct btrfs_fs_info *info = root->fs_info;
6981 struct extent_buffer *leaf;
6982 struct inode *reloc_inode;
6983 struct btrfs_block_group_cache *block_group;
6984 struct btrfs_key key;
6985 u64 skipped;
6986 u64 cur_byte;
6987 u64 total_found;
6988 u32 nritems;
6989 int ret;
6990 int progress;
6991 int pass = 0;
6992
6993 root = root->fs_info->extent_root;
6994
6995 block_group = btrfs_lookup_block_group(info, group_start);
6996 BUG_ON(!block_group);
6997
6998 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
6999 (unsigned long long)block_group->key.objectid,
7000 (unsigned long long)block_group->flags);
7001
7002 path = btrfs_alloc_path();
7003 BUG_ON(!path);
7004
7005 reloc_inode = create_reloc_inode(info, block_group);
7006 BUG_ON(IS_ERR(reloc_inode));
7007
7008 __alloc_chunk_for_shrink(root, block_group, 1);
7009 set_block_group_readonly(block_group);
7010
7011 btrfs_start_delalloc_inodes(info->tree_root);
7012 btrfs_wait_ordered_extents(info->tree_root, 0);
7013again:
7014 skipped = 0;
7015 total_found = 0;
7016 progress = 0;
7017 key.objectid = block_group->key.objectid;
7018 key.offset = 0;
7019 key.type = 0;
7020 cur_byte = key.objectid;
7021
7022 trans = btrfs_start_transaction(info->tree_root, 1);
7023 btrfs_commit_transaction(trans, info->tree_root);
7024 6774
7025 mutex_lock(&root->fs_info->cleaner_mutex); 6775 /*
7026 btrfs_clean_old_snapshots(info->tree_root); 6776 * ok we don't have enough space, but maybe we have free space on our
7027 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 6777 * devices to allocate new chunks for relocation, so loop through our
7028 mutex_unlock(&root->fs_info->cleaner_mutex); 6778 * alloc devices and guess if we have enough space. However, if we
6779 * were marked as full, then we know there aren't enough chunks, and we
6780 * can just return.
6781 */
6782 ret = -1;
6783 if (full)
6784 goto out;
7029 6785
7030 trans = btrfs_start_transaction(info->tree_root, 1); 6786 mutex_lock(&root->fs_info->chunk_mutex);
7031 btrfs_commit_transaction(trans, info->tree_root); 6787 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
6788 u64 min_free = btrfs_block_group_used(&block_group->item);
6789 u64 dev_offset, max_avail;
7032 6790
7033 while (1) { 6791 /*
7034 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 6792 * check to make sure we can actually find a chunk with enough
7035 if (ret < 0) 6793 * space to fit our block group in.
7036 goto out; 6794 */
7037next: 6795 if (device->total_bytes > device->bytes_used + min_free) {
7038 leaf = path->nodes[0]; 6796 ret = find_free_dev_extent(NULL, device, min_free,
7039 nritems = btrfs_header_nritems(leaf); 6797 &dev_offset, &max_avail);
7040 if (path->slots[0] >= nritems) { 6798 if (!ret)
7041 ret = btrfs_next_leaf(root, path);
7042 if (ret < 0)
7043 goto out;
7044 if (ret == 1) {
7045 ret = 0;
7046 break; 6799 break;
7047 } 6800 ret = -1;
7048 leaf = path->nodes[0];
7049 nritems = btrfs_header_nritems(leaf);
7050 } 6801 }
7051
7052 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7053
7054 if (key.objectid >= block_group->key.objectid +
7055 block_group->key.offset)
7056 break;
7057
7058 if (progress && need_resched()) {
7059 btrfs_release_path(root, path);
7060 cond_resched();
7061 progress = 0;
7062 continue;
7063 }
7064 progress = 1;
7065
7066 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
7067 key.objectid + key.offset <= cur_byte) {
7068 path->slots[0]++;
7069 goto next;
7070 }
7071
7072 total_found++;
7073 cur_byte = key.objectid + key.offset;
7074 btrfs_release_path(root, path);
7075
7076 __alloc_chunk_for_shrink(root, block_group, 0);
7077 ret = relocate_one_extent(root, path, &key, block_group,
7078 reloc_inode, pass);
7079 BUG_ON(ret < 0);
7080 if (ret > 0)
7081 skipped++;
7082
7083 key.objectid = cur_byte;
7084 key.type = 0;
7085 key.offset = 0;
7086 }
7087
7088 btrfs_release_path(root, path);
7089
7090 if (pass == 0) {
7091 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
7092 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
7093 }
7094
7095 if (total_found > 0) {
7096 printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
7097 (unsigned long long)total_found, pass);
7098 pass++;
7099 if (total_found == skipped && pass > 2) {
7100 iput(reloc_inode);
7101 reloc_inode = create_reloc_inode(info, block_group);
7102 pass = 0;
7103 }
7104 goto again;
7105 } 6802 }
7106 6803 mutex_unlock(&root->fs_info->chunk_mutex);
7107 /* delete reloc_inode */
7108 iput(reloc_inode);
7109
7110 /* unpin extents in this range */
7111 trans = btrfs_start_transaction(info->tree_root, 1);
7112 btrfs_commit_transaction(trans, info->tree_root);
7113
7114 spin_lock(&block_group->lock);
7115 WARN_ON(block_group->pinned > 0);
7116 WARN_ON(block_group->reserved > 0);
7117 WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
7118 spin_unlock(&block_group->lock);
7119 btrfs_put_block_group(block_group);
7120 ret = 0;
7121out: 6804out:
7122 btrfs_free_path(path); 6805 btrfs_put_block_group(block_group);
7123 return ret; 6806 return ret;
7124} 6807}
7125#endif
7126 6808
7127static int find_first_block_group(struct btrfs_root *root, 6809static int find_first_block_group(struct btrfs_root *root,
7128 struct btrfs_path *path, struct btrfs_key *key) 6810 struct btrfs_path *path, struct btrfs_key *key)
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7165{ 6847{
7166 struct btrfs_block_group_cache *block_group; 6848 struct btrfs_block_group_cache *block_group;
7167 struct btrfs_space_info *space_info; 6849 struct btrfs_space_info *space_info;
6850 struct btrfs_caching_control *caching_ctl;
7168 struct rb_node *n; 6851 struct rb_node *n;
7169 6852
6853 down_write(&info->extent_commit_sem);
6854 while (!list_empty(&info->caching_block_groups)) {
6855 caching_ctl = list_entry(info->caching_block_groups.next,
6856 struct btrfs_caching_control, list);
6857 list_del(&caching_ctl->list);
6858 put_caching_control(caching_ctl);
6859 }
6860 up_write(&info->extent_commit_sem);
6861
7170 spin_lock(&info->block_group_cache_lock); 6862 spin_lock(&info->block_group_cache_lock);
7171 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6863 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
7172 block_group = rb_entry(n, struct btrfs_block_group_cache, 6864 block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7180 up_write(&block_group->space_info->groups_sem); 6872 up_write(&block_group->space_info->groups_sem);
7181 6873
7182 if (block_group->cached == BTRFS_CACHE_STARTED) 6874 if (block_group->cached == BTRFS_CACHE_STARTED)
7183 wait_event(block_group->caching_q, 6875 wait_block_group_cache_done(block_group);
7184 block_group_cache_done(block_group));
7185 6876
7186 btrfs_remove_free_space_cache(block_group); 6877 btrfs_remove_free_space_cache(block_group);
7187 6878
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7251 spin_lock_init(&cache->lock); 6942 spin_lock_init(&cache->lock);
7252 spin_lock_init(&cache->tree_lock); 6943 spin_lock_init(&cache->tree_lock);
7253 cache->fs_info = info; 6944 cache->fs_info = info;
7254 init_waitqueue_head(&cache->caching_q);
7255 INIT_LIST_HEAD(&cache->list); 6945 INIT_LIST_HEAD(&cache->list);
7256 INIT_LIST_HEAD(&cache->cluster_list); 6946 INIT_LIST_HEAD(&cache->cluster_list);
7257 6947
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7273 cache->flags = btrfs_block_group_flags(&cache->item); 6963 cache->flags = btrfs_block_group_flags(&cache->item);
7274 cache->sectorsize = root->sectorsize; 6964 cache->sectorsize = root->sectorsize;
7275 6965
7276 remove_sb_from_cache(root, cache);
7277
7278 /* 6966 /*
7279 * check for two cases, either we are full, and therefore 6967 * check for two cases, either we are full, and therefore
7280 * don't need to bother with the caching work since we won't 6968 * don't need to bother with the caching work since we won't
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7283 * time, particularly in the full case. 6971 * time, particularly in the full case.
7284 */ 6972 */
7285 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 6973 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
6974 exclude_super_stripes(root, cache);
6975 cache->last_byte_to_unpin = (u64)-1;
7286 cache->cached = BTRFS_CACHE_FINISHED; 6976 cache->cached = BTRFS_CACHE_FINISHED;
6977 free_excluded_extents(root, cache);
7287 } else if (btrfs_block_group_used(&cache->item) == 0) { 6978 } else if (btrfs_block_group_used(&cache->item) == 0) {
6979 exclude_super_stripes(root, cache);
6980 cache->last_byte_to_unpin = (u64)-1;
7288 cache->cached = BTRFS_CACHE_FINISHED; 6981 cache->cached = BTRFS_CACHE_FINISHED;
7289 add_new_free_space(cache, root->fs_info, 6982 add_new_free_space(cache, root->fs_info,
7290 found_key.objectid, 6983 found_key.objectid,
7291 found_key.objectid + 6984 found_key.objectid +
7292 found_key.offset); 6985 found_key.offset);
6986 free_excluded_extents(root, cache);
7293 } 6987 }
7294 6988
7295 ret = update_space_info(info, cache->flags, found_key.offset, 6989 ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7297 &space_info); 6991 &space_info);
7298 BUG_ON(ret); 6992 BUG_ON(ret);
7299 cache->space_info = space_info; 6993 cache->space_info = space_info;
6994 spin_lock(&cache->space_info->lock);
6995 cache->space_info->bytes_super += cache->bytes_super;
6996 spin_unlock(&cache->space_info->lock);
6997
7300 down_write(&space_info->groups_sem); 6998 down_write(&space_info->groups_sem);
7301 list_add_tail(&cache->list, &space_info->block_groups); 6999 list_add_tail(&cache->list, &space_info->block_groups);
7302 up_write(&space_info->groups_sem); 7000 up_write(&space_info->groups_sem);
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7346 atomic_set(&cache->count, 1); 7044 atomic_set(&cache->count, 1);
7347 spin_lock_init(&cache->lock); 7045 spin_lock_init(&cache->lock);
7348 spin_lock_init(&cache->tree_lock); 7046 spin_lock_init(&cache->tree_lock);
7349 init_waitqueue_head(&cache->caching_q);
7350 INIT_LIST_HEAD(&cache->list); 7047 INIT_LIST_HEAD(&cache->list);
7351 INIT_LIST_HEAD(&cache->cluster_list); 7048 INIT_LIST_HEAD(&cache->cluster_list);
7352 7049
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7355 cache->flags = type; 7052 cache->flags = type;
7356 btrfs_set_block_group_flags(&cache->item, type); 7053 btrfs_set_block_group_flags(&cache->item, type);
7357 7054
7055 cache->last_byte_to_unpin = (u64)-1;
7358 cache->cached = BTRFS_CACHE_FINISHED; 7056 cache->cached = BTRFS_CACHE_FINISHED;
7359 remove_sb_from_cache(root, cache); 7057 exclude_super_stripes(root, cache);
7360 7058
7361 add_new_free_space(cache, root->fs_info, chunk_offset, 7059 add_new_free_space(cache, root->fs_info, chunk_offset,
7362 chunk_offset + size); 7060 chunk_offset + size);
7363 7061
7062 free_excluded_extents(root, cache);
7063
7364 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7064 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7365 &cache->space_info); 7065 &cache->space_info);
7366 BUG_ON(ret); 7066 BUG_ON(ret);
7067
7068 spin_lock(&cache->space_info->lock);
7069 cache->space_info->bytes_super += cache->bytes_super;
7070 spin_unlock(&cache->space_info->lock);
7071
7367 down_write(&cache->space_info->groups_sem); 7072 down_write(&cache->space_info->groups_sem);
7368 list_add_tail(&cache->list, &cache->space_info->block_groups); 7073 list_add_tail(&cache->list, &cache->space_info->block_groups);
7369 up_write(&cache->space_info->groups_sem); 7074 up_write(&cache->space_info->groups_sem);
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7429 up_write(&block_group->space_info->groups_sem); 7134 up_write(&block_group->space_info->groups_sem);
7430 7135
7431 if (block_group->cached == BTRFS_CACHE_STARTED) 7136 if (block_group->cached == BTRFS_CACHE_STARTED)
7432 wait_event(block_group->caching_q, 7137 wait_block_group_cache_done(block_group);
7433 block_group_cache_done(block_group));
7434 7138
7435 btrfs_remove_free_space_cache(block_group); 7139 btrfs_remove_free_space_cache(block_group);
7436 7140