Diffstat (limited to 'fs/btrfs/extent-tree.c')
 -rw-r--r--  fs/btrfs/extent-tree.c | 2038
 1 file changed, 1018 insertions(+), 1020 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..359a754c782c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
32#include "locking.h" 32#include "locking.h"
33#include "free-space-cache.h" 33#include "free-space-cache.h"
34 34
35static int update_reserved_extents(struct btrfs_root *root,
36 u64 bytenr, u64 num, int reserve);
37static int update_block_group(struct btrfs_trans_handle *trans, 35static int update_block_group(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 36 struct btrfs_root *root,
39 u64 bytenr, u64 num_bytes, int alloc, 37 u64 bytenr, u64 num_bytes, int alloc,
40 int mark_free); 38 int mark_free);
39static int update_reserved_extents(struct btrfs_block_group_cache *cache,
40 u64 num_bytes, int reserve);
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, 42 struct btrfs_root *root,
43 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
57 u64 parent, u64 root_objectid, 57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key, 58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins); 59 int level, struct btrfs_key *ins);
60
61static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force); 62 u64 flags, int force);
63static int pin_down_bytes(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root,
65 struct btrfs_path *path,
66 u64 bytenr, u64 num_bytes,
67 int is_data, int reserved,
68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key);
71static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
72 int dump_block_groups);
64 73
65static noinline int 74static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache) 75block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
153 return ret; 162 return ret;
154} 163}
155 164
156/* 165static int add_excluded_extent(struct btrfs_root *root,
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't 166 u64 start, u64 num_bytes)
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{ 167{
164 u64 start, end, last = 0; 168 u64 end = start + num_bytes - 1;
165 int ret; 169 set_extent_bits(&root->fs_info->freed_extents[0],
170 start, end, EXTENT_UPTODATE, GFP_NOFS);
171 set_extent_bits(&root->fs_info->freed_extents[1],
172 start, end, EXTENT_UPTODATE, GFP_NOFS);
173 return 0;
174}
166 175
167 while (1) { 176static void free_excluded_extents(struct btrfs_root *root,
168 ret = find_first_extent_bit(&info->pinned_extents, last, 177 struct btrfs_block_group_cache *cache)
169 &start, &end, 178{
170 EXTENT_LOCKED|EXTENT_DIRTY); 179 u64 start, end;
171 if (ret)
172 break;
173 180
174 clear_extent_bits(&info->pinned_extents, start, end, 181 start = cache->key.objectid;
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); 182 end = start + cache->key.offset - 1;
176 last = end+1; 183
177 } 184 clear_extent_bits(&root->fs_info->freed_extents[0],
185 start, end, EXTENT_UPTODATE, GFP_NOFS);
186 clear_extent_bits(&root->fs_info->freed_extents[1],
187 start, end, EXTENT_UPTODATE, GFP_NOFS);
178} 188}
179 189
180static int remove_sb_from_cache(struct btrfs_root *root, 190static int exclude_super_stripes(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache) 191 struct btrfs_block_group_cache *cache)
182{ 192{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr; 193 u64 bytenr;
185 u64 *logical; 194 u64 *logical;
186 int stripe_len; 195 int stripe_len;
@@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
192 cache->key.objectid, bytenr, 201 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len); 202 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret); 203 BUG_ON(ret);
204
195 while (nr--) { 205 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents, 206 cache->bytes_super += stripe_len;
197 logical[nr], 207 ret = add_excluded_extent(root, logical[nr],
198 logical[nr] + stripe_len - 1, GFP_NOFS); 208 stripe_len);
209 BUG_ON(ret);
199 } 210 }
211
200 kfree(logical); 212 kfree(logical);
201 } 213 }
202
203 return 0; 214 return 0;
204} 215}
205 216
217static struct btrfs_caching_control *
218get_caching_control(struct btrfs_block_group_cache *cache)
219{
220 struct btrfs_caching_control *ctl;
221
222 spin_lock(&cache->lock);
223 if (cache->cached != BTRFS_CACHE_STARTED) {
224 spin_unlock(&cache->lock);
225 return NULL;
226 }
227
228 ctl = cache->caching_ctl;
229 atomic_inc(&ctl->count);
230 spin_unlock(&cache->lock);
231 return ctl;
232}
233
234static void put_caching_control(struct btrfs_caching_control *ctl)
235{
236 if (atomic_dec_and_test(&ctl->count))
237 kfree(ctl);
238}
239
206/* 240/*
207 * this is only called by cache_block_group, since we could have freed extents 241 * this is only called by cache_block_group, since we could have freed extents
208 * we need to check the pinned_extents for any extents that can't be used yet 242 * we need to check the pinned_extents for any extents that can't be used yet
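The get_caching_control()/put_caching_control() helpers added above imply a small reference-counted control object shared by the caching kthread, the fs_info->caching_block_groups list and any thread waiting on caching progress. The sketch below reconstructs its shape purely from the fields this patch touches (the authoritative definition lives in the btrfs headers, so treat this as an illustration, not a quote):

struct btrfs_caching_control {
	struct list_head list;		/* linked on fs_info->caching_block_groups */
	struct mutex mutex;		/* serializes progress updates vs. readers */
	wait_queue_head_t wait;		/* woken as free space is discovered */
	struct btrfs_block_group_cache *block_group;
	u64 progress;			/* caching has covered [objectid, progress) */
	atomic_t count;			/* dropped via put_caching_control() */
};

The excluded-extent helpers play a related role: super-block stripes inside a block group are tagged EXTENT_UPTODATE in both freed_extents trees, so that add_new_free_space() (which now scans for EXTENT_DIRTY | EXTENT_UPTODATE, see the next hunk) never hands those ranges to the free space cache.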
@@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
215 int ret; 249 int ret;
216 250
217 while (start < end) { 251 while (start < end) {
218 ret = find_first_extent_bit(&info->pinned_extents, start, 252 ret = find_first_extent_bit(info->pinned_extents, start,
219 &extent_start, &extent_end, 253 &extent_start, &extent_end,
220 EXTENT_DIRTY|EXTENT_LOCKED); 254 EXTENT_DIRTY | EXTENT_UPTODATE);
221 if (ret) 255 if (ret)
222 break; 256 break;
223 257
@@ -249,22 +283,27 @@ static int caching_kthread(void *data)
249{ 283{
250 struct btrfs_block_group_cache *block_group = data; 284 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info; 285 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0; 286 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
287 struct btrfs_root *extent_root = fs_info->extent_root;
253 struct btrfs_path *path; 288 struct btrfs_path *path;
254 int ret = 0;
255 struct btrfs_key key;
256 struct extent_buffer *leaf; 289 struct extent_buffer *leaf;
257 int slot; 290 struct btrfs_key key;
258 u64 total_found = 0; 291 u64 total_found = 0;
259 292 u64 last = 0;
260 BUG_ON(!fs_info); 293 u32 nritems;
294 int ret = 0;
261 295
262 path = btrfs_alloc_path(); 296 path = btrfs_alloc_path();
263 if (!path) 297 if (!path)
264 return -ENOMEM; 298 return -ENOMEM;
265 299
266 atomic_inc(&block_group->space_info->caching_threads); 300 exclude_super_stripes(extent_root, block_group);
301 spin_lock(&block_group->space_info->lock);
302 block_group->space_info->bytes_super += block_group->bytes_super;
303 spin_unlock(&block_group->space_info->lock);
304
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 305 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
306
268 /* 307 /*
269 * We don't want to deadlock with somebody trying to allocate a new 308 * We don't want to deadlock with somebody trying to allocate a new
270 * extent for the extent root while also trying to search the extent 309 * extent for the extent root while also trying to search the extent
@@ -277,74 +316,64 @@ static int caching_kthread(void *data)
277 316
278 key.objectid = last; 317 key.objectid = last;
279 key.offset = 0; 318 key.offset = 0;
280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 319 key.type = BTRFS_EXTENT_ITEM_KEY;
281again: 320again:
321 mutex_lock(&caching_ctl->mutex);
282 /* need to make sure the commit_root doesn't disappear */ 322 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem); 323 down_read(&fs_info->extent_commit_sem);
284 324
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 325 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
286 if (ret < 0) 326 if (ret < 0)
287 goto err; 327 goto err;
288 328
329 leaf = path->nodes[0];
330 nritems = btrfs_header_nritems(leaf);
331
289 while (1) { 332 while (1) {
290 smp_mb(); 333 smp_mb();
291 if (block_group->fs_info->closing > 1) { 334 if (fs_info->closing > 1) {
292 last = (u64)-1; 335 last = (u64)-1;
293 break; 336 break;
294 } 337 }
295 338
296 leaf = path->nodes[0]; 339 if (path->slots[0] < nritems) {
297 slot = path->slots[0]; 340 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
298 if (slot >= btrfs_header_nritems(leaf)) { 341 } else {
299 ret = btrfs_next_leaf(fs_info->extent_root, path); 342 ret = find_next_key(path, 0, &key);
300 if (ret < 0) 343 if (ret)
301 goto err;
302 else if (ret)
303 break; 344 break;
304 345
305 if (need_resched() || 346 caching_ctl->progress = last;
306 btrfs_transaction_in_commit(fs_info)) { 347 btrfs_release_path(extent_root, path);
307 leaf = path->nodes[0]; 348 up_read(&fs_info->extent_commit_sem);
308 349 mutex_unlock(&caching_ctl->mutex);
309 /* this shouldn't happen, but if the 350 if (btrfs_transaction_in_commit(fs_info))
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1); 351 schedule_timeout(1);
323 goto again; 352 else
324 } 353 cond_resched();
354 goto again;
355 }
325 356
357 if (key.objectid < block_group->key.objectid) {
358 path->slots[0]++;
326 continue; 359 continue;
327 } 360 }
328 btrfs_item_key_to_cpu(leaf, &key, slot);
329 if (key.objectid < block_group->key.objectid)
330 goto next;
331 361
332 if (key.objectid >= block_group->key.objectid + 362 if (key.objectid >= block_group->key.objectid +
333 block_group->key.offset) 363 block_group->key.offset)
334 break; 364 break;
335 365
336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 366 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
337 total_found += add_new_free_space(block_group, 367 total_found += add_new_free_space(block_group,
338 fs_info, last, 368 fs_info, last,
339 key.objectid); 369 key.objectid);
340 last = key.objectid + key.offset; 370 last = key.objectid + key.offset;
341 }
342 371
343 if (total_found > (1024 * 1024 * 2)) { 372 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0; 373 total_found = 0;
345 wake_up(&block_group->caching_q); 374 wake_up(&caching_ctl->wait);
375 }
346 } 376 }
347next:
348 path->slots[0]++; 377 path->slots[0]++;
349 } 378 }
350 ret = 0; 379 ret = 0;
@@ -352,33 +381,65 @@ next:
352 total_found += add_new_free_space(block_group, fs_info, last, 381 total_found += add_new_free_space(block_group, fs_info, last,
353 block_group->key.objectid + 382 block_group->key.objectid +
354 block_group->key.offset); 383 block_group->key.offset);
384 caching_ctl->progress = (u64)-1;
355 385
356 spin_lock(&block_group->lock); 386 spin_lock(&block_group->lock);
387 block_group->caching_ctl = NULL;
357 block_group->cached = BTRFS_CACHE_FINISHED; 388 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock); 389 spin_unlock(&block_group->lock);
359 390
360err: 391err:
361 btrfs_free_path(path); 392 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem); 393 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365 394
395 free_excluded_extents(extent_root, block_group);
396
397 mutex_unlock(&caching_ctl->mutex);
398 wake_up(&caching_ctl->wait);
399
400 put_caching_control(caching_ctl);
401 atomic_dec(&block_group->space_info->caching_threads);
366 return 0; 402 return 0;
367} 403}
368 404
369static int cache_block_group(struct btrfs_block_group_cache *cache) 405static int cache_block_group(struct btrfs_block_group_cache *cache)
370{ 406{
407 struct btrfs_fs_info *fs_info = cache->fs_info;
408 struct btrfs_caching_control *caching_ctl;
371 struct task_struct *tsk; 409 struct task_struct *tsk;
372 int ret = 0; 410 int ret = 0;
373 411
412 smp_mb();
413 if (cache->cached != BTRFS_CACHE_NO)
414 return 0;
415
416 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
417 BUG_ON(!caching_ctl);
418
419 INIT_LIST_HEAD(&caching_ctl->list);
420 mutex_init(&caching_ctl->mutex);
421 init_waitqueue_head(&caching_ctl->wait);
422 caching_ctl->block_group = cache;
423 caching_ctl->progress = cache->key.objectid;
424 /* one for caching kthread, one for caching block group list */
425 atomic_set(&caching_ctl->count, 2);
426
374 spin_lock(&cache->lock); 427 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) { 428 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock); 429 spin_unlock(&cache->lock);
377 return ret; 430 kfree(caching_ctl);
431 return 0;
378 } 432 }
433 cache->caching_ctl = caching_ctl;
379 cache->cached = BTRFS_CACHE_STARTED; 434 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock); 435 spin_unlock(&cache->lock);
381 436
437 down_write(&fs_info->extent_commit_sem);
438 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
439 up_write(&fs_info->extent_commit_sem);
440
441 atomic_inc(&cache->space_info->caching_threads);
442
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid); 444 cache->key.objectid);
384 if (IS_ERR(tsk)) { 445 if (IS_ERR(tsk)) {
@@ -1511,7 +1572,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1511static void btrfs_issue_discard(struct block_device *bdev, 1572static void btrfs_issue_discard(struct block_device *bdev,
1512 u64 start, u64 len) 1573 u64 start, u64 len)
1513{ 1574{
1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1575 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1576 DISCARD_FL_BARRIER);
1515} 1577}
1516#endif 1578#endif
1517 1579
@@ -1656,7 +1718,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1656 parent, ref_root, flags, 1718 parent, ref_root, flags,
1657 ref->objectid, ref->offset, 1719 ref->objectid, ref->offset,
1658 &ins, node->ref_mod); 1720 &ins, node->ref_mod);
1659 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1660 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1721 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1661 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1722 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1662 node->num_bytes, parent, 1723 node->num_bytes, parent,
@@ -1782,7 +1843,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1782 extent_op->flags_to_set, 1843 extent_op->flags_to_set,
1783 &extent_op->key, 1844 &extent_op->key,
1784 ref->level, &ins); 1845 ref->level, &ins);
1785 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1786 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1846 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1787 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1847 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1788 node->num_bytes, parent, ref_root, 1848 node->num_bytes, parent, ref_root,
@@ -1817,16 +1877,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1817 BUG_ON(extent_op); 1877 BUG_ON(extent_op);
1818 head = btrfs_delayed_node_to_head(node); 1878 head = btrfs_delayed_node_to_head(node);
1819 if (insert_reserved) { 1879 if (insert_reserved) {
1880 int mark_free = 0;
1881 struct extent_buffer *must_clean = NULL;
1882
1883 ret = pin_down_bytes(trans, root, NULL,
1884 node->bytenr, node->num_bytes,
1885 head->is_data, 1, &must_clean);
1886 if (ret > 0)
1887 mark_free = 1;
1888
1889 if (must_clean) {
1890 clean_tree_block(NULL, root, must_clean);
1891 btrfs_tree_unlock(must_clean);
1892 free_extent_buffer(must_clean);
1893 }
1820 if (head->is_data) { 1894 if (head->is_data) {
1821 ret = btrfs_del_csums(trans, root, 1895 ret = btrfs_del_csums(trans, root,
1822 node->bytenr, 1896 node->bytenr,
1823 node->num_bytes); 1897 node->num_bytes);
1824 BUG_ON(ret); 1898 BUG_ON(ret);
1825 } 1899 }
1826 btrfs_update_pinned_extents(root, node->bytenr, 1900 if (mark_free) {
1827 node->num_bytes, 1); 1901 ret = btrfs_free_reserved_extent(root,
1828 update_reserved_extents(root, node->bytenr, 1902 node->bytenr,
1829 node->num_bytes, 0); 1903 node->num_bytes);
1904 BUG_ON(ret);
1905 }
1830 } 1906 }
1831 mutex_unlock(&head->mutex); 1907 mutex_unlock(&head->mutex);
1832 return 0; 1908 return 0;
@@ -2691,60 +2767,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2691 alloc_target); 2767 alloc_target);
2692} 2768}
2693 2769
2770static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2771{
2772 u64 num_bytes;
2773 int level;
2774
2775 level = BTRFS_MAX_LEVEL - 2;
2776 /*
2777 * NOTE: these calculations are absolutely the worst possible case.
2778 * This assumes that _every_ item we insert will require a new leaf, and
2779 * that the tree has grown to its maximum level size.
2780 */
2781
2782 /*
 2783 * for every item we insert we could insert both an extent item and an
 2784 * extent ref item. Then for every item we insert, we will need to cow
2785 * both the original leaf, plus the leaf to the left and right of it.
2786 *
2787 * Unless we are talking about the extent root, then we just want the
2788 * number of items * 2, since we just need the extent item plus its ref.
2789 */
2790 if (root == root->fs_info->extent_root)
2791 num_bytes = num_items * 2;
2792 else
2793 num_bytes = (num_items + (2 * num_items)) * 3;
2794
2795 /*
2796 * num_bytes is total number of leaves we could need times the leaf
2797 * size, and then for every leaf we could end up cow'ing 2 nodes per
2798 * level, down to the leaf level.
2799 */
2800 num_bytes = (num_bytes * root->leafsize) +
2801 (num_bytes * (level * 2)) * root->nodesize;
2802
2803 return num_bytes;
2804}
2805
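To make the worst-case arithmetic above concrete, here is a small stand-alone example; the 4 KiB leaf/node size and BTRFS_MAX_LEVEL == 8 are illustrative assumptions, not values taken from this patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t leafsize = 4096, nodesize = 4096;	/* assumed 4 KiB metadata blocks */
	int level = 8 - 2;				/* BTRFS_MAX_LEVEL - 2, as above */
	int num_items = 1;

	/* non-extent root: (item + ref) per insert, times 3 leaves for COW */
	uint64_t leaves = (num_items + 2 * num_items) * 3;

	/* each leaf, plus 2 cowed nodes per level down to the leaf level */
	uint64_t bytes = leaves * leafsize + (leaves * (level * 2)) * nodesize;

	/* prints 479232, i.e. roughly 468 KiB reserved for a single item */
	printf("%llu\n", (unsigned long long)bytes);
	return 0;
}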
2694/* 2806/*
2695 * for now this just makes sure we have at least 5% of our metadata space free 2807 * Unreserve metadata space for delalloc. If we have less reserved credits than
2696 * for use. 2808 * we have extents, this function does nothing.
2697 */ 2809 */
2698int btrfs_check_metadata_free_space(struct btrfs_root *root) 2810int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2811 struct inode *inode, int num_items)
2699{ 2812{
2700 struct btrfs_fs_info *info = root->fs_info; 2813 struct btrfs_fs_info *info = root->fs_info;
2701 struct btrfs_space_info *meta_sinfo; 2814 struct btrfs_space_info *meta_sinfo;
2702 u64 alloc_target, thresh; 2815 u64 num_bytes;
2703 int committed = 0, ret; 2816 u64 alloc_target;
2817 bool bug = false;
2704 2818
2705 /* get the space info for where the metadata will live */ 2819 /* get the space info for where the metadata will live */
2706 alloc_target = btrfs_get_alloc_profile(root, 0); 2820 alloc_target = btrfs_get_alloc_profile(root, 0);
2707 meta_sinfo = __find_space_info(info, alloc_target); 2821 meta_sinfo = __find_space_info(info, alloc_target);
2708 2822
2709again: 2823 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2824 num_items);
2825
2710 spin_lock(&meta_sinfo->lock); 2826 spin_lock(&meta_sinfo->lock);
2711 if (!meta_sinfo->full) 2827 if (BTRFS_I(inode)->delalloc_reserved_extents <=
2712 thresh = meta_sinfo->total_bytes * 80; 2828 BTRFS_I(inode)->delalloc_extents) {
2713 else 2829 spin_unlock(&meta_sinfo->lock);
2714 thresh = meta_sinfo->total_bytes * 95; 2830 return 0;
2831 }
2832
2833 BTRFS_I(inode)->delalloc_reserved_extents--;
2834 BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
2835
2836 if (meta_sinfo->bytes_delalloc < num_bytes) {
2837 bug = true;
2838 meta_sinfo->bytes_delalloc = 0;
2839 } else {
2840 meta_sinfo->bytes_delalloc -= num_bytes;
2841 }
2842 spin_unlock(&meta_sinfo->lock);
2843
2844 BUG_ON(bug);
2845
2846 return 0;
2847}
2715 2848
2849static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2850{
2851 u64 thresh;
2852
2853 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2854 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2855 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2856 meta_sinfo->bytes_may_use;
2857
2858 thresh = meta_sinfo->total_bytes - thresh;
2859 thresh *= 80;
2716 do_div(thresh, 100); 2860 do_div(thresh, 100);
2861 if (thresh <= meta_sinfo->bytes_delalloc)
2862 meta_sinfo->force_delalloc = 1;
2863 else
2864 meta_sinfo->force_delalloc = 0;
2865}
2717 2866
2718 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2867static int maybe_allocate_chunk(struct btrfs_root *root,
2719 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2868 struct btrfs_space_info *info)
2720 struct btrfs_trans_handle *trans; 2869{
2721 if (!meta_sinfo->full) { 2870 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
2722 meta_sinfo->force_alloc = 1; 2871 struct btrfs_trans_handle *trans;
2723 spin_unlock(&meta_sinfo->lock); 2872 bool wait = false;
2873 int ret = 0;
2874 u64 min_metadata;
2875 u64 free_space;
2724 2876
2725 trans = btrfs_start_transaction(root, 1); 2877 free_space = btrfs_super_total_bytes(disk_super);
2726 if (!trans) 2878 /*
2727 return -ENOMEM; 2879 * we allow the metadata to grow to a max of either 5gb or 5% of the
2880 * space in the volume.
2881 */
2882 min_metadata = min((u64)5 * 1024 * 1024 * 1024,
2883 div64_u64(free_space * 5, 100));
2884 if (info->total_bytes >= min_metadata) {
2885 spin_unlock(&info->lock);
2886 return 0;
2887 }
2728 2888
2729 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2889 if (info->full) {
2730 2 * 1024 * 1024, alloc_target, 0); 2890 spin_unlock(&info->lock);
2731 btrfs_end_transaction(trans, root); 2891 return 0;
2892 }
2893
2894 if (!info->allocating_chunk) {
2895 info->force_alloc = 1;
2896 info->allocating_chunk = 1;
2897 init_waitqueue_head(&info->wait);
2898 } else {
2899 wait = true;
2900 }
2901
2902 spin_unlock(&info->lock);
2903
2904 if (wait) {
2905 wait_event(info->wait,
2906 !info->allocating_chunk);
2907 return 1;
2908 }
2909
2910 trans = btrfs_start_transaction(root, 1);
2911 if (!trans) {
2912 ret = -ENOMEM;
2913 goto out;
2914 }
2915
2916 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2917 4096 + 2 * 1024 * 1024,
2918 info->flags, 0);
2919 btrfs_end_transaction(trans, root);
2920 if (ret)
2921 goto out;
2922out:
2923 spin_lock(&info->lock);
2924 info->allocating_chunk = 0;
2925 spin_unlock(&info->lock);
2926 wake_up(&info->wait);
2927
2928 if (ret)
2929 return 0;
2930 return 1;
2931}
2932
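The "grow metadata to at most 5 GiB or 5% of the volume" rule in maybe_allocate_chunk() is easy to sanity-check with a stand-alone calculation; the volume sizes below are made-up examples:

#include <stdio.h>
#include <stdint.h>

/* mirrors the min_metadata computation in maybe_allocate_chunk() above */
static uint64_t min_metadata(uint64_t free_space)
{
	uint64_t five_gib = (uint64_t)5 * 1024 * 1024 * 1024;
	uint64_t five_percent = free_space * 5 / 100;

	return five_gib < five_percent ? five_gib : five_percent;
}

int main(void)
{
	/* 40 GiB volume: capped by the 5% rule at 2 GiB */
	printf("%llu\n", (unsigned long long)min_metadata((uint64_t)40 << 30));
	/* 1 TiB volume: capped by the absolute 5 GiB ceiling */
	printf("%llu\n", (unsigned long long)min_metadata((uint64_t)1 << 40));
	return 0;
}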
2933/*
2934 * Reserve metadata space for delalloc.
2935 */
2936int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2937 struct inode *inode, int num_items)
2938{
2939 struct btrfs_fs_info *info = root->fs_info;
2940 struct btrfs_space_info *meta_sinfo;
2941 u64 num_bytes;
2942 u64 used;
2943 u64 alloc_target;
2944 int flushed = 0;
2945 int force_delalloc;
2946
2947 /* get the space info for where the metadata will live */
2948 alloc_target = btrfs_get_alloc_profile(root, 0);
2949 meta_sinfo = __find_space_info(info, alloc_target);
2950
2951 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2952 num_items);
2953again:
2954 spin_lock(&meta_sinfo->lock);
2955
2956 force_delalloc = meta_sinfo->force_delalloc;
2957
2958 if (unlikely(!meta_sinfo->bytes_root))
2959 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
2960
2961 if (!flushed)
2962 meta_sinfo->bytes_delalloc += num_bytes;
2963
2964 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2965 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2966 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2967 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
2968
2969 if (used > meta_sinfo->total_bytes) {
2970 flushed++;
2971
2972 if (flushed == 1) {
2973 if (maybe_allocate_chunk(root, meta_sinfo))
2974 goto again;
2975 flushed++;
2976 } else {
2977 spin_unlock(&meta_sinfo->lock);
2978 }
2979
2980 if (flushed == 2) {
2981 filemap_flush(inode->i_mapping);
2982 goto again;
2983 } else if (flushed == 3) {
2984 btrfs_start_delalloc_inodes(root);
2985 btrfs_wait_ordered_extents(root, 0);
2732 goto again; 2986 goto again;
2733 } 2987 }
2988 spin_lock(&meta_sinfo->lock);
2989 meta_sinfo->bytes_delalloc -= num_bytes;
2734 spin_unlock(&meta_sinfo->lock); 2990 spin_unlock(&meta_sinfo->lock);
2991 printk(KERN_ERR "enospc, has %d, reserved %d\n",
2992 BTRFS_I(inode)->delalloc_extents,
2993 BTRFS_I(inode)->delalloc_reserved_extents);
2994 dump_space_info(meta_sinfo, 0, 0);
2995 return -ENOSPC;
2996 }
2735 2997
2736 if (!committed) { 2998 BTRFS_I(inode)->delalloc_reserved_extents++;
2737 committed = 1; 2999 check_force_delalloc(meta_sinfo);
2738 trans = btrfs_join_transaction(root, 1); 3000 spin_unlock(&meta_sinfo->lock);
2739 if (!trans) 3001
2740 return -ENOMEM; 3002 if (!flushed && force_delalloc)
2741 ret = btrfs_commit_transaction(trans, root); 3003 filemap_flush(inode->i_mapping);
2742 if (ret) 3004
2743 return ret; 3005 return 0;
3006}
3007
3008/*
3009 * unreserve num_items number of items worth of metadata space. This needs to
3010 * be paired with btrfs_reserve_metadata_space.
3011 *
3012 * NOTE: if you have the option, run this _AFTER_ you do a
3013 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
 3014 * operations which will result in more used metadata, so we want to make sure we
3015 * can do that without issue.
3016 */
3017int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3018{
3019 struct btrfs_fs_info *info = root->fs_info;
3020 struct btrfs_space_info *meta_sinfo;
3021 u64 num_bytes;
3022 u64 alloc_target;
3023 bool bug = false;
3024
3025 /* get the space info for where the metadata will live */
3026 alloc_target = btrfs_get_alloc_profile(root, 0);
3027 meta_sinfo = __find_space_info(info, alloc_target);
3028
3029 num_bytes = calculate_bytes_needed(root, num_items);
3030
3031 spin_lock(&meta_sinfo->lock);
3032 if (meta_sinfo->bytes_may_use < num_bytes) {
3033 bug = true;
3034 meta_sinfo->bytes_may_use = 0;
3035 } else {
3036 meta_sinfo->bytes_may_use -= num_bytes;
3037 }
3038 spin_unlock(&meta_sinfo->lock);
3039
3040 BUG_ON(bug);
3041
3042 return 0;
3043}
3044
3045/*
 3046 * Reserve some metadata space for use. We'll calculate the worst case number
3047 * of bytes that would be needed to modify num_items number of items. If we
3048 * have space, fantastic, if not, you get -ENOSPC. Please call
3049 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
3050 * items you reserved, since whatever metadata you needed should have already
3051 * been allocated.
3052 *
3053 * This will commit the transaction to make more space if we don't have enough
 3054 * metadata space. The only time we don't do this is if we're reserving space
 3055 * inside of a transaction, then we will just return -ENOSPC and it is the
 3056 * caller's responsibility to handle it properly.
3057 */
3058int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
3059{
3060 struct btrfs_fs_info *info = root->fs_info;
3061 struct btrfs_space_info *meta_sinfo;
3062 u64 num_bytes;
3063 u64 used;
3064 u64 alloc_target;
3065 int retries = 0;
3066
3067 /* get the space info for where the metadata will live */
3068 alloc_target = btrfs_get_alloc_profile(root, 0);
3069 meta_sinfo = __find_space_info(info, alloc_target);
3070
3071 num_bytes = calculate_bytes_needed(root, num_items);
3072again:
3073 spin_lock(&meta_sinfo->lock);
3074
3075 if (unlikely(!meta_sinfo->bytes_root))
3076 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3077
3078 if (!retries)
3079 meta_sinfo->bytes_may_use += num_bytes;
3080
3081 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3082 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3083 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3084 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3085
3086 if (used > meta_sinfo->total_bytes) {
3087 retries++;
3088 if (retries == 1) {
3089 if (maybe_allocate_chunk(root, meta_sinfo))
3090 goto again;
3091 retries++;
3092 } else {
3093 spin_unlock(&meta_sinfo->lock);
3094 }
3095
3096 if (retries == 2) {
3097 btrfs_start_delalloc_inodes(root);
3098 btrfs_wait_ordered_extents(root, 0);
2744 goto again; 3099 goto again;
2745 } 3100 }
3101 spin_lock(&meta_sinfo->lock);
3102 meta_sinfo->bytes_may_use -= num_bytes;
3103 spin_unlock(&meta_sinfo->lock);
3104
3105 dump_space_info(meta_sinfo, 0, 0);
2746 return -ENOSPC; 3106 return -ENOSPC;
2747 } 3107 }
3108
3109 check_force_delalloc(meta_sinfo);
2748 spin_unlock(&meta_sinfo->lock); 3110 spin_unlock(&meta_sinfo->lock);
2749 3111
2750 return 0; 3112 return 0;
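The comment above btrfs_unreserve_metadata_space() spells out the intended pairing with btrfs_reserve_metadata_space(), and that the unreserve should ideally run after btrfs_end_transaction() so delayed refs have already been processed. A sketch of a caller following that contract (the caller itself is hypothetical; only the reserve/unreserve and transaction helpers come from this patch and its headers):

/* hypothetical caller, shown only to illustrate the pairing contract */
static int example_update_one_item(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/* reserve worst-case space for touching a single item */
	ret = btrfs_reserve_metadata_space(root, 1);
	if (ret)
		return ret;		/* -ENOSPC: nothing was reserved */

	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		btrfs_unreserve_metadata_space(root, 1);
		return -ENOMEM;
	}

	/* ... modify the item here ... */

	btrfs_end_transaction(trans, root);

	/* release the reservation after delayed refs have been run */
	btrfs_unreserve_metadata_space(root, 1);
	return 0;
}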
@@ -2764,13 +3126,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2764 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3126 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2765 3127
2766 data_sinfo = BTRFS_I(inode)->space_info; 3128 data_sinfo = BTRFS_I(inode)->space_info;
3129 if (!data_sinfo)
3130 goto alloc;
3131
2767again: 3132again:
2768 /* make sure we have enough space to handle the data first */ 3133 /* make sure we have enough space to handle the data first */
2769 spin_lock(&data_sinfo->lock); 3134 spin_lock(&data_sinfo->lock);
2770 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 3135 if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2771 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 3136 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2772 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 3137 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2773 data_sinfo->bytes_may_use < bytes) { 3138 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
2774 struct btrfs_trans_handle *trans; 3139 struct btrfs_trans_handle *trans;
2775 3140
2776 /* 3141 /*
@@ -2782,7 +3147,7 @@ again:
2782 3147
2783 data_sinfo->force_alloc = 1; 3148 data_sinfo->force_alloc = 1;
2784 spin_unlock(&data_sinfo->lock); 3149 spin_unlock(&data_sinfo->lock);
2785 3150alloc:
2786 alloc_target = btrfs_get_alloc_profile(root, 1); 3151 alloc_target = btrfs_get_alloc_profile(root, 1);
2787 trans = btrfs_start_transaction(root, 1); 3152 trans = btrfs_start_transaction(root, 1);
2788 if (!trans) 3153 if (!trans)
@@ -2794,12 +3159,17 @@ again:
2794 btrfs_end_transaction(trans, root); 3159 btrfs_end_transaction(trans, root);
2795 if (ret) 3160 if (ret)
2796 return ret; 3161 return ret;
3162
3163 if (!data_sinfo) {
3164 btrfs_set_inode_space_info(root, inode);
3165 data_sinfo = BTRFS_I(inode)->space_info;
3166 }
2797 goto again; 3167 goto again;
2798 } 3168 }
2799 spin_unlock(&data_sinfo->lock); 3169 spin_unlock(&data_sinfo->lock);
2800 3170
2801 /* commit the current transaction and try again */ 3171 /* commit the current transaction and try again */
2802 if (!committed) { 3172 if (!committed && !root->fs_info->open_ioctl_trans) {
2803 committed = 1; 3173 committed = 1;
2804 trans = btrfs_join_transaction(root, 1); 3174 trans = btrfs_join_transaction(root, 1);
2805 if (!trans) 3175 if (!trans)
@@ -2827,7 +3197,7 @@ again:
2827 BTRFS_I(inode)->reserved_bytes += bytes; 3197 BTRFS_I(inode)->reserved_bytes += bytes;
2828 spin_unlock(&data_sinfo->lock); 3198 spin_unlock(&data_sinfo->lock);
2829 3199
2830 return btrfs_check_metadata_free_space(root); 3200 return 0;
2831} 3201}
2832 3202
2833/* 3203/*
@@ -2926,17 +3296,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
2926 BUG_ON(!space_info); 3296 BUG_ON(!space_info);
2927 3297
2928 spin_lock(&space_info->lock); 3298 spin_lock(&space_info->lock);
2929 if (space_info->force_alloc) { 3299 if (space_info->force_alloc)
2930 force = 1; 3300 force = 1;
2931 space_info->force_alloc = 0;
2932 }
2933 if (space_info->full) { 3301 if (space_info->full) {
2934 spin_unlock(&space_info->lock); 3302 spin_unlock(&space_info->lock);
2935 goto out; 3303 goto out;
2936 } 3304 }
2937 3305
2938 thresh = space_info->total_bytes - space_info->bytes_readonly; 3306 thresh = space_info->total_bytes - space_info->bytes_readonly;
2939 thresh = div_factor(thresh, 6); 3307 thresh = div_factor(thresh, 8);
2940 if (!force && 3308 if (!force &&
2941 (space_info->bytes_used + space_info->bytes_pinned + 3309 (space_info->bytes_used + space_info->bytes_pinned +
2942 space_info->bytes_reserved + alloc_bytes) < thresh) { 3310 space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -2950,7 +3318,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
2950 * we keep a reasonable number of metadata chunks allocated in the 3318 * we keep a reasonable number of metadata chunks allocated in the
2951 * FS as well. 3319 * FS as well.
2952 */ 3320 */
2953 if (flags & BTRFS_BLOCK_GROUP_DATA) { 3321 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
2954 fs_info->data_chunk_allocations++; 3322 fs_info->data_chunk_allocations++;
2955 if (!(fs_info->data_chunk_allocations % 3323 if (!(fs_info->data_chunk_allocations %
2956 fs_info->metadata_ratio)) 3324 fs_info->metadata_ratio))
@@ -2958,8 +3326,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
2958 } 3326 }
2959 3327
2960 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3328 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3329 spin_lock(&space_info->lock);
2961 if (ret) 3330 if (ret)
2962 space_info->full = 1; 3331 space_info->full = 1;
3332 space_info->force_alloc = 0;
3333 spin_unlock(&space_info->lock);
2963out: 3334out:
2964 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3335 mutex_unlock(&extent_root->fs_info->chunk_mutex);
2965 return ret; 3336 return ret;
@@ -3008,10 +3379,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3008 num_bytes = min(total, cache->key.offset - byte_in_group); 3379 num_bytes = min(total, cache->key.offset - byte_in_group);
3009 if (alloc) { 3380 if (alloc) {
3010 old_val += num_bytes; 3381 old_val += num_bytes;
3382 btrfs_set_block_group_used(&cache->item, old_val);
3383 cache->reserved -= num_bytes;
3011 cache->space_info->bytes_used += num_bytes; 3384 cache->space_info->bytes_used += num_bytes;
3385 cache->space_info->bytes_reserved -= num_bytes;
3012 if (cache->ro) 3386 if (cache->ro)
3013 cache->space_info->bytes_readonly -= num_bytes; 3387 cache->space_info->bytes_readonly -= num_bytes;
3014 btrfs_set_block_group_used(&cache->item, old_val);
3015 spin_unlock(&cache->lock); 3388 spin_unlock(&cache->lock);
3016 spin_unlock(&cache->space_info->lock); 3389 spin_unlock(&cache->space_info->lock);
3017 } else { 3390 } else {
@@ -3056,127 +3429,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3056 return bytenr; 3429 return bytenr;
3057} 3430}
3058 3431
3059int btrfs_update_pinned_extents(struct btrfs_root *root, 3432/*
3060 u64 bytenr, u64 num, int pin) 3433 * this function must be called within transaction
3434 */
3435int btrfs_pin_extent(struct btrfs_root *root,
3436 u64 bytenr, u64 num_bytes, int reserved)
3061{ 3437{
3062 u64 len;
3063 struct btrfs_block_group_cache *cache;
3064 struct btrfs_fs_info *fs_info = root->fs_info; 3438 struct btrfs_fs_info *fs_info = root->fs_info;
3439 struct btrfs_block_group_cache *cache;
3065 3440
3066 if (pin) 3441 cache = btrfs_lookup_block_group(fs_info, bytenr);
3067 set_extent_dirty(&fs_info->pinned_extents, 3442 BUG_ON(!cache);
3068 bytenr, bytenr + num - 1, GFP_NOFS);
3069
3070 while (num > 0) {
3071 cache = btrfs_lookup_block_group(fs_info, bytenr);
3072 BUG_ON(!cache);
3073 len = min(num, cache->key.offset -
3074 (bytenr - cache->key.objectid));
3075 if (pin) {
3076 spin_lock(&cache->space_info->lock);
3077 spin_lock(&cache->lock);
3078 cache->pinned += len;
3079 cache->space_info->bytes_pinned += len;
3080 spin_unlock(&cache->lock);
3081 spin_unlock(&cache->space_info->lock);
3082 fs_info->total_pinned += len;
3083 } else {
3084 int unpin = 0;
3085 3443
3086 /* 3444 spin_lock(&cache->space_info->lock);
3087 * in order to not race with the block group caching, we 3445 spin_lock(&cache->lock);
3088 * only want to unpin the extent if we are cached. If 3446 cache->pinned += num_bytes;
3089 * we aren't cached, we want to start async caching this 3447 cache->space_info->bytes_pinned += num_bytes;
3090 * block group so we can free the extent the next time 3448 if (reserved) {
3091 * around. 3449 cache->reserved -= num_bytes;
3092 */ 3450 cache->space_info->bytes_reserved -= num_bytes;
3093 spin_lock(&cache->space_info->lock); 3451 }
3094 spin_lock(&cache->lock); 3452 spin_unlock(&cache->lock);
3095 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3453 spin_unlock(&cache->space_info->lock);
3096 if (likely(unpin)) {
3097 cache->pinned -= len;
3098 cache->space_info->bytes_pinned -= len;
3099 fs_info->total_pinned -= len;
3100 }
3101 spin_unlock(&cache->lock);
3102 spin_unlock(&cache->space_info->lock);
3103 3454
3104 if (likely(unpin)) 3455 btrfs_put_block_group(cache);
3105 clear_extent_dirty(&fs_info->pinned_extents,
3106 bytenr, bytenr + len -1,
3107 GFP_NOFS);
3108 else
3109 cache_block_group(cache);
3110 3456
3111 if (unpin) 3457 set_extent_dirty(fs_info->pinned_extents,
3112 btrfs_add_free_space(cache, bytenr, len); 3458 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
3113 } 3459 return 0;
3114 btrfs_put_block_group(cache); 3460}
3115 bytenr += len; 3461
3116 num -= len; 3462static int update_reserved_extents(struct btrfs_block_group_cache *cache,
3463 u64 num_bytes, int reserve)
3464{
3465 spin_lock(&cache->space_info->lock);
3466 spin_lock(&cache->lock);
3467 if (reserve) {
3468 cache->reserved += num_bytes;
3469 cache->space_info->bytes_reserved += num_bytes;
3470 } else {
3471 cache->reserved -= num_bytes;
3472 cache->space_info->bytes_reserved -= num_bytes;
3117 } 3473 }
3474 spin_unlock(&cache->lock);
3475 spin_unlock(&cache->space_info->lock);
3118 return 0; 3476 return 0;
3119} 3477}
3120 3478
3121static int update_reserved_extents(struct btrfs_root *root, 3479int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3122 u64 bytenr, u64 num, int reserve) 3480 struct btrfs_root *root)
3123{ 3481{
3124 u64 len;
3125 struct btrfs_block_group_cache *cache;
3126 struct btrfs_fs_info *fs_info = root->fs_info; 3482 struct btrfs_fs_info *fs_info = root->fs_info;
3483 struct btrfs_caching_control *next;
3484 struct btrfs_caching_control *caching_ctl;
3485 struct btrfs_block_group_cache *cache;
3127 3486
3128 while (num > 0) { 3487 down_write(&fs_info->extent_commit_sem);
3129 cache = btrfs_lookup_block_group(fs_info, bytenr);
3130 BUG_ON(!cache);
3131 len = min(num, cache->key.offset -
3132 (bytenr - cache->key.objectid));
3133 3488
3134 spin_lock(&cache->space_info->lock); 3489 list_for_each_entry_safe(caching_ctl, next,
3135 spin_lock(&cache->lock); 3490 &fs_info->caching_block_groups, list) {
3136 if (reserve) { 3491 cache = caching_ctl->block_group;
3137 cache->reserved += len; 3492 if (block_group_cache_done(cache)) {
3138 cache->space_info->bytes_reserved += len; 3493 cache->last_byte_to_unpin = (u64)-1;
3494 list_del_init(&caching_ctl->list);
3495 put_caching_control(caching_ctl);
3139 } else { 3496 } else {
3140 cache->reserved -= len; 3497 cache->last_byte_to_unpin = caching_ctl->progress;
3141 cache->space_info->bytes_reserved -= len;
3142 } 3498 }
3143 spin_unlock(&cache->lock);
3144 spin_unlock(&cache->space_info->lock);
3145 btrfs_put_block_group(cache);
3146 bytenr += len;
3147 num -= len;
3148 } 3499 }
3500
3501 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3502 fs_info->pinned_extents = &fs_info->freed_extents[1];
3503 else
3504 fs_info->pinned_extents = &fs_info->freed_extents[0];
3505
3506 up_write(&fs_info->extent_commit_sem);
3149 return 0; 3507 return 0;
3150} 3508}
3151 3509
3152int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3510static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3153{ 3511{
3154 u64 last = 0; 3512 struct btrfs_fs_info *fs_info = root->fs_info;
3155 u64 start; 3513 struct btrfs_block_group_cache *cache = NULL;
3156 u64 end; 3514 u64 len;
3157 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3158 int ret;
3159 3515
3160 while (1) { 3516 while (start <= end) {
3161 ret = find_first_extent_bit(pinned_extents, last, 3517 if (!cache ||
3162 &start, &end, EXTENT_DIRTY); 3518 start >= cache->key.objectid + cache->key.offset) {
3163 if (ret) 3519 if (cache)
3164 break; 3520 btrfs_put_block_group(cache);
3521 cache = btrfs_lookup_block_group(fs_info, start);
3522 BUG_ON(!cache);
3523 }
3524
3525 len = cache->key.objectid + cache->key.offset - start;
3526 len = min(len, end + 1 - start);
3527
3528 if (start < cache->last_byte_to_unpin) {
3529 len = min(len, cache->last_byte_to_unpin - start);
3530 btrfs_add_free_space(cache, start, len);
3531 }
3165 3532
3166 set_extent_dirty(copy, start, end, GFP_NOFS); 3533 spin_lock(&cache->space_info->lock);
3167 last = end + 1; 3534 spin_lock(&cache->lock);
3535 cache->pinned -= len;
3536 cache->space_info->bytes_pinned -= len;
3537 spin_unlock(&cache->lock);
3538 spin_unlock(&cache->space_info->lock);
3539
3540 start += len;
3168 } 3541 }
3542
3543 if (cache)
3544 btrfs_put_block_group(cache);
3169 return 0; 3545 return 0;
3170} 3546}
3171 3547
3172int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3548int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root, 3549 struct btrfs_root *root)
3174 struct extent_io_tree *unpin)
3175{ 3550{
3551 struct btrfs_fs_info *fs_info = root->fs_info;
3552 struct extent_io_tree *unpin;
3176 u64 start; 3553 u64 start;
3177 u64 end; 3554 u64 end;
3178 int ret; 3555 int ret;
3179 3556
3557 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3558 unpin = &fs_info->freed_extents[1];
3559 else
3560 unpin = &fs_info->freed_extents[0];
3561
3180 while (1) { 3562 while (1) {
3181 ret = find_first_extent_bit(unpin, 0, &start, &end, 3563 ret = find_first_extent_bit(unpin, 0, &start, &end,
3182 EXTENT_DIRTY); 3564 EXTENT_DIRTY);
@@ -3185,10 +3567,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3185 3567
3186 ret = btrfs_discard_extent(root, start, end + 1 - start); 3568 ret = btrfs_discard_extent(root, start, end + 1 - start);
3187 3569
3188 /* unlocks the pinned mutex */
3189 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3190 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3570 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3191 3571 unpin_extent_range(root, start, end);
3192 cond_resched(); 3572 cond_resched();
3193 } 3573 }
3194 3574
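btrfs_prepare_extent_commit() and btrfs_finish_extent_commit() only make sense as a pair driven from the transaction commit path, which lives outside this file; the fragment below is a sketch of that assumed call order, not code from this patch:

/* sketch of the assumed commit-time sequence (the real call sites are elsewhere) */
static int commit_sequence_sketch(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	/*
	 * Flip fs_info->pinned_extents to the other freed_extents tree and
	 * record last_byte_to_unpin for block groups still being cached, so
	 * pins from the next transaction land in a fresh tree.
	 */
	btrfs_prepare_extent_commit(trans, root);

	/* ... the new trees and the super block are written out here ... */

	/*
	 * Drain the tree that was active before the flip, returning each
	 * range to the free space cache through unpin_extent_range().
	 */
	return btrfs_finish_extent_commit(trans, root);
}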
@@ -3198,7 +3578,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3198static int pin_down_bytes(struct btrfs_trans_handle *trans, 3578static int pin_down_bytes(struct btrfs_trans_handle *trans,
3199 struct btrfs_root *root, 3579 struct btrfs_root *root,
3200 struct btrfs_path *path, 3580 struct btrfs_path *path,
3201 u64 bytenr, u64 num_bytes, int is_data, 3581 u64 bytenr, u64 num_bytes,
3582 int is_data, int reserved,
3202 struct extent_buffer **must_clean) 3583 struct extent_buffer **must_clean)
3203{ 3584{
3204 int err = 0; 3585 int err = 0;
@@ -3230,15 +3611,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3230 } 3611 }
3231 free_extent_buffer(buf); 3612 free_extent_buffer(buf);
3232pinit: 3613pinit:
3233 btrfs_set_path_blocking(path); 3614 if (path)
3615 btrfs_set_path_blocking(path);
3234 /* unlocks the pinned mutex */ 3616 /* unlocks the pinned mutex */
3235 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3617 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3236 3618
3237 BUG_ON(err < 0); 3619 BUG_ON(err < 0);
3238 return 0; 3620 return 0;
3239} 3621}
3240 3622
3241
3242static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3623static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root, 3624 struct btrfs_root *root,
3244 u64 bytenr, u64 num_bytes, u64 parent, 3625 u64 bytenr, u64 num_bytes, u64 parent,
@@ -3412,7 +3793,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3412 } 3793 }
3413 3794
3414 ret = pin_down_bytes(trans, root, path, bytenr, 3795 ret = pin_down_bytes(trans, root, path, bytenr,
3415 num_bytes, is_data, &must_clean); 3796 num_bytes, is_data, 0, &must_clean);
3416 if (ret > 0) 3797 if (ret > 0)
3417 mark_free = 1; 3798 mark_free = 1;
3418 BUG_ON(ret < 0); 3799 BUG_ON(ret < 0);
@@ -3543,8 +3924,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3543 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3924 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3544 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3925 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3545 /* unlocks the pinned mutex */ 3926 /* unlocks the pinned mutex */
3546 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3927 btrfs_pin_extent(root, bytenr, num_bytes, 1);
3547 update_reserved_extents(root, bytenr, num_bytes, 0);
3548 ret = 0; 3928 ret = 0;
3549 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3929 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
3550 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3930 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3584,19 +3964,33 @@ static noinline int
3584wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3964wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3585 u64 num_bytes) 3965 u64 num_bytes)
3586{ 3966{
3967 struct btrfs_caching_control *caching_ctl;
3587 DEFINE_WAIT(wait); 3968 DEFINE_WAIT(wait);
3588 3969
3589 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3970 caching_ctl = get_caching_control(cache);
3590 3971 if (!caching_ctl)
3591 if (block_group_cache_done(cache)) {
3592 finish_wait(&cache->caching_q, &wait);
3593 return 0; 3972 return 0;
3594 }
3595 schedule();
3596 finish_wait(&cache->caching_q, &wait);
3597 3973
3598 wait_event(cache->caching_q, block_group_cache_done(cache) || 3974 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
3599 (cache->free_space >= num_bytes)); 3975 (cache->free_space >= num_bytes));
3976
3977 put_caching_control(caching_ctl);
3978 return 0;
3979}
3980
3981static noinline int
3982wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
3983{
3984 struct btrfs_caching_control *caching_ctl;
3985 DEFINE_WAIT(wait);
3986
3987 caching_ctl = get_caching_control(cache);
3988 if (!caching_ctl)
3989 return 0;
3990
3991 wait_event(caching_ctl->wait, block_group_cache_done(cache));
3992
3993 put_caching_control(caching_ctl);
3600 return 0; 3994 return 0;
3601} 3995}
3602 3996
@@ -3634,6 +4028,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3634 int last_ptr_loop = 0; 4028 int last_ptr_loop = 0;
3635 int loop = 0; 4029 int loop = 0;
3636 bool found_uncached_bg = false; 4030 bool found_uncached_bg = false;
4031 bool failed_cluster_refill = false;
3637 4032
3638 WARN_ON(num_bytes < root->sectorsize); 4033 WARN_ON(num_bytes < root->sectorsize);
3639 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4034 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3731,7 +4126,16 @@ have_block_group:
3731 if (unlikely(block_group->ro)) 4126 if (unlikely(block_group->ro))
3732 goto loop; 4127 goto loop;
3733 4128
3734 if (last_ptr) { 4129 /*
4130 * Ok we want to try and use the cluster allocator, so lets look
4131 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
4132 * have tried the cluster allocator plenty of times at this
4133 * point and not have found anything, so we are likely way too
4134 * fragmented for the clustering stuff to find anything, so lets
4135 * just skip it and let the allocator find whatever block it can
4136 * find
4137 */
4138 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
3735 /* 4139 /*
3736 * the refill lock keeps out other 4140 * the refill lock keeps out other
3737 * people trying to start a new cluster 4141 * people trying to start a new cluster
@@ -3806,9 +4210,11 @@ refill_cluster:
3806 spin_unlock(&last_ptr->refill_lock); 4210 spin_unlock(&last_ptr->refill_lock);
3807 goto checks; 4211 goto checks;
3808 } 4212 }
3809 } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 4213 } else if (!cached && loop > LOOP_CACHING_NOWAIT
4214 && !failed_cluster_refill) {
3810 spin_unlock(&last_ptr->refill_lock); 4215 spin_unlock(&last_ptr->refill_lock);
3811 4216
4217 failed_cluster_refill = true;
3812 wait_block_group_cache_progress(block_group, 4218 wait_block_group_cache_progress(block_group,
3813 num_bytes + empty_cluster + empty_size); 4219 num_bytes + empty_cluster + empty_size);
3814 goto have_block_group; 4220 goto have_block_group;
@@ -3820,13 +4226,9 @@ refill_cluster:
3820 * cluster. Free the cluster we've been trying 4226 * cluster. Free the cluster we've been trying
3821 * to use, and go to the next block group 4227 * to use, and go to the next block group
3822 */ 4228 */
3823 if (loop < LOOP_NO_EMPTY_SIZE) { 4229 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3824 btrfs_return_cluster_to_free_space(NULL,
3825 last_ptr);
3826 spin_unlock(&last_ptr->refill_lock);
3827 goto loop;
3828 }
3829 spin_unlock(&last_ptr->refill_lock); 4230 spin_unlock(&last_ptr->refill_lock);
4231 goto loop;
3830 } 4232 }
3831 4233
3832 offset = btrfs_find_space_for_alloc(block_group, search_start, 4234 offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3880,9 +4282,12 @@ checks:
3880 search_start - offset); 4282 search_start - offset);
3881 BUG_ON(offset > search_start); 4283 BUG_ON(offset > search_start);
3882 4284
4285 update_reserved_extents(block_group, num_bytes, 1);
4286
3883 /* we are all good, lets return */ 4287 /* we are all good, lets return */
3884 break; 4288 break;
3885loop: 4289loop:
4290 failed_cluster_refill = false;
3886 btrfs_put_block_group(block_group); 4291 btrfs_put_block_group(block_group);
3887 } 4292 }
3888 up_read(&space_info->groups_sem); 4293 up_read(&space_info->groups_sem);
@@ -3940,21 +4345,32 @@ loop:
3940 return ret; 4345 return ret;
3941} 4346}
3942 4347
3943static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 4348static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4349 int dump_block_groups)
3944{ 4350{
3945 struct btrfs_block_group_cache *cache; 4351 struct btrfs_block_group_cache *cache;
3946 4352
4353 spin_lock(&info->lock);
3947 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4354 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
3948 (unsigned long long)(info->total_bytes - info->bytes_used - 4355 (unsigned long long)(info->total_bytes - info->bytes_used -
3949 info->bytes_pinned - info->bytes_reserved), 4356 info->bytes_pinned - info->bytes_reserved -
4357 info->bytes_super),
3950 (info->full) ? "" : "not "); 4358 (info->full) ? "" : "not ");
3951 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4359 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
3952 " may_use=%llu, used=%llu\n", 4360 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4361 "\n",
3953 (unsigned long long)info->total_bytes, 4362 (unsigned long long)info->total_bytes,
3954 (unsigned long long)info->bytes_pinned, 4363 (unsigned long long)info->bytes_pinned,
3955 (unsigned long long)info->bytes_delalloc, 4364 (unsigned long long)info->bytes_delalloc,
3956 (unsigned long long)info->bytes_may_use, 4365 (unsigned long long)info->bytes_may_use,
3957 (unsigned long long)info->bytes_used); 4366 (unsigned long long)info->bytes_used,
4367 (unsigned long long)info->bytes_root,
4368 (unsigned long long)info->bytes_super,
4369 (unsigned long long)info->bytes_reserved);
4370 spin_unlock(&info->lock);
4371
4372 if (!dump_block_groups)
4373 return;
3958 4374
3959 down_read(&info->groups_sem); 4375 down_read(&info->groups_sem);
3960 list_for_each_entry(cache, &info->block_groups, list) { 4376 list_for_each_entry(cache, &info->block_groups, list) {
@@ -3972,12 +4388,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3972 up_read(&info->groups_sem); 4388 up_read(&info->groups_sem);
3973} 4389}
3974 4390
3975static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4391int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3976 struct btrfs_root *root, 4392 struct btrfs_root *root,
3977 u64 num_bytes, u64 min_alloc_size, 4393 u64 num_bytes, u64 min_alloc_size,
3978 u64 empty_size, u64 hint_byte, 4394 u64 empty_size, u64 hint_byte,
3979 u64 search_end, struct btrfs_key *ins, 4395 u64 search_end, struct btrfs_key *ins,
3980 u64 data) 4396 u64 data)
3981{ 4397{
3982 int ret; 4398 int ret;
3983 u64 search_start = 0; 4399 u64 search_start = 0;
@@ -4022,7 +4438,7 @@ again:
4022 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4438 printk(KERN_ERR "btrfs allocation failed flags %llu, "
4023 "wanted %llu\n", (unsigned long long)data, 4439 "wanted %llu\n", (unsigned long long)data,
4024 (unsigned long long)num_bytes); 4440 (unsigned long long)num_bytes);
4025 dump_space_info(sinfo, num_bytes); 4441 dump_space_info(sinfo, num_bytes, 1);
4026 } 4442 }
4027 4443
4028 return ret; 4444 return ret;
@@ -4043,25 +4459,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4043 ret = btrfs_discard_extent(root, start, len); 4459 ret = btrfs_discard_extent(root, start, len);
4044 4460
4045 btrfs_add_free_space(cache, start, len); 4461 btrfs_add_free_space(cache, start, len);
4462 update_reserved_extents(cache, len, 0);
4046 btrfs_put_block_group(cache); 4463 btrfs_put_block_group(cache);
4047 update_reserved_extents(root, start, len, 0);
4048
4049 return ret;
4050}
4051
4052int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4053 struct btrfs_root *root,
4054 u64 num_bytes, u64 min_alloc_size,
4055 u64 empty_size, u64 hint_byte,
4056 u64 search_end, struct btrfs_key *ins,
4057 u64 data)
4058{
4059 int ret;
4060 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
4061 empty_size, hint_byte, search_end, ins,
4062 data);
4063 if (!ret)
4064 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4065 4464
4066 return ret; 4465 return ret;
4067} 4466}
@@ -4222,15 +4621,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4222{ 4621{
4223 int ret; 4622 int ret;
4224 struct btrfs_block_group_cache *block_group; 4623 struct btrfs_block_group_cache *block_group;
4624 struct btrfs_caching_control *caching_ctl;
4625 u64 start = ins->objectid;
4626 u64 num_bytes = ins->offset;
4225 4627
4226 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4628 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4227 cache_block_group(block_group); 4629 cache_block_group(block_group);
4228 wait_event(block_group->caching_q, 4630 caching_ctl = get_caching_control(block_group);
4229 block_group_cache_done(block_group));
4230 4631
4231 ret = btrfs_remove_free_space(block_group, ins->objectid, 4632 if (!caching_ctl) {
4232 ins->offset); 4633 BUG_ON(!block_group_cache_done(block_group));
4233 BUG_ON(ret); 4634 ret = btrfs_remove_free_space(block_group, start, num_bytes);
4635 BUG_ON(ret);
4636 } else {
4637 mutex_lock(&caching_ctl->mutex);
4638
4639 if (start >= caching_ctl->progress) {
4640 ret = add_excluded_extent(root, start, num_bytes);
4641 BUG_ON(ret);
4642 } else if (start + num_bytes <= caching_ctl->progress) {
4643 ret = btrfs_remove_free_space(block_group,
4644 start, num_bytes);
4645 BUG_ON(ret);
4646 } else {
4647 num_bytes = caching_ctl->progress - start;
4648 ret = btrfs_remove_free_space(block_group,
4649 start, num_bytes);
4650 BUG_ON(ret);
4651
4652 start = caching_ctl->progress;
4653 num_bytes = ins->objectid + ins->offset -
4654 caching_ctl->progress;
4655 ret = add_excluded_extent(root, start, num_bytes);
4656 BUG_ON(ret);
4657 }
4658
4659 mutex_unlock(&caching_ctl->mutex);
4660 put_caching_control(caching_ctl);
4661 }
4662
4663 update_reserved_extents(block_group, ins->offset, 1);
4234 btrfs_put_block_group(block_group); 4664 btrfs_put_block_group(block_group);
4235 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4665 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4236 0, owner, offset, ins, 1); 4666 0, owner, offset, ins, 1);
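The logged-extent path above has to cope with a block group whose free-space caching is still in flight: the part of the extent below caching_ctl->progress is already in the free-space cache and must be removed from it, while the part at or above progress only needs to be recorded as excluded so the cacher skips it. A stand-alone sketch of that three-way split is below; remove_cached() and mark_excluded() are hypothetical callbacks standing in for btrfs_remove_free_space() and add_excluded_extent().

#include <linux/types.h>

/*
 * Illustrative only: split [start, start + num_bytes) around the
 * caching progress point and hand each half to the right handler.
 */
static void split_around_progress(u64 start, u64 num_bytes, u64 progress,
				  void (*remove_cached)(u64 start, u64 len),
				  void (*mark_excluded)(u64 start, u64 len))
{
	if (start >= progress) {
		/* entirely uncached: just mark it excluded */
		mark_excluded(start, num_bytes);
	} else if (start + num_bytes <= progress) {
		/* entirely cached: drop it from the free-space cache */
		remove_cached(start, num_bytes);
	} else {
		/* straddles the progress point: handle both halves */
		remove_cached(start, progress - start);
		mark_excluded(progress, start + num_bytes - progress);
	}
}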
@@ -4254,9 +4684,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4254 int ret; 4684 int ret;
4255 u64 flags = 0; 4685 u64 flags = 0;
4256 4686
4257 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4687 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4258 empty_size, hint_byte, search_end, 4688 empty_size, hint_byte, search_end,
4259 ins, 0); 4689 ins, 0);
4260 if (ret) 4690 if (ret)
4261 return ret; 4691 return ret;
4262 4692
@@ -4267,7 +4697,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4267 } else 4697 } else
4268 BUG_ON(parent > 0); 4698 BUG_ON(parent > 0);
4269 4699
4270 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4271 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4700 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4272 struct btrfs_delayed_extent_op *extent_op; 4701 struct btrfs_delayed_extent_op *extent_op;
4273 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4702 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4346,452 +4775,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4346 return buf; 4775 return buf;
4347} 4776}
4348 4777
4349#if 0 4778struct walk_control {
4350int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4779 u64 refs[BTRFS_MAX_LEVEL];
4351 struct btrfs_root *root, struct extent_buffer *leaf) 4780 u64 flags[BTRFS_MAX_LEVEL];
4352{ 4781 struct btrfs_key update_progress;
4353 u64 disk_bytenr; 4782 int stage;
4354 u64 num_bytes; 4783 int level;
4355 struct btrfs_key key; 4784 int shared_level;
4356 struct btrfs_file_extent_item *fi; 4785 int update_ref;
4357 u32 nritems; 4786 int keep_locks;
4358 int i; 4787 int reada_slot;
4359 int ret; 4788 int reada_count;
4360 4789};
4361 BUG_ON(!btrfs_is_leaf(leaf));
4362 nritems = btrfs_header_nritems(leaf);
4363
4364 for (i = 0; i < nritems; i++) {
4365 cond_resched();
4366 btrfs_item_key_to_cpu(leaf, &key, i);
4367
4368 /* only extents have references, skip everything else */
4369 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4370 continue;
4371
4372 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4373
4374 /* inline extents live in the btree, they don't have refs */
4375 if (btrfs_file_extent_type(leaf, fi) ==
4376 BTRFS_FILE_EXTENT_INLINE)
4377 continue;
4378
4379 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4380
4381 /* holes don't have refs */
4382 if (disk_bytenr == 0)
4383 continue;
4384
4385 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4386 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
4387 leaf->start, 0, key.objectid, 0);
4388 BUG_ON(ret);
4389 }
4390 return 0;
4391}
4392
4393static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4394 struct btrfs_root *root,
4395 struct btrfs_leaf_ref *ref)
4396{
4397 int i;
4398 int ret;
4399 struct btrfs_extent_info *info;
4400 struct refsort *sorted;
4401
4402 if (ref->nritems == 0)
4403 return 0;
4404
4405 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
4406 for (i = 0; i < ref->nritems; i++) {
4407 sorted[i].bytenr = ref->extents[i].bytenr;
4408 sorted[i].slot = i;
4409 }
4410 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
4411
4412 /*
4413 * the items in the ref were sorted when the ref was inserted
4414 * into the ref cache, so this is already in order
4415 */
4416 for (i = 0; i < ref->nritems; i++) {
4417 info = ref->extents + sorted[i].slot;
4418 ret = btrfs_free_extent(trans, root, info->bytenr,
4419 info->num_bytes, ref->bytenr,
4420 ref->owner, ref->generation,
4421 info->objectid, 0);
4422
4423 atomic_inc(&root->fs_info->throttle_gen);
4424 wake_up(&root->fs_info->transaction_throttle);
4425 cond_resched();
4426
4427 BUG_ON(ret);
4428 info++;
4429 }
4430
4431 kfree(sorted);
4432 return 0;
4433}
4434
4435
4436static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
4437 struct btrfs_root *root, u64 start,
4438 u64 len, u32 *refs)
4439{
4440 int ret;
4441
4442 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
4443 BUG_ON(ret);
4444
4445#if 0 /* some debugging code in case we see problems here */
4446 /* if the refs count is one, it won't get increased again. But
4447 * if the ref count is > 1, someone may be decreasing it at
4448 * the same time we are.
4449 */
4450 if (*refs != 1) {
4451 struct extent_buffer *eb = NULL;
4452 eb = btrfs_find_create_tree_block(root, start, len);
4453 if (eb)
4454 btrfs_tree_lock(eb);
4455
4456 mutex_lock(&root->fs_info->alloc_mutex);
4457 ret = lookup_extent_ref(NULL, root, start, len, refs);
4458 BUG_ON(ret);
4459 mutex_unlock(&root->fs_info->alloc_mutex);
4460
4461 if (eb) {
4462 btrfs_tree_unlock(eb);
4463 free_extent_buffer(eb);
4464 }
4465 if (*refs == 1) {
4466 printk(KERN_ERR "btrfs block %llu went down to one "
4467 "during drop_snap\n", (unsigned long long)start);
4468 }
4469
4470 }
4471#endif
4472
4473 cond_resched();
4474 return ret;
4475}
4476 4790
4791#define DROP_REFERENCE 1
4792#define UPDATE_BACKREF 2
4477 4793
4478/* 4794static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4479 * this is used while deleting old snapshots, and it drops the refs 4795 struct btrfs_root *root,
4480 * on a whole subtree starting from a level 1 node. 4796 struct walk_control *wc,
4481 * 4797 struct btrfs_path *path)
4482 * The idea is to sort all the leaf pointers, and then drop the
4483 * ref on all the leaves in order. Most of the time the leaves
4484 * will have ref cache entries, so no leaf IOs will be required to
4485 * find the extents they have references on.
4486 *
4487 * For each leaf, any references it has are also dropped in order
4488 *
4489 * This ends up dropping the references in something close to optimal
4490 * order for reading and modifying the extent allocation tree.
4491 */
4492static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4493 struct btrfs_root *root,
4494 struct btrfs_path *path)
4495{ 4798{
4496 u64 bytenr; 4799 u64 bytenr;
4497 u64 root_owner; 4800 u64 generation;
4498 u64 root_gen; 4801 u64 refs;
4499 struct extent_buffer *eb = path->nodes[1]; 4802 u64 last = 0;
4500 struct extent_buffer *leaf; 4803 u32 nritems;
4501 struct btrfs_leaf_ref *ref; 4804 u32 blocksize;
4502 struct refsort *sorted = NULL; 4805 struct btrfs_key key;
4503 int nritems = btrfs_header_nritems(eb); 4806 struct extent_buffer *eb;
4504 int ret; 4807 int ret;
4505 int i; 4808 int slot;
4506 int refi = 0; 4809 int nread = 0;
4507 int slot = path->slots[1];
4508 u32 blocksize = btrfs_level_size(root, 0);
4509 u32 refs;
4510
4511 if (nritems == 0)
4512 goto out;
4513
4514 root_owner = btrfs_header_owner(eb);
4515 root_gen = btrfs_header_generation(eb);
4516 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
4517 4810
4518 /* 4811 if (path->slots[wc->level] < wc->reada_slot) {
4519 * step one, sort all the leaf pointers so we don't scribble 4812 wc->reada_count = wc->reada_count * 2 / 3;
4520 * randomly into the extent allocation tree 4813 wc->reada_count = max(wc->reada_count, 2);
4521 */ 4814 } else {
4522 for (i = slot; i < nritems; i++) { 4815 wc->reada_count = wc->reada_count * 3 / 2;
4523 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4816 wc->reada_count = min_t(int, wc->reada_count,
4524 sorted[refi].slot = i; 4817 BTRFS_NODEPTRS_PER_BLOCK(root));
4525 refi++;
4526 } 4818 }
4527 4819
4528 /* 4820 eb = path->nodes[wc->level];
4529 * nritems won't be zero, but if we're picking up drop_snapshot 4821 nritems = btrfs_header_nritems(eb);
4530 * after a crash, slot might be > 0, so double check things 4822 blocksize = btrfs_level_size(root, wc->level - 1);
4531 * just in case.
4532 */
4533 if (refi == 0)
4534 goto out;
4535 4823
4536 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4824 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4825 if (nread >= wc->reada_count)
4826 break;
4537 4827
4538 /* 4828 cond_resched();
4539 * the first loop frees everything the leaves point to 4829 bytenr = btrfs_node_blockptr(eb, slot);
4540 */ 4830 generation = btrfs_node_ptr_generation(eb, slot);
4541 for (i = 0; i < refi; i++) {
4542 u64 ptr_gen;
4543 4831
4544 bytenr = sorted[i].bytenr; 4832 if (slot == path->slots[wc->level])
4833 goto reada;
4545 4834
4546 /* 4835 if (wc->stage == UPDATE_BACKREF &&
4547 * check the reference count on this leaf. If it is > 1 4836 generation <= root->root_key.offset)
4548 * we just decrement it below and don't update any
4549 * of the refs the leaf points to.
4550 */
4551 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4552 blocksize, &refs);
4553 BUG_ON(ret);
4554 if (refs != 1)
4555 continue; 4837 continue;
4556 4838
4557 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4839 if (wc->stage == DROP_REFERENCE) {
4558 4840 ret = btrfs_lookup_extent_info(trans, root,
4559 /* 4841 bytenr, blocksize,
4560 * the leaf only had one reference, which means the 4842 &refs, NULL);
4561 * only thing pointing to this leaf is the snapshot
4562 * we're deleting. It isn't possible for the reference
4563 * count to increase again later
4564 *
4565 * The reference cache is checked for the leaf,
4566 * and if found we'll be able to drop any refs held by
4567 * the leaf without needing to read it in.
4568 */
4569 ref = btrfs_lookup_leaf_ref(root, bytenr);
4570 if (ref && ref->generation != ptr_gen) {
4571 btrfs_free_leaf_ref(root, ref);
4572 ref = NULL;
4573 }
4574 if (ref) {
4575 ret = cache_drop_leaf_ref(trans, root, ref);
4576 BUG_ON(ret);
4577 btrfs_remove_leaf_ref(root, ref);
4578 btrfs_free_leaf_ref(root, ref);
4579 } else {
4580 /*
4581 * the leaf wasn't in the reference cache, so
4582 * we have to read it.
4583 */
4584 leaf = read_tree_block(root, bytenr, blocksize,
4585 ptr_gen);
4586 ret = btrfs_drop_leaf_ref(trans, root, leaf);
4587 BUG_ON(ret); 4843 BUG_ON(ret);
4588 free_extent_buffer(leaf); 4844 BUG_ON(refs == 0);
4589 } 4845 if (refs == 1)
4590 atomic_inc(&root->fs_info->throttle_gen); 4846 goto reada;
4591 wake_up(&root->fs_info->transaction_throttle);
4592 cond_resched();
4593 }
4594
4595 /*
4596 * run through the loop again to free the refs on the leaves.
4597 * This is faster than doing it in the loop above because
4598 * the leaves are likely to be clustered together. We end up
4599 * working in nice chunks on the extent allocation tree.
4600 */
4601 for (i = 0; i < refi; i++) {
4602 bytenr = sorted[i].bytenr;
4603 ret = btrfs_free_extent(trans, root, bytenr,
4604 blocksize, eb->start,
4605 root_owner, root_gen, 0, 1);
4606 BUG_ON(ret);
4607
4608 atomic_inc(&root->fs_info->throttle_gen);
4609 wake_up(&root->fs_info->transaction_throttle);
4610 cond_resched();
4611 }
4612out:
4613 kfree(sorted);
4614
4615 /*
4616 * update the path to show we've processed the entire level 1
4617 * node. This will get saved into the root's drop_snapshot_progress
4618 * field so these drops are not repeated again if this transaction
4619 * commits.
4620 */
4621 path->slots[1] = nritems;
4622 return 0;
4623}
4624
4625/*
4626 * helper function for drop_snapshot, this walks down the tree dropping ref
4627 * counts as it goes.
4628 */
4629static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4630 struct btrfs_root *root,
4631 struct btrfs_path *path, int *level)
4632{
4633 u64 root_owner;
4634 u64 root_gen;
4635 u64 bytenr;
4636 u64 ptr_gen;
4637 struct extent_buffer *next;
4638 struct extent_buffer *cur;
4639 struct extent_buffer *parent;
4640 u32 blocksize;
4641 int ret;
4642 u32 refs;
4643
4644 WARN_ON(*level < 0);
4645 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4646 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4647 path->nodes[*level]->len, &refs);
4648 BUG_ON(ret);
4649 if (refs > 1)
4650 goto out;
4651
4652 /*
4653 * walk down to the last node level and free all the leaves
4654 */
4655 while (*level >= 0) {
4656 WARN_ON(*level < 0);
4657 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4658 cur = path->nodes[*level];
4659 4847
4660 if (btrfs_header_level(cur) != *level) 4848 if (!wc->update_ref ||
4661 WARN_ON(1); 4849 generation <= root->root_key.offset)
4662 4850 continue;
4663 if (path->slots[*level] >= 4851 btrfs_node_key_to_cpu(eb, &key, slot);
4664 btrfs_header_nritems(cur)) 4852 ret = btrfs_comp_cpu_keys(&key,
4665 break; 4853 &wc->update_progress);
4666 4854 if (ret < 0)
4667 /* the new code goes down to level 1 and does all the 4855 continue;
4668 * leaves pointed to that node in bulk. So, this check
4669 * for level 0 will always be false.
4670 *
4671 * But, the disk format allows the drop_snapshot_progress
4672 * field in the root to leave things in a state where
4673 * a leaf will need cleaning up here. If someone crashes
4674 * with the old code and then boots with the new code,
4675 * we might find a leaf here.
4676 */
4677 if (*level == 0) {
4678 ret = btrfs_drop_leaf_ref(trans, root, cur);
4679 BUG_ON(ret);
4680 break;
4681 } 4856 }
4682 4857reada:
4683 /* 4858 ret = readahead_tree_block(root, bytenr, blocksize,
4684 * once we get to level one, process the whole node 4859 generation);
4685 * at once, including everything below it. 4860 if (ret)
4686 */
4687 if (*level == 1) {
4688 ret = drop_level_one_refs(trans, root, path);
4689 BUG_ON(ret);
4690 break; 4861 break;
4691 } 4862 last = bytenr + blocksize;
4692 4863 nread++;
4693 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
4694 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4695 blocksize = btrfs_level_size(root, *level - 1);
4696
4697 ret = drop_snap_lookup_refcount(trans, root, bytenr,
4698 blocksize, &refs);
4699 BUG_ON(ret);
4700
4701 /*
4702 * if there is more than one reference, we don't need
4703 * to read that node to drop any references it has. We
4704 * just drop the ref we hold on that node and move on to the
4705 * next slot in this level.
4706 */
4707 if (refs != 1) {
4708 parent = path->nodes[*level];
4709 root_owner = btrfs_header_owner(parent);
4710 root_gen = btrfs_header_generation(parent);
4711 path->slots[*level]++;
4712
4713 ret = btrfs_free_extent(trans, root, bytenr,
4714 blocksize, parent->start,
4715 root_owner, root_gen,
4716 *level - 1, 1);
4717 BUG_ON(ret);
4718
4719 atomic_inc(&root->fs_info->throttle_gen);
4720 wake_up(&root->fs_info->transaction_throttle);
4721 cond_resched();
4722
4723 continue;
4724 }
4725
4726 /*
4727 * we need to keep freeing things in the next level down.
4728 * read the block and loop around to process it
4729 */
4730 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4731 WARN_ON(*level <= 0);
4732 if (path->nodes[*level-1])
4733 free_extent_buffer(path->nodes[*level-1]);
4734 path->nodes[*level-1] = next;
4735 *level = btrfs_header_level(next);
4736 path->slots[*level] = 0;
4737 cond_resched();
4738 } 4864 }
4739out: 4865 wc->reada_slot = slot;
4740 WARN_ON(*level < 0);
4741 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4742
4743 if (path->nodes[*level] == root->node) {
4744 parent = path->nodes[*level];
4745 bytenr = path->nodes[*level]->start;
4746 } else {
4747 parent = path->nodes[*level + 1];
4748 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
4749 }
4750
4751 blocksize = btrfs_level_size(root, *level);
4752 root_owner = btrfs_header_owner(parent);
4753 root_gen = btrfs_header_generation(parent);
4754
4755 /*
4756 * cleanup and free the reference on the last node
4757 * we processed
4758 */
4759 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4760 parent->start, root_owner, root_gen,
4761 *level, 1);
4762 free_extent_buffer(path->nodes[*level]);
4763 path->nodes[*level] = NULL;
4764
4765 *level += 1;
4766 BUG_ON(ret);
4767
4768 cond_resched();
4769 return 0;
4770} 4866}
4771#endif
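The new reada_walk_down() above grows or shrinks its readahead window depending on whether the walker has caught up with the last readahead position: wc->reada_count drops to two thirds (but never below 2) while the current slot is still behind wc->reada_slot, and grows by half, capped at the number of pointers per block, once the walker has caught up. A self-contained restatement of that window adjustment, with a plain "cap" parameter standing in for BTRFS_NODEPTRS_PER_BLOCK(root), is:

#include <linux/kernel.h>	/* max(), min_t() */

/*
 * Illustrative sketch of the adaptive readahead window used by
 * reada_walk_down(); only the arithmetic mirrors the real code.
 */
static int adjust_reada_count(int reada_count, int cur_slot,
			      int reada_slot, int cap)
{
	if (cur_slot < reada_slot) {
		/* still consuming previously read-ahead blocks: back off */
		reada_count = reada_count * 2 / 3;
		reada_count = max(reada_count, 2);
	} else {
		/* caught up with the readahead window: grow it */
		reada_count = reada_count * 3 / 2;
		reada_count = min_t(int, reada_count, cap);
	}
	return reada_count;
}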
4772
4773struct walk_control {
4774 u64 refs[BTRFS_MAX_LEVEL];
4775 u64 flags[BTRFS_MAX_LEVEL];
4776 struct btrfs_key update_progress;
4777 int stage;
4778 int level;
4779 int shared_level;
4780 int update_ref;
4781 int keep_locks;
4782};
4783
4784#define DROP_REFERENCE 1
4785#define UPDATE_BACKREF 2
4786 4867
4787/* 4868/*
4788 * helper to process tree block while walking down the tree. 4869 * helper to process tree block while walking down the tree.
4789 * 4870 *
4790 * when wc->stage == DROP_REFERENCE, this function checks
4791 * reference count of the block. if the block is shared and
4792 * we need update back refs for the subtree rooted at the
4793 * block, this function changes wc->stage to UPDATE_BACKREF
4794 *
4795 * when wc->stage == UPDATE_BACKREF, this function updates 4871 * when wc->stage == UPDATE_BACKREF, this function updates
4796 * back refs for pointers in the block. 4872 * back refs for pointers in the block.
4797 * 4873 *
@@ -4804,7 +4880,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4804{ 4880{
4805 int level = wc->level; 4881 int level = wc->level;
4806 struct extent_buffer *eb = path->nodes[level]; 4882 struct extent_buffer *eb = path->nodes[level];
4807 struct btrfs_key key;
4808 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4883 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4809 int ret; 4884 int ret;
4810 4885
@@ -4827,21 +4902,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4827 BUG_ON(wc->refs[level] == 0); 4902 BUG_ON(wc->refs[level] == 0);
4828 } 4903 }
4829 4904
4830 if (wc->stage == DROP_REFERENCE &&
4831 wc->update_ref && wc->refs[level] > 1) {
4832 BUG_ON(eb == root->node);
4833 BUG_ON(path->slots[level] > 0);
4834 if (level == 0)
4835 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4836 else
4837 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4838 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4839 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4840 wc->stage = UPDATE_BACKREF;
4841 wc->shared_level = level;
4842 }
4843 }
4844
4845 if (wc->stage == DROP_REFERENCE) { 4905 if (wc->stage == DROP_REFERENCE) {
4846 if (wc->refs[level] > 1) 4906 if (wc->refs[level] > 1)
4847 return 1; 4907 return 1;
@@ -4878,6 +4938,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4878} 4938}
4879 4939
4880/* 4940/*
 4941 * helper to process tree block pointer.
4942 *
4943 * when wc->stage == DROP_REFERENCE, this function checks
4944 * reference count of the block pointed to. if the block
 4945 * is shared and we need to update back refs for the subtree
4946 * rooted at the block, this function changes wc->stage to
4947 * UPDATE_BACKREF. if the block is shared and there is no
 4948 * need to update back refs, this function drops the reference
4949 * to the block.
4950 *
4951 * NOTE: return value 1 means we should stop walking down.
4952 */
4953static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4954 struct btrfs_root *root,
4955 struct btrfs_path *path,
4956 struct walk_control *wc)
4957{
4958 u64 bytenr;
4959 u64 generation;
4960 u64 parent;
4961 u32 blocksize;
4962 struct btrfs_key key;
4963 struct extent_buffer *next;
4964 int level = wc->level;
4965 int reada = 0;
4966 int ret = 0;
4967
4968 generation = btrfs_node_ptr_generation(path->nodes[level],
4969 path->slots[level]);
4970 /*
4971 * if the lower level block was created before the snapshot
4972 * was created, we know there is no need to update back refs
4973 * for the subtree
4974 */
4975 if (wc->stage == UPDATE_BACKREF &&
4976 generation <= root->root_key.offset)
4977 return 1;
4978
4979 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4980 blocksize = btrfs_level_size(root, level - 1);
4981
4982 next = btrfs_find_tree_block(root, bytenr, blocksize);
4983 if (!next) {
4984 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
4985 reada = 1;
4986 }
4987 btrfs_tree_lock(next);
4988 btrfs_set_lock_blocking(next);
4989
4990 if (wc->stage == DROP_REFERENCE) {
4991 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4992 &wc->refs[level - 1],
4993 &wc->flags[level - 1]);
4994 BUG_ON(ret);
4995 BUG_ON(wc->refs[level - 1] == 0);
4996
4997 if (wc->refs[level - 1] > 1) {
4998 if (!wc->update_ref ||
4999 generation <= root->root_key.offset)
5000 goto skip;
5001
5002 btrfs_node_key_to_cpu(path->nodes[level], &key,
5003 path->slots[level]);
5004 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5005 if (ret < 0)
5006 goto skip;
5007
5008 wc->stage = UPDATE_BACKREF;
5009 wc->shared_level = level - 1;
5010 }
5011 }
5012
5013 if (!btrfs_buffer_uptodate(next, generation)) {
5014 btrfs_tree_unlock(next);
5015 free_extent_buffer(next);
5016 next = NULL;
5017 }
5018
5019 if (!next) {
5020 if (reada && level == 1)
5021 reada_walk_down(trans, root, wc, path);
5022 next = read_tree_block(root, bytenr, blocksize, generation);
5023 btrfs_tree_lock(next);
5024 btrfs_set_lock_blocking(next);
5025 }
5026
5027 level--;
5028 BUG_ON(level != btrfs_header_level(next));
5029 path->nodes[level] = next;
5030 path->slots[level] = 0;
5031 path->locks[level] = 1;
5032 wc->level = level;
5033 if (wc->level == 1)
5034 wc->reada_slot = 0;
5035 return 0;
5036skip:
5037 wc->refs[level - 1] = 0;
5038 wc->flags[level - 1] = 0;
5039
5040 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5041 parent = path->nodes[level]->start;
5042 } else {
5043 BUG_ON(root->root_key.objectid !=
5044 btrfs_header_owner(path->nodes[level]));
5045 parent = 0;
5046 }
5047
5048 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
5049 root->root_key.objectid, level - 1, 0);
5050 BUG_ON(ret);
5051
5052 btrfs_tree_unlock(next);
5053 free_extent_buffer(next);
5054 return 1;
5055}
5056
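do_walk_down() above makes a three-way choice for each child pointer during the DROP_REFERENCE stage: descend if the child is not shared, switch to UPDATE_BACKREF if it is shared and its subtree still needs back ref updates, otherwise drop our reference and skip the whole subtree. A compact restatement of that decision follows; the enum and parameter names are hypothetical, only the logic mirrors the code (snapshot_generation stands in for root->root_key.offset, and key_past_progress for the btrfs_comp_cpu_keys() check against wc->update_progress).

#include <linux/types.h>

/* Illustrative restatement of the per-child decision in do_walk_down(). */
enum child_action {
	CHILD_DESCEND,		/* walk into the child block */
	CHILD_SWITCH_BACKREF,	/* shared subtree still needs back ref updates */
	CHILD_DROP_AND_SKIP,	/* shared, nothing to update: drop one ref */
};

static enum child_action classify_child(u64 refs, int update_ref,
					u64 child_generation,
					u64 snapshot_generation,
					int key_past_progress)
{
	if (refs == 1)
		return CHILD_DESCEND;

	/*
	 * shared block: back refs only need rewriting for blocks created
	 * after the snapshot and not yet covered by update_progress
	 */
	if (!update_ref || child_generation <= snapshot_generation ||
	    !key_past_progress)
		return CHILD_DROP_AND_SKIP;

	return CHILD_SWITCH_BACKREF;
}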
5057/*
4881 * helper to process tree block while walking up the tree. 5058 * helper to process tree block while walking up the tree.
4882 * 5059 *
4883 * when wc->stage == DROP_REFERENCE, this function drops 5060 * when wc->stage == DROP_REFERENCE, this function drops
@@ -4904,7 +5081,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4904 if (level < wc->shared_level) 5081 if (level < wc->shared_level)
4905 goto out; 5082 goto out;
4906 5083
4907 BUG_ON(wc->refs[level] <= 1);
4908 ret = find_next_key(path, level + 1, &wc->update_progress); 5084 ret = find_next_key(path, level + 1, &wc->update_progress);
4909 if (ret > 0) 5085 if (ret > 0)
4910 wc->update_ref = 0; 5086 wc->update_ref = 0;
@@ -4935,8 +5111,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4935 path->locks[level] = 0; 5111 path->locks[level] = 0;
4936 return 1; 5112 return 1;
4937 } 5113 }
4938 } else {
4939 BUG_ON(level != 0);
4940 } 5114 }
4941 } 5115 }
4942 5116
@@ -4989,17 +5163,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4989 struct btrfs_path *path, 5163 struct btrfs_path *path,
4990 struct walk_control *wc) 5164 struct walk_control *wc)
4991{ 5165{
4992 struct extent_buffer *next;
4993 struct extent_buffer *cur;
4994 u64 bytenr;
4995 u64 ptr_gen;
4996 u32 blocksize;
4997 int level = wc->level; 5166 int level = wc->level;
4998 int ret; 5167 int ret;
4999 5168
5000 while (level >= 0) { 5169 while (level >= 0) {
5001 cur = path->nodes[level]; 5170 if (path->slots[level] >=
5002 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 5171 btrfs_header_nritems(path->nodes[level]))
5172 break;
5003 5173
5004 ret = walk_down_proc(trans, root, path, wc); 5174 ret = walk_down_proc(trans, root, path, wc);
5005 if (ret > 0) 5175 if (ret > 0)
@@ -5008,20 +5178,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5008 if (level == 0) 5178 if (level == 0)
5009 break; 5179 break;
5010 5180
5011 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 5181 ret = do_walk_down(trans, root, path, wc);
5012 blocksize = btrfs_level_size(root, level - 1); 5182 if (ret > 0) {
5013 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 5183 path->slots[level]++;
5014 5184 continue;
5015 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 5185 }
5016 btrfs_tree_lock(next); 5186 level = wc->level;
5017 btrfs_set_lock_blocking(next);
5018
5019 level--;
5020 BUG_ON(level != btrfs_header_level(next));
5021 path->nodes[level] = next;
5022 path->slots[level] = 0;
5023 path->locks[level] = 1;
5024 wc->level = level;
5025 } 5187 }
5026 return 0; 5188 return 0;
5027} 5189}
@@ -5111,9 +5273,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5111 err = ret; 5273 err = ret;
5112 goto out; 5274 goto out;
5113 } 5275 }
5114 btrfs_node_key_to_cpu(path->nodes[level], &key, 5276 WARN_ON(ret > 0);
5115 path->slots[level]);
5116 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
5117 5277
5118 /* 5278 /*
5119 * unlock our path, this is safe because only this 5279 * unlock our path, this is safe because only this
@@ -5148,6 +5308,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5148 wc->stage = DROP_REFERENCE; 5308 wc->stage = DROP_REFERENCE;
5149 wc->update_ref = update_ref; 5309 wc->update_ref = update_ref;
5150 wc->keep_locks = 0; 5310 wc->keep_locks = 0;
5311 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5151 5312
5152 while (1) { 5313 while (1) {
5153 ret = walk_down_tree(trans, root, path, wc); 5314 ret = walk_down_tree(trans, root, path, wc);
@@ -5200,9 +5361,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5200 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5361 ret = btrfs_del_root(trans, tree_root, &root->root_key);
5201 BUG_ON(ret); 5362 BUG_ON(ret);
5202 5363
5203 free_extent_buffer(root->node); 5364 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5204 free_extent_buffer(root->commit_root); 5365 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
5205 kfree(root); 5366 NULL, NULL);
5367 BUG_ON(ret < 0);
5368 if (ret > 0) {
5369 ret = btrfs_del_orphan_item(trans, tree_root,
5370 root->root_key.objectid);
5371 BUG_ON(ret);
5372 }
5373 }
5374
5375 if (root->in_radix) {
5376 btrfs_free_fs_root(tree_root->fs_info, root);
5377 } else {
5378 free_extent_buffer(root->node);
5379 free_extent_buffer(root->commit_root);
5380 kfree(root);
5381 }
5206out: 5382out:
5207 btrfs_end_transaction(trans, tree_root); 5383 btrfs_end_transaction(trans, tree_root);
5208 kfree(wc); 5384 kfree(wc);
@@ -5254,6 +5430,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5254 wc->stage = DROP_REFERENCE; 5430 wc->stage = DROP_REFERENCE;
5255 wc->update_ref = 0; 5431 wc->update_ref = 0;
5256 wc->keep_locks = 1; 5432 wc->keep_locks = 1;
5433 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
5257 5434
5258 while (1) { 5435 while (1) {
5259 wret = walk_down_tree(trans, root, path, wc); 5436 wret = walk_down_tree(trans, root, path, wc);
@@ -5396,9 +5573,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
5396 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5573 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
5397 while (1) { 5574 while (1) {
5398 int ret; 5575 int ret;
5399 spin_lock(&em_tree->lock); 5576 write_lock(&em_tree->lock);
5400 ret = add_extent_mapping(em_tree, em); 5577 ret = add_extent_mapping(em_tree, em);
5401 spin_unlock(&em_tree->lock); 5578 write_unlock(&em_tree->lock);
5402 if (ret != -EEXIST) { 5579 if (ret != -EEXIST) {
5403 free_extent_map(em); 5580 free_extent_map(em);
5404 break; 5581 break;
@@ -6841,287 +7018,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6841 return 0; 7018 return 0;
6842} 7019}
6843 7020
6844#if 0 7021/*
6845static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 7022 * checks to see if it's even possible to relocate this block group.
6846 struct btrfs_root *root, 7023 *
6847 u64 objectid, u64 size) 7024 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
6848{ 7025 * ok to go ahead and try.
6849 struct btrfs_path *path; 7026 */
6850 struct btrfs_inode_item *item; 7027int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
6851 struct extent_buffer *leaf;
6852 int ret;
6853
6854 path = btrfs_alloc_path();
6855 if (!path)
6856 return -ENOMEM;
6857
6858 path->leave_spinning = 1;
6859 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6860 if (ret)
6861 goto out;
6862
6863 leaf = path->nodes[0];
6864 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
6865 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
6866 btrfs_set_inode_generation(leaf, item, 1);
6867 btrfs_set_inode_size(leaf, item, size);
6868 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
6869 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
6870 btrfs_mark_buffer_dirty(leaf);
6871 btrfs_release_path(root, path);
6872out:
6873 btrfs_free_path(path);
6874 return ret;
6875}
6876
6877static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
6878 struct btrfs_block_group_cache *group)
6879{ 7028{
6880 struct inode *inode = NULL; 7029 struct btrfs_block_group_cache *block_group;
6881 struct btrfs_trans_handle *trans; 7030 struct btrfs_space_info *space_info;
6882 struct btrfs_root *root; 7031 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
6883 struct btrfs_key root_key; 7032 struct btrfs_device *device;
6884 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 7033 int full = 0;
6885 int err = 0; 7034 int ret = 0;
6886 7035
6887 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 7036 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
6888 root_key.type = BTRFS_ROOT_ITEM_KEY;
6889 root_key.offset = (u64)-1;
6890 root = btrfs_read_fs_root_no_name(fs_info, &root_key);
6891 if (IS_ERR(root))
6892 return ERR_CAST(root);
6893 7037
6894 trans = btrfs_start_transaction(root, 1); 7038 /* odd, couldn't find the block group, leave it alone */
6895 BUG_ON(!trans); 7039 if (!block_group)
7040 return -1;
6896 7041
6897 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 7042 /* no bytes used, we're good */
6898 if (err) 7043 if (!btrfs_block_group_used(&block_group->item))
6899 goto out; 7044 goto out;
6900 7045
6901 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 7046 space_info = block_group->space_info;
6902 BUG_ON(err); 7047 spin_lock(&space_info->lock);
6903
6904 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
6905 group->key.offset, 0, group->key.offset,
6906 0, 0, 0);
6907 BUG_ON(err);
6908
6909 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
6910 if (inode->i_state & I_NEW) {
6911 BTRFS_I(inode)->root = root;
6912 BTRFS_I(inode)->location.objectid = objectid;
6913 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
6914 BTRFS_I(inode)->location.offset = 0;
6915 btrfs_read_locked_inode(inode);
6916 unlock_new_inode(inode);
6917 BUG_ON(is_bad_inode(inode));
6918 } else {
6919 BUG_ON(1);
6920 }
6921 BTRFS_I(inode)->index_cnt = group->key.objectid;
6922
6923 err = btrfs_orphan_add(trans, inode);
6924out:
6925 btrfs_end_transaction(trans, root);
6926 if (err) {
6927 if (inode)
6928 iput(inode);
6929 inode = ERR_PTR(err);
6930 }
6931 return inode;
6932}
6933
6934int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
6935{
6936
6937 struct btrfs_ordered_sum *sums;
6938 struct btrfs_sector_sum *sector_sum;
6939 struct btrfs_ordered_extent *ordered;
6940 struct btrfs_root *root = BTRFS_I(inode)->root;
6941 struct list_head list;
6942 size_t offset;
6943 int ret;
6944 u64 disk_bytenr;
6945
6946 INIT_LIST_HEAD(&list);
6947
6948 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
6949 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
6950
6951 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
6952 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
6953 disk_bytenr + len - 1, &list);
6954
6955 while (!list_empty(&list)) {
6956 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
6957 list_del_init(&sums->list);
6958
6959 sector_sum = sums->sums;
6960 sums->bytenr = ordered->start;
6961 7048
6962 offset = 0; 7049 full = space_info->full;
6963 while (offset < sums->len) {
6964 sector_sum->bytenr += ordered->start - disk_bytenr;
6965 sector_sum++;
6966 offset += root->sectorsize;
6967 }
6968 7050
6969 btrfs_add_ordered_sum(inode, ordered, sums); 7051 /*
7052 * if this is the last block group we have in this space, we can't
7053 * relocate it unless we're able to allocate a new chunk below.
7054 *
7055 * Otherwise, we need to make sure we have room in the space to handle
7056 * all of the extents from this block group. If we can, we're good
7057 */
7058 if ((space_info->total_bytes != block_group->key.offset) &&
7059 (space_info->bytes_used + space_info->bytes_reserved +
7060 space_info->bytes_pinned + space_info->bytes_readonly +
7061 btrfs_block_group_used(&block_group->item) <
7062 space_info->total_bytes)) {
7063 spin_unlock(&space_info->lock);
7064 goto out;
6970 } 7065 }
6971 btrfs_put_ordered_extent(ordered); 7066 spin_unlock(&space_info->lock);
6972 return 0;
6973}
6974
6975int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
6976{
6977 struct btrfs_trans_handle *trans;
6978 struct btrfs_path *path;
6979 struct btrfs_fs_info *info = root->fs_info;
6980 struct extent_buffer *leaf;
6981 struct inode *reloc_inode;
6982 struct btrfs_block_group_cache *block_group;
6983 struct btrfs_key key;
6984 u64 skipped;
6985 u64 cur_byte;
6986 u64 total_found;
6987 u32 nritems;
6988 int ret;
6989 int progress;
6990 int pass = 0;
6991
6992 root = root->fs_info->extent_root;
6993
6994 block_group = btrfs_lookup_block_group(info, group_start);
6995 BUG_ON(!block_group);
6996
6997 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
6998 (unsigned long long)block_group->key.objectid,
6999 (unsigned long long)block_group->flags);
7000
7001 path = btrfs_alloc_path();
7002 BUG_ON(!path);
7003
7004 reloc_inode = create_reloc_inode(info, block_group);
7005 BUG_ON(IS_ERR(reloc_inode));
7006
7007 __alloc_chunk_for_shrink(root, block_group, 1);
7008 set_block_group_readonly(block_group);
7009
7010 btrfs_start_delalloc_inodes(info->tree_root);
7011 btrfs_wait_ordered_extents(info->tree_root, 0);
7012again:
7013 skipped = 0;
7014 total_found = 0;
7015 progress = 0;
7016 key.objectid = block_group->key.objectid;
7017 key.offset = 0;
7018 key.type = 0;
7019 cur_byte = key.objectid;
7020
7021 trans = btrfs_start_transaction(info->tree_root, 1);
7022 btrfs_commit_transaction(trans, info->tree_root);
7023 7067
7024 mutex_lock(&root->fs_info->cleaner_mutex); 7068 /*
7025 btrfs_clean_old_snapshots(info->tree_root); 7069 * ok we don't have enough space, but maybe we have free space on our
7026 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 7070 * devices to allocate new chunks for relocation, so loop through our
7027 mutex_unlock(&root->fs_info->cleaner_mutex); 7071 * alloc devices and guess if we have enough space. However, if we
7072 * were marked as full, then we know there aren't enough chunks, and we
7073 * can just return.
7074 */
7075 ret = -1;
7076 if (full)
7077 goto out;
7028 7078
7029 trans = btrfs_start_transaction(info->tree_root, 1); 7079 mutex_lock(&root->fs_info->chunk_mutex);
7030 btrfs_commit_transaction(trans, info->tree_root); 7080 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
7081 u64 min_free = btrfs_block_group_used(&block_group->item);
7082 u64 dev_offset, max_avail;
7031 7083
7032 while (1) { 7084 /*
7033 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 7085 * check to make sure we can actually find a chunk with enough
7034 if (ret < 0) 7086 * space to fit our block group in.
7035 goto out; 7087 */
7036next: 7088 if (device->total_bytes > device->bytes_used + min_free) {
7037 leaf = path->nodes[0]; 7089 ret = find_free_dev_extent(NULL, device, min_free,
7038 nritems = btrfs_header_nritems(leaf); 7090 &dev_offset, &max_avail);
7039 if (path->slots[0] >= nritems) { 7091 if (!ret)
7040 ret = btrfs_next_leaf(root, path);
7041 if (ret < 0)
7042 goto out;
7043 if (ret == 1) {
7044 ret = 0;
7045 break; 7092 break;
7046 } 7093 ret = -1;
7047 leaf = path->nodes[0];
7048 nritems = btrfs_header_nritems(leaf);
7049 }
7050
7051 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7052
7053 if (key.objectid >= block_group->key.objectid +
7054 block_group->key.offset)
7055 break;
7056
7057 if (progress && need_resched()) {
7058 btrfs_release_path(root, path);
7059 cond_resched();
7060 progress = 0;
7061 continue;
7062 }
7063 progress = 1;
7064
7065 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
7066 key.objectid + key.offset <= cur_byte) {
7067 path->slots[0]++;
7068 goto next;
7069 }
7070
7071 total_found++;
7072 cur_byte = key.objectid + key.offset;
7073 btrfs_release_path(root, path);
7074
7075 __alloc_chunk_for_shrink(root, block_group, 0);
7076 ret = relocate_one_extent(root, path, &key, block_group,
7077 reloc_inode, pass);
7078 BUG_ON(ret < 0);
7079 if (ret > 0)
7080 skipped++;
7081
7082 key.objectid = cur_byte;
7083 key.type = 0;
7084 key.offset = 0;
7085 }
7086
7087 btrfs_release_path(root, path);
7088
7089 if (pass == 0) {
7090 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
7091 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
7092 }
7093
7094 if (total_found > 0) {
7095 printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
7096 (unsigned long long)total_found, pass);
7097 pass++;
7098 if (total_found == skipped && pass > 2) {
7099 iput(reloc_inode);
7100 reloc_inode = create_reloc_inode(info, block_group);
7101 pass = 0;
7102 } 7094 }
7103 goto again;
7104 } 7095 }
7105 7096 mutex_unlock(&root->fs_info->chunk_mutex);
7106 /* delete reloc_inode */
7107 iput(reloc_inode);
7108
7109 /* unpin extents in this range */
7110 trans = btrfs_start_transaction(info->tree_root, 1);
7111 btrfs_commit_transaction(trans, info->tree_root);
7112
7113 spin_lock(&block_group->lock);
7114 WARN_ON(block_group->pinned > 0);
7115 WARN_ON(block_group->reserved > 0);
7116 WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
7117 spin_unlock(&block_group->lock);
7118 btrfs_put_block_group(block_group);
7119 ret = 0;
7120out: 7097out:
7121 btrfs_free_path(path); 7098 btrfs_put_block_group(block_group);
7122 return ret; 7099 return ret;
7123} 7100}
7124#endif
7125 7101
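The new btrfs_can_relocate() above boils down to an accounting check before it ever touches the devices: unless this block group is the only one in its space_info, the remaining space must be able to absorb the group's used bytes (used + reserved + pinned + readonly + this group's usage still under total_bytes); only when that fails does it walk the allocation devices looking for room for a new chunk. A stand-alone sketch of just the accounting half, with simplified field names, is:

#include <linux/types.h>

/*
 * Illustrative check mirroring the space_info accounting in
 * btrfs_can_relocate(): can the rest of this space absorb the
 * block group's data if we empty it?  Fields are simplified.
 */
struct space_totals {
	u64 total_bytes;
	u64 bytes_used;
	u64 bytes_reserved;
	u64 bytes_pinned;
	u64 bytes_readonly;
};

static bool space_can_absorb_group(const struct space_totals *s,
				   u64 group_size, u64 group_used)
{
	/* the last block group in this space can't absorb itself */
	if (s->total_bytes == group_size)
		return false;

	return s->bytes_used + s->bytes_reserved + s->bytes_pinned +
	       s->bytes_readonly + group_used < s->total_bytes;
}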
7126static int find_first_block_group(struct btrfs_root *root, 7102static int find_first_block_group(struct btrfs_root *root,
7127 struct btrfs_path *path, struct btrfs_key *key) 7103 struct btrfs_path *path, struct btrfs_key *key)
@@ -7164,8 +7140,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7164{ 7140{
7165 struct btrfs_block_group_cache *block_group; 7141 struct btrfs_block_group_cache *block_group;
7166 struct btrfs_space_info *space_info; 7142 struct btrfs_space_info *space_info;
7143 struct btrfs_caching_control *caching_ctl;
7167 struct rb_node *n; 7144 struct rb_node *n;
7168 7145
7146 down_write(&info->extent_commit_sem);
7147 while (!list_empty(&info->caching_block_groups)) {
7148 caching_ctl = list_entry(info->caching_block_groups.next,
7149 struct btrfs_caching_control, list);
7150 list_del(&caching_ctl->list);
7151 put_caching_control(caching_ctl);
7152 }
7153 up_write(&info->extent_commit_sem);
7154
7169 spin_lock(&info->block_group_cache_lock); 7155 spin_lock(&info->block_group_cache_lock);
7170 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7156 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
7171 block_group = rb_entry(n, struct btrfs_block_group_cache, 7157 block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7179,8 +7165,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7179 up_write(&block_group->space_info->groups_sem); 7165 up_write(&block_group->space_info->groups_sem);
7180 7166
7181 if (block_group->cached == BTRFS_CACHE_STARTED) 7167 if (block_group->cached == BTRFS_CACHE_STARTED)
7182 wait_event(block_group->caching_q, 7168 wait_block_group_cache_done(block_group);
7183 block_group_cache_done(block_group));
7184 7169
7185 btrfs_remove_free_space_cache(block_group); 7170 btrfs_remove_free_space_cache(block_group);
7186 7171
@@ -7250,7 +7235,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7250 spin_lock_init(&cache->lock); 7235 spin_lock_init(&cache->lock);
7251 spin_lock_init(&cache->tree_lock); 7236 spin_lock_init(&cache->tree_lock);
7252 cache->fs_info = info; 7237 cache->fs_info = info;
7253 init_waitqueue_head(&cache->caching_q);
7254 INIT_LIST_HEAD(&cache->list); 7238 INIT_LIST_HEAD(&cache->list);
7255 INIT_LIST_HEAD(&cache->cluster_list); 7239 INIT_LIST_HEAD(&cache->cluster_list);
7256 7240
@@ -7272,8 +7256,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7272 cache->flags = btrfs_block_group_flags(&cache->item); 7256 cache->flags = btrfs_block_group_flags(&cache->item);
7273 cache->sectorsize = root->sectorsize; 7257 cache->sectorsize = root->sectorsize;
7274 7258
7275 remove_sb_from_cache(root, cache);
7276
7277 /* 7259 /*
7278 * check for two cases, either we are full, and therefore 7260 * check for two cases, either we are full, and therefore
7279 * don't need to bother with the caching work since we won't 7261 * don't need to bother with the caching work since we won't
@@ -7282,13 +7264,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7282 * time, particularly in the full case. 7264 * time, particularly in the full case.
7283 */ 7265 */
7284 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7266 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7267 exclude_super_stripes(root, cache);
7268 cache->last_byte_to_unpin = (u64)-1;
7285 cache->cached = BTRFS_CACHE_FINISHED; 7269 cache->cached = BTRFS_CACHE_FINISHED;
7270 free_excluded_extents(root, cache);
7286 } else if (btrfs_block_group_used(&cache->item) == 0) { 7271 } else if (btrfs_block_group_used(&cache->item) == 0) {
7272 exclude_super_stripes(root, cache);
7273 cache->last_byte_to_unpin = (u64)-1;
7287 cache->cached = BTRFS_CACHE_FINISHED; 7274 cache->cached = BTRFS_CACHE_FINISHED;
7288 add_new_free_space(cache, root->fs_info, 7275 add_new_free_space(cache, root->fs_info,
7289 found_key.objectid, 7276 found_key.objectid,
7290 found_key.objectid + 7277 found_key.objectid +
7291 found_key.offset); 7278 found_key.offset);
7279 free_excluded_extents(root, cache);
7292 } 7280 }
7293 7281
7294 ret = update_space_info(info, cache->flags, found_key.offset, 7282 ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7296,6 +7284,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7296 &space_info); 7284 &space_info);
7297 BUG_ON(ret); 7285 BUG_ON(ret);
7298 cache->space_info = space_info; 7286 cache->space_info = space_info;
7287 spin_lock(&cache->space_info->lock);
7288 cache->space_info->bytes_super += cache->bytes_super;
7289 spin_unlock(&cache->space_info->lock);
7290
7299 down_write(&space_info->groups_sem); 7291 down_write(&space_info->groups_sem);
7300 list_add_tail(&cache->list, &space_info->block_groups); 7292 list_add_tail(&cache->list, &space_info->block_groups);
7301 up_write(&space_info->groups_sem); 7293 up_write(&space_info->groups_sem);
@@ -7345,7 +7337,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7345 atomic_set(&cache->count, 1); 7337 atomic_set(&cache->count, 1);
7346 spin_lock_init(&cache->lock); 7338 spin_lock_init(&cache->lock);
7347 spin_lock_init(&cache->tree_lock); 7339 spin_lock_init(&cache->tree_lock);
7348 init_waitqueue_head(&cache->caching_q);
7349 INIT_LIST_HEAD(&cache->list); 7340 INIT_LIST_HEAD(&cache->list);
7350 INIT_LIST_HEAD(&cache->cluster_list); 7341 INIT_LIST_HEAD(&cache->cluster_list);
7351 7342
@@ -7354,15 +7345,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7354 cache->flags = type; 7345 cache->flags = type;
7355 btrfs_set_block_group_flags(&cache->item, type); 7346 btrfs_set_block_group_flags(&cache->item, type);
7356 7347
7348 cache->last_byte_to_unpin = (u64)-1;
7357 cache->cached = BTRFS_CACHE_FINISHED; 7349 cache->cached = BTRFS_CACHE_FINISHED;
7358 remove_sb_from_cache(root, cache); 7350 exclude_super_stripes(root, cache);
7359 7351
7360 add_new_free_space(cache, root->fs_info, chunk_offset, 7352 add_new_free_space(cache, root->fs_info, chunk_offset,
7361 chunk_offset + size); 7353 chunk_offset + size);
7362 7354
7355 free_excluded_extents(root, cache);
7356
7363 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7357 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7364 &cache->space_info); 7358 &cache->space_info);
7365 BUG_ON(ret); 7359 BUG_ON(ret);
7360
7361 spin_lock(&cache->space_info->lock);
7362 cache->space_info->bytes_super += cache->bytes_super;
7363 spin_unlock(&cache->space_info->lock);
7364
7366 down_write(&cache->space_info->groups_sem); 7365 down_write(&cache->space_info->groups_sem);
7367 list_add_tail(&cache->list, &cache->space_info->block_groups); 7366 list_add_tail(&cache->list, &cache->space_info->block_groups);
7368 up_write(&cache->space_info->groups_sem); 7367 up_write(&cache->space_info->groups_sem);
@@ -7428,8 +7427,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7428 up_write(&block_group->space_info->groups_sem); 7427 up_write(&block_group->space_info->groups_sem);
7429 7428
7430 if (block_group->cached == BTRFS_CACHE_STARTED) 7429 if (block_group->cached == BTRFS_CACHE_STARTED)
7431 wait_event(block_group->caching_q, 7430 wait_block_group_cache_done(block_group);
7432 block_group_cache_done(block_group));
7433 7431
7434 btrfs_remove_free_space_cache(block_group); 7432 btrfs_remove_free_space_cache(block_group);
7435 7433