author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/btrfs/extent-tree.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

    Merge branch 'mpi-master' into wip-k-fmlp

    Conflicts:
        litmus/sched_cedf.c
Diffstat (limited to 'fs/btrfs/extent-tree.c')
 -rw-r--r--  fs/btrfs/extent-tree.c  3159
 1 file changed, 1170 insertions(+), 1989 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 32d094002a57..71cd456fdb60 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,11 +33,28 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+    CHUNK_ALLOC_NO_FORCE = 0,
+    CHUNK_ALLOC_FORCE = 1,
+    CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               u64 bytenr, u64 num_bytes, int alloc);
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                 u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
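Note: these force levels feed should_alloc_chunk() through do_chunk_alloc(), both reworked later in this patch. A rough sketch of how call sites pick a level follows; the NO_FORCE call appears verbatim in the btrfs_check_data_free_space() hunk below, while the LIMITED and FORCE shapes are illustrative assumptions based on the flag descriptions, not code from this diff:

    /* Opportunistic pre-allocation ahead of a data write: */
    ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                         bytes + 2 * 1024 * 1024, alloc_target,
                         CHUNK_ALLOC_NO_FORCE);

    /* Clustering setup, which only wants a small pool of chunks
     * (hypothetical call site): */
    ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                         num_bytes + 2 * 1024 * 1024, alloc_target,
                         CHUNK_ALLOC_LIMITED);

    /* A caller that must have a new chunk regardless of thresholds
     * (hypothetical call site): */
    ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                         num_bytes, alloc_target, CHUNK_ALLOC_FORCE);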
@@ -77,7 +94,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
     return (cache->flags & bits) == bits;
 }
 
-void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 {
     atomic_inc(&cache->count);
 }
@@ -88,6 +105,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
         WARN_ON(cache->pinned > 0);
         WARN_ON(cache->reserved > 0);
         WARN_ON(cache->reserved_pinned > 0);
+        kfree(cache->free_space_ctl);
         kfree(cache);
     }
 }
@@ -242,6 +260,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
         return NULL;
     }
 
+    /* We're loading it the fast way, so we don't have a caching_ctl. */
+    if (!cache->caching_ctl) {
+        spin_unlock(&cache->lock);
+        return NULL;
+    }
+
     ctl = cache->caching_ctl;
     atomic_inc(&ctl->count);
     spin_unlock(&cache->lock);
@@ -314,11 +338,6 @@ static int caching_kthread(void *data)
     if (!path)
         return -ENOMEM;
 
-    exclude_super_stripes(extent_root, block_group);
-    spin_lock(&block_group->space_info->lock);
-    block_group->space_info->bytes_readonly += block_group->bytes_super;
-    spin_unlock(&block_group->space_info->lock);
-
     last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
     /*
@@ -329,7 +348,7 @@ static int caching_kthread(void *data)
      */
     path->skip_locking = 1;
     path->search_commit_root = 1;
-    path->reada = 2;
+    path->reada = 1;
 
     key.objectid = last;
     key.offset = 0;
@@ -347,8 +366,7 @@ again:
     nritems = btrfs_header_nritems(leaf);
 
     while (1) {
-        smp_mb();
-        if (fs_info->closing > 1) {
+        if (btrfs_fs_closing(fs_info) > 1) {
             last = (u64)-1;
             break;
         }
@@ -360,15 +378,18 @@ again:
             if (ret)
                 break;
 
-            caching_ctl->progress = last;
-            btrfs_release_path(extent_root, path);
-            up_read(&fs_info->extent_commit_sem);
-            mutex_unlock(&caching_ctl->mutex);
-            if (btrfs_transaction_in_commit(fs_info))
-                schedule_timeout(1);
-            else
+            if (need_resched() ||
+                btrfs_next_leaf(extent_root, path)) {
+                caching_ctl->progress = last;
+                btrfs_release_path(path);
+                up_read(&fs_info->extent_commit_sem);
+                mutex_unlock(&caching_ctl->mutex);
                 cond_resched();
                 goto again;
+            }
+            leaf = path->nodes[0];
+            nritems = btrfs_header_nritems(leaf);
+            continue;
         }
 
         if (key.objectid < block_group->key.objectid) {
@@ -421,7 +442,10 @@ err:
     return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+                             struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             int load_cache_only)
 {
     struct btrfs_fs_info *fs_info = cache->fs_info;
     struct btrfs_caching_control *caching_ctl;
@@ -432,7 +456,42 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
     if (cache->cached != BTRFS_CACHE_NO)
         return 0;
 
-    caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+    /*
+     * We can't do the read from on-disk cache during a commit since we need
+     * to have the normal tree locking.  Also if we are currently trying to
+     * allocate blocks for the tree root we can't do the fast caching since
+     * we likely hold important locks.
+     */
+    if (trans && (!trans->transaction->in_commit) &&
+        (root && root != root->fs_info->tree_root)) {
+        spin_lock(&cache->lock);
+        if (cache->cached != BTRFS_CACHE_NO) {
+            spin_unlock(&cache->lock);
+            return 0;
+        }
+        cache->cached = BTRFS_CACHE_STARTED;
+        spin_unlock(&cache->lock);
+
+        ret = load_free_space_cache(fs_info, cache);
+
+        spin_lock(&cache->lock);
+        if (ret == 1) {
+            cache->cached = BTRFS_CACHE_FINISHED;
+            cache->last_byte_to_unpin = (u64)-1;
+        } else {
+            cache->cached = BTRFS_CACHE_NO;
+        }
+        spin_unlock(&cache->lock);
+        if (ret == 1) {
+            free_excluded_extents(fs_info->extent_root, cache);
+            return 0;
+        }
+    }
+
+    if (load_cache_only)
+        return 0;
+
+    caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
     BUG_ON(!caching_ctl);
 
     INIT_LIST_HEAD(&caching_ctl->list);
@@ -509,7 +568,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
     rcu_read_lock();
     list_for_each_entry_rcu(found, head, list) {
-        if (found->flags == flags) {
+        if (found->flags & flags) {
             rcu_read_unlock();
             return found;
         }
@@ -542,6 +601,15 @@ static u64 div_factor(u64 num, int factor)
     return num;
 }
 
+static u64 div_factor_fine(u64 num, int factor)
+{
+    if (factor == 100)
+        return num;
+    num *= factor;
+    do_div(num, 100);
+    return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
                            u64 search_start, u64 search_hint, int owner)
 {
@@ -689,8 +757,12 @@ again:
         atomic_inc(&head->node.refs);
         spin_unlock(&delayed_refs->lock);
 
-        btrfs_release_path(root->fs_info->extent_root, path);
+        btrfs_release_path(path);
 
+        /*
+         * Mutex was contended, block until it's released and try
+         * again
+         */
         mutex_lock(&head->mutex);
         mutex_unlock(&head->mutex);
         btrfs_put_delayed_ref(&head->node);
@@ -869,7 +941,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
             break;
         }
     }
-    btrfs_release_path(root, path);
+    btrfs_release_path(path);
 
     if (owner < BTRFS_FIRST_FREE_OBJECTID)
         new_size += sizeof(*bi);
@@ -882,7 +954,6 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
     BUG_ON(ret);
 
     ret = btrfs_extend_item(trans, root, path, new_size);
-    BUG_ON(ret);
 
     leaf = path->nodes[0];
     item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -977,7 +1048,7 @@ again:
         return 0;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
     key.type = BTRFS_EXTENT_REF_V0_KEY;
-    btrfs_release_path(root, path);
+    btrfs_release_path(path);
     ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     if (ret < 0) {
         err = ret;
@@ -1015,7 +1086,7 @@ again:
             if (match_extent_data_ref(leaf, ref, root_objectid,
                                       owner, offset)) {
                 if (recow) {
-                    btrfs_release_path(root, path);
+                    btrfs_release_path(path);
                     goto again;
                 }
                 err = 0;
@@ -1076,7 +1147,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
         if (match_extent_data_ref(leaf, ref, root_objectid,
                                   owner, offset))
             break;
-        btrfs_release_path(root, path);
+        btrfs_release_path(path);
         key.offset++;
         ret = btrfs_insert_empty_item(trans, root, path, &key,
                                       size);
@@ -1102,7 +1173,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
     btrfs_mark_buffer_dirty(leaf);
     ret = 0;
 fail:
-    btrfs_release_path(root, path);
+    btrfs_release_path(path);
     return ret;
 }
 
@@ -1228,7 +1299,7 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
     ret = -ENOENT;
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
     if (ret == -ENOENT && parent) {
-        btrfs_release_path(root, path);
+        btrfs_release_path(path);
         key.type = BTRFS_EXTENT_REF_V0_KEY;
         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
         if (ret > 0)
@@ -1257,7 +1328,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
     }
 
     ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
-    btrfs_release_path(root, path);
+    btrfs_release_path(path);
     return ret;
 }
 
@@ -1490,7 +1561,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
     size = btrfs_extent_inline_ref_size(type);
 
     ret = btrfs_extend_item(trans, root, path, size);
-    BUG_ON(ret);
 
     ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
     refs = btrfs_extent_refs(leaf, ei);
@@ -1543,7 +1613,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
     if (ret != -ENOENT)
         return ret;
 
-    btrfs_release_path(root, path);
+    btrfs_release_path(path);
     *ref_ret = NULL;
 
     if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -1619,7 +1689,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
                       end - ptr - size);
         item_size -= size;
         ret = btrfs_truncate_item(trans, root, path, item_size, 1);
-        BUG_ON(ret);
     }
     btrfs_mark_buffer_dirty(leaf);
     return 0;
@@ -1692,40 +1761,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
     return ret;
 }
 
-static void btrfs_issue_discard(struct block_device *bdev,
+static int btrfs_issue_discard(struct block_device *bdev,
                                 u64 start, u64 len)
 {
-    blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-                         BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+    return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-                                u64 num_bytes)
+                                u64 num_bytes, u64 *actual_bytes)
 {
     int ret;
-    u64 map_length = num_bytes;
+    u64 discarded_bytes = 0;
     struct btrfs_multi_bio *multi = NULL;
 
-    if (!btrfs_test_opt(root, DISCARD))
-        return 0;
 
     /* Tell the block device(s) that the sectors can be discarded */
-    ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-                          bytenr, &map_length, &multi, 0);
+    ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+                          bytenr, &num_bytes, &multi, 0);
     if (!ret) {
         struct btrfs_bio_stripe *stripe = multi->stripes;
         int i;
 
-        if (map_length > num_bytes)
-            map_length = num_bytes;
 
         for (i = 0; i < multi->num_stripes; i++, stripe++) {
-            btrfs_issue_discard(stripe->dev->bdev,
-                                stripe->physical,
-                                map_length);
+            ret = btrfs_issue_discard(stripe->dev->bdev,
+                                      stripe->physical,
+                                      stripe->length);
+            if (!ret)
+                discarded_bytes += stripe->length;
+            else if (ret != -EOPNOTSUPP)
+                break;
         }
         kfree(multi);
     }
+    if (discarded_bytes && ret == -EOPNOTSUPP)
+        ret = 0;
+
+    if (actual_bytes)
+        *actual_bytes = discarded_bytes;
+
 
     return ret;
 }
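With this change btrfs_discard_extent() degrades gracefully: a device that rejects TRIM (-EOPNOTSUPP) no longer fails the whole operation, and the caller learns how many bytes were actually discarded. A hedged sketch of how a trim-style caller might consume the new out-parameter (the FITRIM plumbing itself is outside this diff; names are assumptions):

    u64 group_trimmed = 0;
    u64 actual = 0;

    /* for each free-space range (start, len) selected for trimming: */
    ret = btrfs_discard_extent(root, start, len, &actual);
    if (ret && ret != -EOPNOTSUPP)
        return ret;            /* a real I/O error, propagate it */
    group_trimmed += actual;   /* count only what a device accepted */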
@@ -1792,7 +1866,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
     __run_delayed_extent_op(extent_op, leaf, item);
 
     btrfs_mark_buffer_dirty(leaf);
-    btrfs_release_path(root->fs_info->extent_root, path);
+    btrfs_release_path(path);
 
     path->reada = 1;
     path->leave_spinning = 1;
@@ -2227,6 +2301,10 @@ again:
             atomic_inc(&ref->refs);
 
             spin_unlock(&delayed_refs->lock);
+            /*
+             * Mutex was contended, block until it's
+             * released and try again
+             */
             mutex_lock(&head->mutex);
             mutex_unlock(&head->mutex);
 
@@ -2291,8 +2369,12 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
         atomic_inc(&head->node.refs);
         spin_unlock(&delayed_refs->lock);
 
-        btrfs_release_path(root->fs_info->extent_root, path);
+        btrfs_release_path(path);
 
+        /*
+         * Mutex was contended, block until it's released and let
+         * caller try again
+         */
         mutex_lock(&head->mutex);
         mutex_unlock(&head->mutex);
         btrfs_put_delayed_ref(&head->node);
@@ -2440,126 +2522,6 @@ out:
     return ret;
 }
 
-#if 0
-int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                    struct extent_buffer *buf, u32 nr_extents)
-{
-    struct btrfs_key key;
-    struct btrfs_file_extent_item *fi;
-    u64 root_gen;
-    u32 nritems;
-    int i;
-    int level;
-    int ret = 0;
-    int shared = 0;
-
-    if (!root->ref_cows)
-        return 0;
-
-    if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
-        shared = 0;
-        root_gen = root->root_key.offset;
-    } else {
-        shared = 1;
-        root_gen = trans->transid - 1;
-    }
-
-    level = btrfs_header_level(buf);
-    nritems = btrfs_header_nritems(buf);
-
-    if (level == 0) {
-        struct btrfs_leaf_ref *ref;
-        struct btrfs_extent_info *info;
-
-        ref = btrfs_alloc_leaf_ref(root, nr_extents);
-        if (!ref) {
-            ret = -ENOMEM;
-            goto out;
-        }
-
-        ref->root_gen = root_gen;
-        ref->bytenr = buf->start;
-        ref->owner = btrfs_header_owner(buf);
-        ref->generation = btrfs_header_generation(buf);
-        ref->nritems = nr_extents;
-        info = ref->extents;
-
-        for (i = 0; nr_extents > 0 && i < nritems; i++) {
-            u64 disk_bytenr;
-            btrfs_item_key_to_cpu(buf, &key, i);
-            if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-                continue;
-            fi = btrfs_item_ptr(buf, i,
-                                struct btrfs_file_extent_item);
-            if (btrfs_file_extent_type(buf, fi) ==
-                BTRFS_FILE_EXTENT_INLINE)
-                continue;
-            disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
-            if (disk_bytenr == 0)
-                continue;
-
-            info->bytenr = disk_bytenr;
-            info->num_bytes =
-                btrfs_file_extent_disk_num_bytes(buf, fi);
-            info->objectid = key.objectid;
-            info->offset = key.offset;
-            info++;
-        }
-
-        ret = btrfs_add_leaf_ref(root, ref, shared);
-        if (ret == -EEXIST && shared) {
-            struct btrfs_leaf_ref *old;
-            old = btrfs_lookup_leaf_ref(root, ref->bytenr);
-            BUG_ON(!old);
-            btrfs_remove_leaf_ref(root, old);
-            btrfs_free_leaf_ref(root, old);
-            ret = btrfs_add_leaf_ref(root, ref, shared);
-        }
-        WARN_ON(ret);
-        btrfs_free_leaf_ref(root, ref);
-    }
-out:
-    return ret;
-}
-
-/* when a block goes through cow, we update the reference counts of
- * everything that block points to.  The internal pointers of the block
- * can be in just about any order, and it is likely to have clusters of
- * things that are close together and clusters of things that are not.
- *
- * To help reduce the seeks that come with updating all of these reference
- * counts, sort them by byte number before actual updates are done.
- *
- * struct refsort is used to match byte number to slot in the btree block.
- * we sort based on the byte number and then use the slot to actually
- * find the item.
- *
- * struct refsort is smaller than strcut btrfs_item and smaller than
- * struct btrfs_key_ptr.  Since we're currently limited to the page size
- * for a btree block, there's no way for a kmalloc of refsorts for a
- * single node to be bigger than a page.
- */
-struct refsort {
-    u64 bytenr;
-    u32 slot;
-};
-
-/*
- * for passing into sort()
- */
-static int refsort_cmp(const void *a_void, const void *b_void)
-{
-    const struct refsort *a = a_void;
-    const struct refsort *b = b_void;
-
-    if (a->bytenr < b->bytenr)
-        return -1;
-    if (a->bytenr > b->bytenr)
-        return 1;
-    return 0;
-}
-#endif
-
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            struct extent_buffer *buf,
@@ -2662,7 +2624,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
     bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
     write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
     btrfs_mark_buffer_dirty(leaf);
-    btrfs_release_path(extent_root, path);
+    btrfs_release_path(path);
 fail:
     if (ret)
         return ret;
@@ -2688,6 +2650,111 @@ next_block_group(struct btrfs_root *root,
         return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+                            struct btrfs_trans_handle *trans,
+                            struct btrfs_path *path)
+{
+    struct btrfs_root *root = block_group->fs_info->tree_root;
+    struct inode *inode = NULL;
+    u64 alloc_hint = 0;
+    int dcs = BTRFS_DC_ERROR;
+    int num_pages = 0;
+    int retries = 0;
+    int ret = 0;
+
+    /*
+     * If this block group is smaller than 100 megs don't bother caching the
+     * block group.
+     */
+    if (block_group->key.offset < (100 * 1024 * 1024)) {
+        spin_lock(&block_group->lock);
+        block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+        spin_unlock(&block_group->lock);
+        return 0;
+    }
+
+again:
+    inode = lookup_free_space_inode(root, block_group, path);
+    if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+        ret = PTR_ERR(inode);
+        btrfs_release_path(path);
+        goto out;
+    }
+
+    if (IS_ERR(inode)) {
+        BUG_ON(retries);
+        retries++;
+
+        if (block_group->ro)
+            goto out_free;
+
+        ret = create_free_space_inode(root, trans, block_group, path);
+        if (ret)
+            goto out_free;
+        goto again;
+    }
+
+    /*
+     * We want to set the generation to 0, that way if anything goes wrong
+     * from here on out we know not to trust this cache when we load up next
+     * time.
+     */
+    BTRFS_I(inode)->generation = 0;
+    ret = btrfs_update_inode(trans, root, inode);
+    WARN_ON(ret);
+
+    if (i_size_read(inode) > 0) {
+        ret = btrfs_truncate_free_space_cache(root, trans, path,
+                                              inode);
+        if (ret)
+            goto out_put;
+    }
+
+    spin_lock(&block_group->lock);
+    if (block_group->cached != BTRFS_CACHE_FINISHED) {
+        /* We're not cached, don't bother trying to write stuff out */
+        dcs = BTRFS_DC_WRITTEN;
+        spin_unlock(&block_group->lock);
+        goto out_put;
+    }
+    spin_unlock(&block_group->lock);
+
+    num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+    if (!num_pages)
+        num_pages = 1;
+
+    /*
+     * Just to make absolutely sure we have enough space, we're going to
+     * preallocate 12 pages worth of space for each block group.  In
+     * practice we ought to use at most 8, but we need extra space so we can
+     * add our header and have a terminator between the extents and the
+     * bitmaps.
+     */
+    num_pages *= 16;
+    num_pages *= PAGE_CACHE_SIZE;
+
+    ret = btrfs_check_data_free_space(inode, num_pages);
+    if (ret)
+        goto out_put;
+
+    ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+                                          num_pages, num_pages,
+                                          &alloc_hint);
+    if (!ret)
+        dcs = BTRFS_DC_SETUP;
+    btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+    iput(inode);
+out_free:
+    btrfs_release_path(path);
+out:
+    spin_lock(&block_group->lock);
+    block_group->disk_cache_state = dcs;
+    spin_unlock(&block_group->lock);
+
+    return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
 {
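The sizing in cache_save_setup() above is easiest to see with numbers. A short worked example, assuming 4 KiB pages (PAGE_CACHE_SIZE == 4096):

    /* num_pages starts at one per full GiB of block group (minimum 1),
     * then is multiplied by 16 and converted to bytes:
     *   1 GiB block group:  num_pages = 1  -> 16 pages  -> 64 KiB
     *  10 GiB block group:  num_pages = 10 -> 160 pages -> 640 KiB
     * Block groups under 100 MiB skip caching entirely and are marked
     * BTRFS_DC_WRITTEN up front.
     */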
@@ -2700,6 +2767,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
     if (!path)
         return -ENOMEM;
 
+again:
+    while (1) {
+        cache = btrfs_lookup_first_block_group(root->fs_info, last);
+        while (cache) {
+            if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                break;
+            cache = next_block_group(root, cache);
+        }
+        if (!cache) {
+            if (last == 0)
+                break;
+            last = 0;
+            continue;
+        }
+        err = cache_save_setup(cache, trans, path);
+        last = cache->key.objectid + cache->key.offset;
+        btrfs_put_block_group(cache);
+    }
+
     while (1) {
         if (last == 0) {
             err = btrfs_run_delayed_refs(trans, root,
@@ -2709,6 +2795,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
         cache = btrfs_lookup_first_block_group(root->fs_info, last);
         while (cache) {
+            if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+                btrfs_put_block_group(cache);
+                goto again;
+            }
+
             if (cache->dirty)
                 break;
             cache = next_block_group(root, cache);
@@ -2720,6 +2811,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
             continue;
         }
 
+        if (cache->disk_cache_state == BTRFS_DC_SETUP)
+            cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
         cache->dirty = 0;
         last = cache->key.objectid + cache->key.offset;
 
@@ -2728,6 +2821,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
         btrfs_put_block_group(cache);
     }
 
+    while (1) {
+        /*
+         * I don't think this is needed since we're just marking our
+         * preallocated extent as written, but just in case it can't
+         * hurt.
+         */
+        if (last == 0) {
+            err = btrfs_run_delayed_refs(trans, root,
+                                         (unsigned long)-1);
+            BUG_ON(err);
+        }
+
+        cache = btrfs_lookup_first_block_group(root->fs_info, last);
+        while (cache) {
+            /*
+             * Really this shouldn't happen, but it could if we
+             * couldn't write the entire preallocated extent and
+             * splitting the extent resulted in a new block.
+             */
+            if (cache->dirty) {
+                btrfs_put_block_group(cache);
+                goto again;
+            }
+            if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                break;
+            cache = next_block_group(root, cache);
+        }
+        if (!cache) {
+            if (last == 0)
+                break;
+            last = 0;
+            continue;
+        }
+
+        btrfs_write_out_cache(root, trans, cache, path);
+
+        /*
+         * If we didn't have an error then the cache state is still
+         * NEED_WRITE, so we can set it to WRITTEN.
+         */
+        if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+            cache->disk_cache_state = BTRFS_DC_WRITTEN;
+        last = cache->key.objectid + cache->key.offset;
+        btrfs_put_block_group(cache);
+    }
+
     btrfs_free_path(path);
     return 0;
 }
@@ -2763,6 +2902,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
     if (found) {
         spin_lock(&found->lock);
         found->total_bytes += total_bytes;
+        found->disk_total += total_bytes * factor;
         found->bytes_used += bytes_used;
         found->disk_used += bytes_used * factor;
         found->full = 0;
@@ -2782,6 +2922,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                     BTRFS_BLOCK_GROUP_SYSTEM |
                     BTRFS_BLOCK_GROUP_METADATA);
     found->total_bytes = total_bytes;
+    found->disk_total = total_bytes * factor;
     found->bytes_used = bytes_used;
     found->disk_used = bytes_used * factor;
     found->bytes_pinned = 0;
@@ -2789,7 +2930,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
     found->bytes_readonly = 0;
     found->bytes_may_use = 0;
     found->full = 0;
-    found->force_alloc = 0;
+    found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+    found->chunk_alloc = 0;
     *space_info = found;
     list_add_rcu(&found->list, &info->space_info);
     atomic_set(&found->caching_threads, 0);
@@ -2814,7 +2956,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-    u64 num_devices = root->fs_info->fs_devices->rw_devices;
+    /*
+     * we add in the count of missing devices because we want
+     * to make sure that any RAID levels on a degraded FS
+     * continue to be honored.
+     */
+    u64 num_devices = root->fs_info->fs_devices->rw_devices +
+        root->fs_info->fs_devices->missing_devices;
 
     if (num_devices == 1)
         flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -2854,7 +3002,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
     return btrfs_reduce_alloc_profile(root, flags);
 }
 
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 {
     u64 flags;
 
@@ -2883,11 +3031,17 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
     struct btrfs_space_info *data_sinfo;
     struct btrfs_root *root = BTRFS_I(inode)->root;
     u64 used;
-    int ret = 0, committed = 0;
+    int ret = 0, committed = 0, alloc_chunk = 1;
 
     /* make sure bytes are sectorsize aligned */
     bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
+    if (root == root->fs_info->tree_root ||
+        BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
+        alloc_chunk = 0;
+        committed = 1;
+    }
+
     data_sinfo = BTRFS_I(inode)->space_info;
     if (!data_sinfo)
         goto alloc;
@@ -2906,23 +3060,28 @@ again:
      * if we don't have enough free bytes in this space then we need
      * to alloc a new chunk.
      */
-    if (!data_sinfo->full) {
+    if (!data_sinfo->full && alloc_chunk) {
         u64 alloc_target;
 
-        data_sinfo->force_alloc = 1;
+        data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
         spin_unlock(&data_sinfo->lock);
 alloc:
         alloc_target = btrfs_get_alloc_profile(root, 1);
-        trans = btrfs_join_transaction(root, 1);
+        trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
             return PTR_ERR(trans);
 
         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                              bytes + 2 * 1024 * 1024,
-                             alloc_target, 0);
+                             alloc_target,
+                             CHUNK_ALLOC_NO_FORCE);
         btrfs_end_transaction(trans, root);
-        if (ret < 0)
-            return ret;
+        if (ret < 0) {
+            if (ret != -ENOSPC)
+                return ret;
+            else
+                goto commit_trans;
+        }
 
         if (!data_sinfo) {
             btrfs_set_inode_space_info(root, inode);
@@ -2930,12 +3089,21 @@ alloc:
         }
         goto again;
     }
+
+    /*
+     * If we have less pinned bytes than we want to allocate then
+     * don't bother committing the transaction, it won't help us.
+     */
+    if (data_sinfo->bytes_pinned < bytes)
+        committed = 1;
     spin_unlock(&data_sinfo->lock);
 
     /* commit the current transaction and try again */
-    if (!committed && !root->fs_info->open_ioctl_trans) {
+commit_trans:
+    if (!committed &&
+        !atomic_read(&root->fs_info->open_ioctl_trans)) {
         committed = 1;
-        trans = btrfs_join_transaction(root, 1);
+        trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
             return PTR_ERR(trans);
         ret = btrfs_commit_transaction(trans, root);
@@ -2944,18 +3112,6 @@ alloc:
         goto again;
     }
 
-#if 0 /* I hope we never need this code again, just in case */
-        printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
-               "%llu bytes_reserved, " "%llu bytes_pinned, "
-               "%llu bytes_readonly, %llu may use %llu total\n",
-               (unsigned long long)bytes,
-               (unsigned long long)data_sinfo->bytes_used,
-               (unsigned long long)data_sinfo->bytes_reserved,
-               (unsigned long long)data_sinfo->bytes_pinned,
-               (unsigned long long)data_sinfo->bytes_readonly,
-               (unsigned long long)data_sinfo->bytes_may_use,
-               (unsigned long long)data_sinfo->total_bytes);
-#endif
         return -ENOSPC;
     }
     data_sinfo->bytes_may_use += bytes;
@@ -2993,24 +3149,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
     rcu_read_lock();
     list_for_each_entry_rcu(found, head, list) {
         if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-            found->force_alloc = 1;
+            found->force_alloc = CHUNK_ALLOC_FORCE;
     }
     rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-                              u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+                              struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                              int force)
 {
     u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+    u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
+    u64 thresh;
+
+    if (force == CHUNK_ALLOC_FORCE)
+        return 1;
 
-    if (sinfo->bytes_used + sinfo->bytes_reserved +
-        alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+    /*
+     * in limited mode, we want to have some free space up to
+     * about 1% of the FS size.
+     */
+    if (force == CHUNK_ALLOC_LIMITED) {
+        thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+        thresh = max_t(u64, 64 * 1024 * 1024,
+                       div_factor_fine(thresh, 1));
+
+        if (num_bytes - num_allocated < thresh)
+            return 1;
+    }
+
+    /*
+     * we have two similar checks here, one based on percentage
+     * and once based on a hard number of 256MB.  The idea
+     * is that if we have a good amount of free
+     * room, don't allocate a chunk.  A good mount is
+     * less than 80% utilized of the chunks we have allocated,
+     * or more than 256MB free
+     */
+    if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
         return 0;
 
-    if (sinfo->bytes_used + sinfo->bytes_reserved +
-        alloc_bytes < div_factor(num_bytes, 8))
+    if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
         return 0;
 
+    thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+    /* 256MB or 5% of the FS */
+    thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+    if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+        return 0;
     return 1;
 }
 
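To make the new heuristics concrete, here is the arithmetic for an assumed 1 TiB filesystem (illustrative numbers only):

    /* CHUNK_ALLOC_LIMITED: thresh = max(64 MiB, 1% of 1 TiB) ~= 10 GiB,
     *   so a new chunk is allowed while this profile has less than
     *   ~10 GiB of unallocated room.
     * Final check: thresh = max(256 MiB, 5% of 1 TiB) ~= 51 GiB; once
     *   the profile's total chunk space exceeds that, allocation is
     *   refused until more than 30% of it (div_factor(num_bytes, 3))
     *   is actually used.
     */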
@@ -3020,10 +3208,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
     struct btrfs_space_info *space_info;
     struct btrfs_fs_info *fs_info = extent_root->fs_info;
+    int wait_for_alloc = 0;
     int ret = 0;
 
-    mutex_lock(&fs_info->chunk_mutex);
-
     flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
     space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3034,20 +3221,47 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
     }
     BUG_ON(!space_info);
 
+again:
     spin_lock(&space_info->lock);
     if (space_info->force_alloc)
-        force = 1;
+        force = space_info->force_alloc;
     if (space_info->full) {
         spin_unlock(&space_info->lock);
-        goto out;
+        return 0;
     }
 
-    if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+    if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
         spin_unlock(&space_info->lock);
-        goto out;
+        return 0;
+    } else if (space_info->chunk_alloc) {
+        wait_for_alloc = 1;
+    } else {
+        space_info->chunk_alloc = 1;
     }
+
     spin_unlock(&space_info->lock);
 
+    mutex_lock(&fs_info->chunk_mutex);
+
+    /*
+     * The chunk_mutex is held throughout the entirety of a chunk
+     * allocation, so once we've acquired the chunk_mutex we know that the
+     * other guy is done and we need to recheck and see if we should
+     * allocate.
+     */
+    if (wait_for_alloc) {
+        mutex_unlock(&fs_info->chunk_mutex);
+        wait_for_alloc = 0;
+        goto again;
+    }
+
+    /*
+     * If we have mixed data/metadata chunks we want to make sure we keep
+     * allocating mixed chunks instead of individual chunks.
+     */
+    if (btrfs_mixed_space_info(space_info))
+        flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
     /*
      * if we're doing a data chunk, go ahead and make sure that
      * we keep a reasonable number of metadata chunks allocated in the
@@ -3066,167 +3280,220 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
         space_info->full = 1;
     else
         ret = 1;
-    space_info->force_alloc = 0;
+
+    space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+    space_info->chunk_alloc = 0;
     spin_unlock(&space_info->lock);
-out:
     mutex_unlock(&extent_root->fs_info->chunk_mutex);
     return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-    int ret;
-    int end_trans = 0;
-
-    if (sinfo->full)
-        return 0;
-
-    spin_lock(&sinfo->lock);
-    ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-    spin_unlock(&sinfo->lock);
-    if (!ret)
-        return 0;
-
-    if (!trans) {
-        trans = btrfs_join_transaction(root, 1);
-        BUG_ON(IS_ERR(trans));
-        end_trans = 1;
-    }
-
-    ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                         num_bytes + 2 * 1024 * 1024,
-                         get_alloc_profile(root, sinfo->flags), 0);
-
-    if (end_trans)
-        btrfs_end_transaction(trans, root);
-
-    return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root, u64 to_reclaim)
+                           struct btrfs_root *root, u64 to_reclaim, int sync)
 {
     struct btrfs_block_rsv *block_rsv;
+    struct btrfs_space_info *space_info;
     u64 reserved;
     u64 max_reclaim;
     u64 reclaimed = 0;
-    int pause = 1;
-    int ret;
+    long time_left;
+    int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+    int loops = 0;
+    unsigned long progress;
 
     block_rsv = &root->fs_info->delalloc_block_rsv;
-    spin_lock(&block_rsv->lock);
-    reserved = block_rsv->reserved;
-    spin_unlock(&block_rsv->lock);
+    space_info = block_rsv->space_info;
+
+    smp_mb();
+    reserved = space_info->bytes_reserved;
+    progress = space_info->reservation_progress;
 
     if (reserved == 0)
         return 0;
 
     max_reclaim = min(reserved, to_reclaim);
 
-    while (1) {
-        ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
-        if (!ret) {
-            __set_current_state(TASK_INTERRUPTIBLE);
-            schedule_timeout(pause);
-            pause <<= 1;
-            if (pause > HZ / 10)
-                pause = HZ / 10;
-        } else {
-            pause = 1;
-        }
+    while (loops < 1024) {
+        /* have the flusher threads jump in and do some IO */
+        smp_mb();
+        nr_pages = min_t(unsigned long, nr_pages,
+               root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+        writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-        spin_lock(&block_rsv->lock);
-        if (reserved > block_rsv->reserved)
-            reclaimed = reserved - block_rsv->reserved;
-        reserved = block_rsv->reserved;
-        spin_unlock(&block_rsv->lock);
+        spin_lock(&space_info->lock);
+        if (reserved > space_info->bytes_reserved)
+            reclaimed += reserved - space_info->bytes_reserved;
+        reserved = space_info->bytes_reserved;
+        spin_unlock(&space_info->lock);
+
+        loops++;
 
         if (reserved == 0 || reclaimed >= max_reclaim)
             break;
 
         if (trans && trans->transaction->blocked)
             return -EAGAIN;
+
+        time_left = schedule_timeout_interruptible(1);
+
+        /* We were interrupted, exit */
+        if (time_left)
+            break;
+
+        /* we've kicked the IO a few times, if anything has been freed,
+         * exit.  There is no sense in looping here for a long time
+         * when we really need to commit the transaction, or there are
+         * just too many writers without enough free space
+         */
+
+        if (loops > 3) {
+            smp_mb();
+            if (progress != space_info->reservation_progress)
+                break;
+        }
+
     }
     return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct btrfs_block_rsv *block_rsv,
-                                u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_block_rsv *block_rsv,
+                                  u64 orig_bytes, int flush)
 {
     struct btrfs_space_info *space_info = block_rsv->space_info;
-    int ret;
+    u64 unused;
+    u64 num_bytes = orig_bytes;
+    int retries = 0;
+    int ret = 0;
+    bool reserved = false;
+    bool committed = false;
 
-    if ((*retries) > 2)
-        return -ENOSPC;
+again:
+    ret = -ENOSPC;
+    if (reserved)
+        num_bytes = 0;
 
-    ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-    if (ret)
-        return 1;
+    spin_lock(&space_info->lock);
+    unused = space_info->bytes_used + space_info->bytes_reserved +
+             space_info->bytes_pinned + space_info->bytes_readonly +
+             space_info->bytes_may_use;
 
-    if (trans && trans->transaction->in_commit)
-        return -ENOSPC;
+    /*
+     * The idea here is that we've not already over-reserved the block group
+     * then we can go ahead and save our reservation first and then start
+     * flushing if we need to.  Otherwise if we've already overcommitted
+     * lets start flushing stuff first and then come back and try to make
+     * our reservation.
+     */
+    if (unused <= space_info->total_bytes) {
+        unused = space_info->total_bytes - unused;
+        if (unused >= num_bytes) {
+            if (!reserved)
+                space_info->bytes_reserved += orig_bytes;
+            ret = 0;
+        } else {
+            /*
+             * Ok set num_bytes to orig_bytes since we aren't
+             * overocmmitted, this way we only try and reclaim what
+             * we need.
+             */
+            num_bytes = orig_bytes;
+        }
+    } else {
+        /*
+         * Ok we're over committed, set num_bytes to the overcommitted
+         * amount plus the amount of bytes that we need for this
+         * reservation.
+         */
+        num_bytes = unused - space_info->total_bytes +
+                    (orig_bytes * (retries + 1));
+    }
 
-    ret = shrink_delalloc(trans, root, num_bytes);
-    if (ret)
-        return ret;
+    /*
+     * Couldn't make our reservation, save our place so while we're trying
+     * to reclaim space we can actually use it instead of somebody else
+     * stealing it from us.
+     */
+    if (ret && !reserved) {
+        space_info->bytes_reserved += orig_bytes;
+        reserved = true;
+    }
 
-    spin_lock(&space_info->lock);
-    if (space_info->bytes_pinned < num_bytes)
-        ret = 1;
     spin_unlock(&space_info->lock);
-    if (ret)
-        return -ENOSPC;
 
-    (*retries)++;
-
-    if (trans)
-        return -EAGAIN;
+    if (!ret)
+        return 0;
 
-    trans = btrfs_join_transaction(root, 1);
-    BUG_ON(IS_ERR(trans));
-    ret = btrfs_commit_transaction(trans, root);
-    BUG_ON(ret);
+    if (!flush)
+        goto out;
 
-    return 1;
-}
+    /*
+     * We do synchronous shrinking since we don't actually unreserve
+     * metadata until after the IO is completed.
+     */
+    ret = shrink_delalloc(trans, root, num_bytes, 1);
+    if (ret > 0)
+        return 0;
+    else if (ret < 0)
+        goto out;
 
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-                                  u64 num_bytes)
-{
-    struct btrfs_space_info *space_info = block_rsv->space_info;
-    u64 unused;
-    int ret = -ENOSPC;
+    /*
+     * So if we were overcommitted it's possible that somebody else flushed
+     * out enough space and we simply didn't have enough space to reclaim,
+     * so go back around and try again.
+     */
+    if (retries < 2) {
+        retries++;
+        goto again;
+    }
 
     spin_lock(&space_info->lock);
-    unused = space_info->bytes_used + space_info->bytes_reserved +
-             space_info->bytes_pinned + space_info->bytes_readonly;
+    /*
+     * Not enough space to be reclaimed, don't bother committing the
+     * transaction.
+     */
+    if (space_info->bytes_pinned < orig_bytes)
+        ret = -ENOSPC;
+    spin_unlock(&space_info->lock);
+    if (ret)
+        goto out;
 
-    if (unused < space_info->total_bytes)
-        unused = space_info->total_bytes - unused;
-    else
-        unused = 0;
+    ret = -EAGAIN;
+    if (trans || committed)
+        goto out;
 
-    if (unused >= num_bytes) {
-        if (block_rsv->priority >= 10) {
-            space_info->bytes_reserved += num_bytes;
-            ret = 0;
-        } else {
-            if ((unused + block_rsv->reserved) *
-                block_rsv->priority >=
-                (num_bytes + block_rsv->reserved) * 10) {
-                space_info->bytes_reserved += num_bytes;
-                ret = 0;
-            }
-        }
+    ret = -ENOSPC;
+    trans = btrfs_join_transaction(root);
+    if (IS_ERR(trans))
+        goto out;
+    ret = btrfs_commit_transaction(trans, root);
+    if (!ret) {
+        trans = NULL;
+        committed = true;
+        goto again;
+    }
+
+out:
+    if (reserved) {
+        spin_lock(&space_info->lock);
+        space_info->bytes_reserved -= orig_bytes;
+        spin_unlock(&space_info->lock);
     }
-    spin_unlock(&space_info->lock);
 
     return ret;
 }
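The retry sizing in the new reserve_metadata_bytes() is easiest to follow with concrete numbers (assumed values, not from the diff):

    /* Suppose space_info->total_bytes = 8 GiB, the summed counters come
     * to unused = 9 GiB (i.e. 1 GiB overcommitted), and the caller asks
     * for orig_bytes = 4 MiB.  On the first pass (retries == 0):
     *   num_bytes = unused - total_bytes + orig_bytes * (retries + 1)
     *             = 9 GiB - 8 GiB + 4 MiB = 1 GiB + 4 MiB
     * so shrink_delalloc() is asked to claw back the whole overcommit
     * plus the request; each retry raises the orig_bytes multiplier.
     */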
@@ -3273,8 +3540,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
     spin_unlock(&block_rsv->lock);
 }
 
-void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
-                             struct btrfs_block_rsv *dest, u64 num_bytes)
+static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
+                                    struct btrfs_block_rsv *dest, u64 num_bytes)
 {
     struct btrfs_space_info *space_info = block_rsv->space_info;
 
@@ -3293,10 +3560,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
 
     if (num_bytes > 0) {
         if (dest) {
-            block_rsv_add_bytes(dest, num_bytes, 0);
-        } else {
+            spin_lock(&dest->lock);
+            if (!dest->full) {
+                u64 bytes_to_add;
+
+                bytes_to_add = dest->size - dest->reserved;
+                bytes_to_add = min(num_bytes, bytes_to_add);
+                dest->reserved += bytes_to_add;
+                if (dest->reserved >= dest->size)
+                    dest->full = 1;
+                num_bytes -= bytes_to_add;
+            }
+            spin_unlock(&dest->lock);
+        }
+        if (num_bytes) {
             spin_lock(&space_info->lock);
             space_info->bytes_reserved -= num_bytes;
+            space_info->reservation_progress++;
             spin_unlock(&space_info->lock);
         }
     }
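The reworked release path first tops up the destination reservation and only returns the remainder to the space_info. A small worked trace with assumed sizes:

    /* Releasing num_bytes = 10 MiB with dest->size = 8 MiB and
     * dest->reserved = 5 MiB:
     *   bytes_to_add = min(10 MiB, 8 MiB - 5 MiB) = 3 MiB
     *   dest->reserved becomes 8 MiB == dest->size, so dest->full = 1
     *   the remaining 7 MiB leaves space_info->bytes_reserved, and
     *   reservation_progress++ lets shrink_delalloc() observe progress.
     */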
@@ -3328,18 +3608,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
     struct btrfs_block_rsv *block_rsv;
     struct btrfs_fs_info *fs_info = root->fs_info;
-    u64 alloc_target;
 
     block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
     if (!block_rsv)
         return NULL;
 
     btrfs_init_block_rsv(block_rsv);
-
-    alloc_target = btrfs_get_alloc_profile(root, 0);
     block_rsv->space_info = __find_space_info(fs_info,
                                               BTRFS_BLOCK_GROUP_METADATA);
-
     return block_rsv;
 }
 
@@ -3370,23 +3646,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_block_rsv *block_rsv,
-                        u64 num_bytes, int *retries)
+                        u64 num_bytes)
 {
     int ret;
 
     if (num_bytes == 0)
         return 0;
-again:
-    ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+    ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
     if (!ret) {
         block_rsv_add_bytes(block_rsv, num_bytes, 1);
         return 0;
     }
 
-    ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-    if (ret > 0)
-        goto again;
-
     return ret;
 }
 
@@ -3421,7 +3693,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
         return 0;
 
     if (block_rsv->refill_used) {
-        ret = reserve_metadata_bytes(block_rsv, num_bytes);
+        ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                     num_bytes, 0);
         if (!ret) {
             block_rsv_add_bytes(block_rsv, num_bytes, 0);
             return 0;
@@ -3432,17 +3705,12 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
         if (trans)
             return -EAGAIN;
 
-        trans = btrfs_join_transaction(root, 1);
+        trans = btrfs_join_transaction(root);
         BUG_ON(IS_ERR(trans));
         ret = btrfs_commit_transaction(trans, root);
         return 0;
     }
 
-    WARN_ON(1);
-    printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-        block_rsv->size, block_rsv->reserved,
-        block_rsv->freed[0], block_rsv->freed[1]);
-
     return -ENOSPC;
 }
 
@@ -3476,23 +3744,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
     u64 meta_used;
     u64 data_used;
     int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
-#if 0
-    /*
-     * per tree used space accounting can be inaccuracy, so we
-     * can't rely on it.
-     */
-    spin_lock(&fs_info->extent_root->accounting_lock);
-    num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
-    spin_unlock(&fs_info->extent_root->accounting_lock);
-
-    spin_lock(&fs_info->csum_root->accounting_lock);
-    num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
-    spin_unlock(&fs_info->csum_root->accounting_lock);
 
-    spin_lock(&fs_info->tree_root->accounting_lock);
-    num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
-    spin_unlock(&fs_info->tree_root->accounting_lock);
-#endif
     sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
     spin_lock(&sinfo->lock);
     data_used = sinfo->bytes_used;
@@ -3500,6 +3752,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3500 3752
3501 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3753 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3502 spin_lock(&sinfo->lock); 3754 spin_lock(&sinfo->lock);
3755 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
3756 data_used = 0;
3503 meta_used = sinfo->bytes_used; 3757 meta_used = sinfo->bytes_used;
3504 spin_unlock(&sinfo->lock); 3758 spin_unlock(&sinfo->lock);
3505 3759
@@ -3527,7 +3781,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3527 block_rsv->size = num_bytes; 3781 block_rsv->size = num_bytes;
3528 3782
3529 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3783 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3530 sinfo->bytes_reserved + sinfo->bytes_readonly; 3784 sinfo->bytes_reserved + sinfo->bytes_readonly +
3785 sinfo->bytes_may_use;
3531 3786
3532 if (sinfo->total_bytes > num_bytes) { 3787 if (sinfo->total_bytes > num_bytes) {
3533 num_bytes = sinfo->total_bytes - num_bytes; 3788 num_bytes = sinfo->total_bytes - num_bytes;
@@ -3538,13 +3793,11 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3538 if (block_rsv->reserved >= block_rsv->size) { 3793 if (block_rsv->reserved >= block_rsv->size) {
3539 num_bytes = block_rsv->reserved - block_rsv->size; 3794 num_bytes = block_rsv->reserved - block_rsv->size;
3540 sinfo->bytes_reserved -= num_bytes; 3795 sinfo->bytes_reserved -= num_bytes;
3796 sinfo->reservation_progress++;
3541 block_rsv->reserved = block_rsv->size; 3797 block_rsv->reserved = block_rsv->size;
3542 block_rsv->full = 1; 3798 block_rsv->full = 1;
3543 } 3799 }
3544#if 0 3800
3545 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3546 block_rsv->size, block_rsv->reserved);
3547#endif
3548 spin_unlock(&sinfo->lock); 3801 spin_unlock(&sinfo->lock);
3549 spin_unlock(&block_rsv->lock); 3802 spin_unlock(&block_rsv->lock);
3550} 3803}
@@ -3590,15 +3843,40 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3590 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3843 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3591} 3844}
3592 3845
3593static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) 3846int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3847 struct btrfs_root *root,
3848 struct btrfs_block_rsv *rsv)
3594{ 3849{
3595 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3850 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3596 3 * num_items; 3851 u64 num_bytes;
3852 int ret;
3853
3854 /*
3855 * Truncate should be freeing data, but give us 2 items just in case it
3856 * needs to use some space. We may want to be smarter about this in the
3857 * future.
3858 */
3859 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3860
3861 /* We already have enough bytes, just return */
3862 if (rsv->reserved >= num_bytes)
3863 return 0;
3864
3865 num_bytes -= rsv->reserved;
3866
3867 /*
3868 * You should have reserved enough space beforehand to do this, so this
3869 * should not fail.
3870 */
3871 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3872 BUG_ON(ret);
3873
3874 return 0;
3597} 3875}
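
For a sense of scale, plugging the two-item cushion into the formula from calc_trans_metadata_size() shown above, and assuming the common 4 KiB leafsize/nodesize (BTRFS_MAX_LEVEL is 8):

/*
 * (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items
 *   = (4096 + 4096 * 7) * 3 * 2
 *   = 32768 * 6
 *   = 196608 bytes, i.e. 192 KiB held back per truncate step
 */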
3598 3876
3599int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3877int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3600 struct btrfs_root *root, 3878 struct btrfs_root *root,
3601 int num_items, int *retries) 3879 int num_items)
3602{ 3880{
3603 u64 num_bytes; 3881 u64 num_bytes;
3604 int ret; 3882 int ret;
@@ -3606,9 +3884,9 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3606 if (num_items == 0 || root->fs_info->chunk_root == root) 3884 if (num_items == 0 || root->fs_info->chunk_root == root)
3607 return 0; 3885 return 0;
3608 3886
3609 num_bytes = calc_trans_metadata_size(root, num_items); 3887 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
3610 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3888 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3611 num_bytes, retries); 3889 num_bytes);
3612 if (!ret) { 3890 if (!ret) {
3613 trans->bytes_reserved += num_bytes; 3891 trans->bytes_reserved += num_bytes;
3614 trans->block_rsv = &root->fs_info->trans_block_rsv; 3892 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3636,23 +3914,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3636 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3914 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3637 3915
3638 /* 3916 /*
3639 * one for deleting orphan item, one for updating inode and 3917 * We need to hold space in order to delete our orphan item once we've
3640 * two for calling btrfs_truncate_inode_items. 3918 * added it, so this takes the reservation; we release it later
3641 * 3919 * when we are truly done with the orphan item.
3642 * btrfs_truncate_inode_items is a delete operation, it frees
3643 * more space than it uses in most cases. So two units of
3644 * metadata space should be enough for calling it many times.
3645 * If all of the metadata space is used, we can commit
3646 * transaction and use space it freed.
3647 */ 3920 */
3648 u64 num_bytes = calc_trans_metadata_size(root, 4); 3921 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3649 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3922 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3650} 3923}
3651 3924
3652void btrfs_orphan_release_metadata(struct inode *inode) 3925void btrfs_orphan_release_metadata(struct inode *inode)
3653{ 3926{
3654 struct btrfs_root *root = BTRFS_I(inode)->root; 3927 struct btrfs_root *root = BTRFS_I(inode)->root;
3655 u64 num_bytes = calc_trans_metadata_size(root, 4); 3928 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3656 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3929 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3657} 3930}
3658 3931
@@ -3666,7 +3939,7 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3666 * two for root back/forward refs, two for directory entries 3939 * two for root back/forward refs, two for directory entries
3667 * and one for root of the snapshot. 3940 * and one for root of the snapshot.
3668 */ 3941 */
3669 u64 num_bytes = calc_trans_metadata_size(root, 5); 3942 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3670 dst_rsv->space_info = src_rsv->space_info; 3943 dst_rsv->space_info = src_rsv->space_info;
3671 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3944 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3672} 3945}
@@ -3682,43 +3955,37 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3682 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3955 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3683 u64 to_reserve; 3956 u64 to_reserve;
3684 int nr_extents; 3957 int nr_extents;
3685 int retries = 0; 3958 int reserved_extents;
3686 int ret; 3959 int ret;
3687 3960
3688 if (btrfs_transaction_in_commit(root->fs_info)) 3961 if (btrfs_transaction_in_commit(root->fs_info))
3689 schedule_timeout(1); 3962 schedule_timeout(1);
3690 3963
3691 num_bytes = ALIGN(num_bytes, root->sectorsize); 3964 num_bytes = ALIGN(num_bytes, root->sectorsize);
3692again: 3965
3693 spin_lock(&BTRFS_I(inode)->accounting_lock);
3694 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3966 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3695 if (nr_extents > BTRFS_I(inode)->reserved_extents) { 3967 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
3696 nr_extents -= BTRFS_I(inode)->reserved_extents; 3968
3697 to_reserve = calc_trans_metadata_size(root, nr_extents); 3969 if (nr_extents > reserved_extents) {
3970 nr_extents -= reserved_extents;
3971 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
3698 } else { 3972 } else {
3699 nr_extents = 0; 3973 nr_extents = 0;
3700 to_reserve = 0; 3974 to_reserve = 0;
3701 } 3975 }
3702 3976
3703 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3977 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3704 ret = reserve_metadata_bytes(block_rsv, to_reserve); 3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3705 if (ret) { 3979 if (ret)
3706 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3707 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3708 &retries);
3709 if (ret > 0)
3710 goto again;
3711 return ret; 3980 return ret;
3712 }
3713 3981
3714 BTRFS_I(inode)->reserved_extents += nr_extents; 3982 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
3715 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3983 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3716 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3717 3984
3718 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3985 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3719 3986
3720 if (block_rsv->size > 512 * 1024 * 1024) 3987 if (block_rsv->size > 512 * 1024 * 1024)
3721 shrink_delalloc(NULL, root, to_reserve); 3988 shrink_delalloc(NULL, root, to_reserve, 0);
3722 3989
3723 return 0; 3990 return 0;
3724} 3991}
@@ -3728,23 +3995,34 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3728 struct btrfs_root *root = BTRFS_I(inode)->root; 3995 struct btrfs_root *root = BTRFS_I(inode)->root;
3729 u64 to_free; 3996 u64 to_free;
3730 int nr_extents; 3997 int nr_extents;
3998 int reserved_extents;
3731 3999
3732 num_bytes = ALIGN(num_bytes, root->sectorsize); 4000 num_bytes = ALIGN(num_bytes, root->sectorsize);
3733 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4001 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4002 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
3734 4003
3735 spin_lock(&BTRFS_I(inode)->accounting_lock); 4004 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
3736 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4005 do {
3737 if (nr_extents < BTRFS_I(inode)->reserved_extents) { 4006 int old, new;
3738 nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; 4007
3739 BTRFS_I(inode)->reserved_extents -= nr_extents; 4008 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
3740 } else { 4009 if (nr_extents >= reserved_extents) {
3741 nr_extents = 0; 4010 nr_extents = 0;
3742 } 4011 break;
3743 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4012 }
4013 old = reserved_extents;
4014 nr_extents = reserved_extents - nr_extents;
4015 new = reserved_extents - nr_extents;
4016 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
4017 reserved_extents, new);
4018 if (likely(old == reserved_extents))
4019 break;
4020 reserved_extents = old;
4021 } while (1);
3744 4022
3745 to_free = calc_csum_metadata_size(inode, num_bytes); 4023 to_free = calc_csum_metadata_size(inode, num_bytes);
3746 if (nr_extents > 0) 4024 if (nr_extents > 0)
3747 to_free += calc_trans_metadata_size(root, nr_extents); 4025 to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
3748 4026
3749 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4027 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
3750 to_free); 4028 to_free);
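
The do/while above is the usual lock-free read/modify/cmpxchg idiom that replaced the old accounting_lock. A standalone sketch of the same pattern (illustrative only, not btrfs code; atomic_t and atomic_cmpxchg() come from <linux/atomic.h>):

/*
 * Atomically subtract 'delta' from 'v' without a spinlock.
 * atomic_cmpxchg() stores the new value only if it still finds 'old'
 * and returns whatever value it found, so a mismatch means another
 * CPU raced us and we retry from the value we just learned.
 */
static int atomic_sub_return_sketch(atomic_t *v, int delta)
{
	int old = atomic_read(v);

	for (;;) {
		int found = atomic_cmpxchg(v, old, old - delta);

		if (found == old)
			return old - delta;	/* our update landed */
		old = found;			/* lost the race: retry */
	}
}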
@@ -3777,12 +4055,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3777 struct btrfs_root *root, 4055 struct btrfs_root *root,
3778 u64 bytenr, u64 num_bytes, int alloc) 4056 u64 bytenr, u64 num_bytes, int alloc)
3779{ 4057{
3780 struct btrfs_block_group_cache *cache; 4058 struct btrfs_block_group_cache *cache = NULL;
3781 struct btrfs_fs_info *info = root->fs_info; 4059 struct btrfs_fs_info *info = root->fs_info;
3782 int factor;
3783 u64 total = num_bytes; 4060 u64 total = num_bytes;
3784 u64 old_val; 4061 u64 old_val;
3785 u64 byte_in_group; 4062 u64 byte_in_group;
4063 int factor;
3786 4064
3787 /* block accounting for super block */ 4065 /* block accounting for super block */
3788 spin_lock(&info->delalloc_lock); 4066 spin_lock(&info->delalloc_lock);
@@ -3804,11 +4082,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3804 factor = 2; 4082 factor = 2;
3805 else 4083 else
3806 factor = 1; 4084 factor = 1;
4085 /*
4086 * If this block group has free space cache written out, we
4087 * need to make sure to load it if we are removing space. This
4088 * is because we need the unpinning stage to actually add the
4089 * space back to the block group, otherwise we will leak space.
4090 */
4091 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4092 cache_block_group(cache, trans, NULL, 1);
4093
3807 byte_in_group = bytenr - cache->key.objectid; 4094 byte_in_group = bytenr - cache->key.objectid;
3808 WARN_ON(byte_in_group > cache->key.offset); 4095 WARN_ON(byte_in_group > cache->key.offset);
3809 4096
3810 spin_lock(&cache->space_info->lock); 4097 spin_lock(&cache->space_info->lock);
3811 spin_lock(&cache->lock); 4098 spin_lock(&cache->lock);
4099
4100 if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
4101 cache->disk_cache_state < BTRFS_DC_CLEAR)
4102 cache->disk_cache_state = BTRFS_DC_CLEAR;
4103
3812 cache->dirty = 1; 4104 cache->dirty = 1;
3813 old_val = btrfs_block_group_used(&cache->item); 4105 old_val = btrfs_block_group_used(&cache->item);
3814 num_bytes = min(total, cache->key.offset - byte_in_group); 4106 num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -3817,6 +4109,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3817 btrfs_set_block_group_used(&cache->item, old_val); 4109 btrfs_set_block_group_used(&cache->item, old_val);
3818 cache->reserved -= num_bytes; 4110 cache->reserved -= num_bytes;
3819 cache->space_info->bytes_reserved -= num_bytes; 4111 cache->space_info->bytes_reserved -= num_bytes;
4112 cache->space_info->reservation_progress++;
3820 cache->space_info->bytes_used += num_bytes; 4113 cache->space_info->bytes_used += num_bytes;
3821 cache->space_info->disk_used += num_bytes * factor; 4114 cache->space_info->disk_used += num_bytes * factor;
3822 spin_unlock(&cache->lock); 4115 spin_unlock(&cache->lock);
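
The 'factor' applied to disk_used above reflects profiles that keep two on-disk copies of every byte, so raw space is consumed at twice the logical rate. Condensed into one helper (hypothetical name; the flag set matches the "extra copies" check later in this diff):

/* Profiles with two on-disk copies consume raw space twice as fast. */
static int copies_factor(u64 flags)
{
	if (flags & (BTRFS_BLOCK_GROUP_DUP |
		     BTRFS_BLOCK_GROUP_RAID1 |
		     BTRFS_BLOCK_GROUP_RAID10))
		return 2;
	return 1;
}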
@@ -3868,6 +4161,7 @@ static int pin_down_extent(struct btrfs_root *root,
3868 if (reserved) { 4161 if (reserved) {
3869 cache->reserved -= num_bytes; 4162 cache->reserved -= num_bytes;
3870 cache->space_info->bytes_reserved -= num_bytes; 4163 cache->space_info->bytes_reserved -= num_bytes;
4164 cache->space_info->reservation_progress++;
3871 } 4165 }
3872 spin_unlock(&cache->lock); 4166 spin_unlock(&cache->lock);
3873 spin_unlock(&cache->space_info->lock); 4167 spin_unlock(&cache->space_info->lock);
@@ -3898,8 +4192,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
3898 * update size of reserved extents. this function may return -EAGAIN 4192 * update size of reserved extents. this function may return -EAGAIN
3899 * if 'reserve' is true or 'sinfo' is false. 4193 * if 'reserve' is true or 'sinfo' is false.
3900 */ 4194 */
3901static int update_reserved_bytes(struct btrfs_block_group_cache *cache, 4195int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
3902 u64 num_bytes, int reserve, int sinfo) 4196 u64 num_bytes, int reserve, int sinfo)
3903{ 4197{
3904 int ret = 0; 4198 int ret = 0;
3905 if (sinfo) { 4199 if (sinfo) {
@@ -3918,6 +4212,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
3918 space_info->bytes_readonly += num_bytes; 4212 space_info->bytes_readonly += num_bytes;
3919 cache->reserved -= num_bytes; 4213 cache->reserved -= num_bytes;
3920 space_info->bytes_reserved -= num_bytes; 4214 space_info->bytes_reserved -= num_bytes;
4215 space_info->reservation_progress++;
3921 } 4216 }
3922 spin_unlock(&cache->lock); 4217 spin_unlock(&cache->lock);
3923 spin_unlock(&space_info->lock); 4218 spin_unlock(&space_info->lock);
@@ -4037,7 +4332,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4037 if (ret) 4332 if (ret)
4038 break; 4333 break;
4039 4334
4040 ret = btrfs_discard_extent(root, start, end + 1 - start); 4335 if (btrfs_test_opt(root, DISCARD))
4336 ret = btrfs_discard_extent(root, start,
4337 end + 1 - start, NULL);
4041 4338
4042 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4339 clear_extent_dirty(unpin, start, end, GFP_NOFS);
4043 unpin_extent_range(root, start, end); 4340 unpin_extent_range(root, start, end);
@@ -4134,7 +4431,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4134 NULL, refs_to_drop, 4431 NULL, refs_to_drop,
4135 is_data); 4432 is_data);
4136 BUG_ON(ret); 4433 BUG_ON(ret);
4137 btrfs_release_path(extent_root, path); 4434 btrfs_release_path(path);
4138 path->leave_spinning = 1; 4435 path->leave_spinning = 1;
4139 4436
4140 key.objectid = bytenr; 4437 key.objectid = bytenr;
@@ -4173,7 +4470,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4173 owner_objectid, 0); 4470 owner_objectid, 0);
4174 BUG_ON(ret < 0); 4471 BUG_ON(ret < 0);
4175 4472
4176 btrfs_release_path(extent_root, path); 4473 btrfs_release_path(path);
4177 path->leave_spinning = 1; 4474 path->leave_spinning = 1;
4178 4475
4179 key.objectid = bytenr; 4476 key.objectid = bytenr;
@@ -4243,7 +4540,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4243 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4540 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4244 num_to_del); 4541 num_to_del);
4245 BUG_ON(ret); 4542 BUG_ON(ret);
4246 btrfs_release_path(extent_root, path); 4543 btrfs_release_path(path);
4247 4544
4248 if (is_data) { 4545 if (is_data) {
4249 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4546 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
@@ -4378,10 +4675,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4378 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4675 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4379 4676
4380 btrfs_add_free_space(cache, buf->start, buf->len); 4677 btrfs_add_free_space(cache, buf->start, buf->len);
4381 ret = update_reserved_bytes(cache, buf->len, 0, 0); 4678 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
4382 if (ret == -EAGAIN) { 4679 if (ret == -EAGAIN) {
4383 /* block group became read-only */ 4680 /* block group became read-only */
4384 update_reserved_bytes(cache, buf->len, 0, 1); 4681 btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4385 goto out; 4682 goto out;
4386 } 4683 }
4387 4684
@@ -4396,6 +4693,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4396 if (ret) { 4693 if (ret) {
4397 spin_lock(&cache->space_info->lock); 4694 spin_lock(&cache->space_info->lock);
4398 cache->space_info->bytes_reserved -= buf->len; 4695 cache->space_info->bytes_reserved -= buf->len;
4696 cache->space_info->reservation_progress++;
4399 spin_unlock(&cache->space_info->lock); 4697 spin_unlock(&cache->space_info->lock);
4400 } 4698 }
4401 goto out; 4699 goto out;
@@ -4417,6 +4715,11 @@ pin:
4417 } 4715 }
4418 } 4716 }
4419out: 4717out:
4718 /*
4719 * Deleting the buffer, clear the corrupt flag since it doesn't matter
4720 * anymore.
4721 */
4722 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
4420 btrfs_put_block_group(cache); 4723 btrfs_put_block_group(cache);
4421} 4724}
4422 4725
@@ -4480,7 +4783,7 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4480 return 0; 4783 return 0;
4481 4784
4482 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4785 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4483 (cache->free_space >= num_bytes)); 4786 (cache->free_space_ctl->free_space >= num_bytes));
4484 4787
4485 put_caching_control(caching_ctl); 4788 put_caching_control(caching_ctl);
4486 return 0; 4789 return 0;
@@ -4539,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4539 u64 num_bytes, u64 empty_size, 4842 u64 num_bytes, u64 empty_size,
4540 u64 search_start, u64 search_end, 4843 u64 search_start, u64 search_end,
4541 u64 hint_byte, struct btrfs_key *ins, 4844 u64 hint_byte, struct btrfs_key *ins,
4542 int data) 4845 u64 data)
4543{ 4846{
4544 int ret = 0; 4847 int ret = 0;
4545 struct btrfs_root *root = orig_root->fs_info->extent_root; 4848 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4555,6 +4858,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4555 bool found_uncached_bg = false; 4858 bool found_uncached_bg = false;
4556 bool failed_cluster_refill = false; 4859 bool failed_cluster_refill = false;
4557 bool failed_alloc = false; 4860 bool failed_alloc = false;
4861 bool use_cluster = true;
4558 u64 ideal_cache_percent = 0; 4862 u64 ideal_cache_percent = 0;
4559 u64 ideal_cache_offset = 0; 4863 u64 ideal_cache_offset = 0;
4560 4864
@@ -4565,20 +4869,28 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4565 4869
4566 space_info = __find_space_info(root->fs_info, data); 4870 space_info = __find_space_info(root->fs_info, data);
4567 if (!space_info) { 4871 if (!space_info) {
4568 printk(KERN_ERR "No space info for %d\n", data); 4872 printk(KERN_ERR "No space info for %llu\n", data);
4569 return -ENOSPC; 4873 return -ENOSPC;
4570 } 4874 }
4571 4875
4876 /*
4877 * If the space info is for both data and metadata it means we have a
4878 * small filesystem and we can't use the clustering stuff.
4879 */
4880 if (btrfs_mixed_space_info(space_info))
4881 use_cluster = false;
4882
4572 if (orig_root->ref_cows || empty_size) 4883 if (orig_root->ref_cows || empty_size)
4573 allowed_chunk_alloc = 1; 4884 allowed_chunk_alloc = 1;
4574 4885
4575 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4886 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
4576 last_ptr = &root->fs_info->meta_alloc_cluster; 4887 last_ptr = &root->fs_info->meta_alloc_cluster;
4577 if (!btrfs_test_opt(root, SSD)) 4888 if (!btrfs_test_opt(root, SSD))
4578 empty_cluster = 64 * 1024; 4889 empty_cluster = 64 * 1024;
4579 } 4890 }
4580 4891
4581 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4892 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
4893 btrfs_test_opt(root, SSD)) {
4582 last_ptr = &root->fs_info->data_alloc_cluster; 4894 last_ptr = &root->fs_info->data_alloc_cluster;
4583 } 4895 }
4584 4896
@@ -4638,10 +4950,34 @@ search:
4638 btrfs_get_block_group(block_group); 4950 btrfs_get_block_group(block_group);
4639 search_start = block_group->key.objectid; 4951 search_start = block_group->key.objectid;
4640 4952
4953 /*
4954 * this can happen if we end up cycling through all the
4955 * raid types, but we want to make sure we only allocate
4956 * for the proper type.
4957 */
4958 if (!block_group_bits(block_group, data)) {
4959 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4960 BTRFS_BLOCK_GROUP_RAID1 |
4961 BTRFS_BLOCK_GROUP_RAID10;
4962
4963 /*
4964 * if they asked for extra copies and this block group
4965 * doesn't provide them, bail. This does allow us to
4966 * fill raid0 from raid1.
4967 */
4968 if ((data & extra) && !(block_group->flags & extra))
4969 goto loop;
4970 }
4971
4641have_block_group: 4972have_block_group:
4642 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4973 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4643 u64 free_percent; 4974 u64 free_percent;
4644 4975
4976 ret = cache_block_group(block_group, trans,
4977 orig_root, 1);
4978 if (block_group->cached == BTRFS_CACHE_FINISHED)
4979 goto have_block_group;
4980
4645 free_percent = btrfs_block_group_used(&block_group->item); 4981 free_percent = btrfs_block_group_used(&block_group->item);
4646 free_percent *= 100; 4982 free_percent *= 100;
4647 free_percent = div64_u64(free_percent, 4983 free_percent = div64_u64(free_percent,
@@ -4662,7 +4998,8 @@ have_block_group:
4662 if (loop > LOOP_CACHING_NOWAIT || 4998 if (loop > LOOP_CACHING_NOWAIT ||
4663 (loop > LOOP_FIND_IDEAL && 4999 (loop > LOOP_FIND_IDEAL &&
4664 atomic_read(&space_info->caching_threads) < 2)) { 5000 atomic_read(&space_info->caching_threads) < 2)) {
4665 ret = cache_block_group(block_group); 5001 ret = cache_block_group(block_group, trans,
5002 orig_root, 0);
4666 BUG_ON(ret); 5003 BUG_ON(ret);
4667 } 5004 }
4668 found_uncached_bg = true; 5005 found_uncached_bg = true;
@@ -4682,6 +5019,15 @@ have_block_group:
4682 if (unlikely(block_group->ro)) 5019 if (unlikely(block_group->ro))
4683 goto loop; 5020 goto loop;
4684 5021
5022 spin_lock(&block_group->free_space_ctl->tree_lock);
5023 if (cached &&
5024 block_group->free_space_ctl->free_space <
5025 num_bytes + empty_size) {
5026 spin_unlock(&block_group->free_space_ctl->tree_lock);
5027 goto loop;
5028 }
5029 spin_unlock(&block_group->free_space_ctl->tree_lock);
5030
4685 /* 5031 /*
4686 * Ok we want to try and use the cluster allocator, so let's look 5032 * Ok we want to try and use the cluster allocator, so let's look
4687 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5033 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -4830,7 +5176,7 @@ checks:
4830 search_start - offset); 5176 search_start - offset);
4831 BUG_ON(offset > search_start); 5177 BUG_ON(offset > search_start);
4832 5178
4833 ret = update_reserved_bytes(block_group, num_bytes, 1, 5179 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
4834 (data & BTRFS_BLOCK_GROUP_DATA)); 5180 (data & BTRFS_BLOCK_GROUP_DATA));
4835 if (ret == -EAGAIN) { 5181 if (ret == -EAGAIN) {
4836 btrfs_add_free_space(block_group, offset, num_bytes); 5182 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -4845,6 +5191,7 @@ checks:
4845 btrfs_add_free_space(block_group, offset, 5191 btrfs_add_free_space(block_group, offset,
4846 search_start - offset); 5192 search_start - offset);
4847 BUG_ON(offset > search_start); 5193 BUG_ON(offset > search_start);
5194 btrfs_put_block_group(block_group);
4848 break; 5195 break;
4849loop: 5196loop:
4850 failed_cluster_refill = false; 5197 failed_cluster_refill = false;
@@ -4867,9 +5214,7 @@ loop:
4867 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5214 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
4868 * again 5215 * again
4869 */ 5216 */
4870 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5217 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
4871 (found_uncached_bg || empty_size || empty_cluster ||
4872 allowed_chunk_alloc)) {
4873 index = 0; 5218 index = 0;
4874 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
4875 found_uncached_bg = false; 5220 found_uncached_bg = false;
@@ -4909,40 +5254,39 @@ loop:
4909 goto search; 5254 goto search;
4910 } 5255 }
4911 5256
4912 if (loop < LOOP_CACHING_WAIT) { 5257 loop++;
4913 loop++;
4914 goto search;
4915 }
4916 5258
4917 if (loop == LOOP_ALLOC_CHUNK) { 5259 if (loop == LOOP_ALLOC_CHUNK) {
4918 empty_size = 0; 5260 if (allowed_chunk_alloc) {
4919 empty_cluster = 0; 5261 ret = do_chunk_alloc(trans, root, num_bytes +
4920 } 5262 2 * 1024 * 1024, data,
5263 CHUNK_ALLOC_LIMITED);
5264 allowed_chunk_alloc = 0;
5265 if (ret == 1)
5266 done_chunk_alloc = 1;
5267 } else if (!done_chunk_alloc &&
5268 space_info->force_alloc ==
5269 CHUNK_ALLOC_NO_FORCE) {
5270 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5271 }
4921 5272
4922 if (allowed_chunk_alloc) { 5273 /*
4923 ret = do_chunk_alloc(trans, root, num_bytes + 5274 * We didn't allocate a chunk, go ahead and drop the
4924 2 * 1024 * 1024, data, 1); 5275 * empty size and loop again.
4925 allowed_chunk_alloc = 0; 5276 */
4926 done_chunk_alloc = 1; 5277 if (!done_chunk_alloc)
4927 } else if (!done_chunk_alloc) { 5278 loop = LOOP_NO_EMPTY_SIZE;
4928 space_info->force_alloc = 1;
4929 } 5279 }
4930 5280
4931 if (loop < LOOP_NO_EMPTY_SIZE) { 5281 if (loop == LOOP_NO_EMPTY_SIZE) {
4932 loop++; 5282 empty_size = 0;
4933 goto search; 5283 empty_cluster = 0;
4934 } 5284 }
4935 ret = -ENOSPC; 5285
5286 goto search;
4936 } else if (!ins->objectid) { 5287 } else if (!ins->objectid) {
4937 ret = -ENOSPC; 5288 ret = -ENOSPC;
4938 } 5289 } else if (ins->objectid) {
4939
4940 /* we found what we needed */
4941 if (ins->objectid) {
4942 if (!(data & BTRFS_BLOCK_GROUP_DATA))
4943 trans->block_group = block_group->key.objectid;
4944
4945 btrfs_put_block_group(block_group);
4946 ret = 0; 5290 ret = 0;
4947 } 5291 }
4948 5292
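
The LOOP_* stages driving the retry logic above escalate from cheapest to most expensive pass. Their assumed ordering in this tree (the enum itself sits outside this hunk, so the values here are inferred):

enum {
	LOOP_FIND_IDEAL = 0,	/* only look at the cached ideal offset */
	LOOP_CACHING_NOWAIT = 1,/* search partially cached block groups */
	LOOP_CACHING_WAIT = 2,	/* wait for block group caching to finish */
	LOOP_ALLOC_CHUNK = 3,	/* try to allocate a brand new chunk */
	LOOP_NO_EMPTY_SIZE = 4,	/* drop empty_size/empty_cluster: last try */
};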
@@ -5011,7 +5355,8 @@ again:
5011 */ 5355 */
5012 if (empty_size || root->ref_cows) 5356 if (empty_size || root->ref_cows)
5013 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5357 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5014 num_bytes + 2 * 1024 * 1024, data, 0); 5358 num_bytes + 2 * 1024 * 1024, data,
5359 CHUNK_ALLOC_NO_FORCE);
5015 5360
5016 WARN_ON(num_bytes < root->sectorsize); 5361 WARN_ON(num_bytes < root->sectorsize);
5017 ret = find_free_extent(trans, root, num_bytes, empty_size, 5362 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5023,10 +5368,10 @@ again:
5023 num_bytes = num_bytes & ~(root->sectorsize - 1); 5368 num_bytes = num_bytes & ~(root->sectorsize - 1);
5024 num_bytes = max(num_bytes, min_alloc_size); 5369 num_bytes = max(num_bytes, min_alloc_size);
5025 do_chunk_alloc(trans, root->fs_info->extent_root, 5370 do_chunk_alloc(trans, root->fs_info->extent_root,
5026 num_bytes, data, 1); 5371 num_bytes, data, CHUNK_ALLOC_FORCE);
5027 goto again; 5372 goto again;
5028 } 5373 }
5029 if (ret == -ENOSPC) { 5374 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5030 struct btrfs_space_info *sinfo; 5375 struct btrfs_space_info *sinfo;
5031 5376
5032 sinfo = __find_space_info(root->fs_info, data); 5377 sinfo = __find_space_info(root->fs_info, data);
@@ -5036,6 +5381,8 @@ again:
5036 dump_space_info(sinfo, num_bytes, 1); 5381 dump_space_info(sinfo, num_bytes, 1);
5037 } 5382 }
5038 5383
5384 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
5385
5039 return ret; 5386 return ret;
5040} 5387}
5041 5388
@@ -5051,12 +5398,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5051 return -ENOSPC; 5398 return -ENOSPC;
5052 } 5399 }
5053 5400
5054 ret = btrfs_discard_extent(root, start, len); 5401 if (btrfs_test_opt(root, DISCARD))
5402 ret = btrfs_discard_extent(root, start, len, NULL);
5055 5403
5056 btrfs_add_free_space(cache, start, len); 5404 btrfs_add_free_space(cache, start, len);
5057 update_reserved_bytes(cache, len, 0, 1); 5405 btrfs_update_reserved_bytes(cache, len, 0, 1);
5058 btrfs_put_block_group(cache); 5406 btrfs_put_block_group(cache);
5059 5407
5408 trace_btrfs_reserved_extent_free(root, start, len);
5409
5060 return ret; 5410 return ret;
5061} 5411}
5062 5412
@@ -5083,7 +5433,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5083 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5433 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
5084 5434
5085 path = btrfs_alloc_path(); 5435 path = btrfs_alloc_path();
5086 BUG_ON(!path); 5436 if (!path)
5437 return -ENOMEM;
5087 5438
5088 path->leave_spinning = 1; 5439 path->leave_spinning = 1;
5089 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5440 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5219,7 +5570,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5219 u64 num_bytes = ins->offset; 5570 u64 num_bytes = ins->offset;
5220 5571
5221 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5572 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5222 cache_block_group(block_group); 5573 cache_block_group(block_group, trans, NULL, 0);
5223 caching_ctl = get_caching_control(block_group); 5574 caching_ctl = get_caching_control(block_group);
5224 5575
5225 if (!caching_ctl) { 5576 if (!caching_ctl) {
@@ -5253,7 +5604,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5253 put_caching_control(caching_ctl); 5604 put_caching_control(caching_ctl);
5254 } 5605 }
5255 5606
5256 ret = update_reserved_bytes(block_group, ins->offset, 1, 1); 5607 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
5257 BUG_ON(ret); 5608 BUG_ON(ret);
5258 btrfs_put_block_group(block_group); 5609 btrfs_put_block_group(block_group);
5259 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5610 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -5304,25 +5655,47 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5304 struct btrfs_root *root, u32 blocksize) 5655 struct btrfs_root *root, u32 blocksize)
5305{ 5656{
5306 struct btrfs_block_rsv *block_rsv; 5657 struct btrfs_block_rsv *block_rsv;
5658 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5307 int ret; 5659 int ret;
5308 5660
5309 block_rsv = get_block_rsv(trans, root); 5661 block_rsv = get_block_rsv(trans, root);
5310 5662
5311 if (block_rsv->size == 0) { 5663 if (block_rsv->size == 0) {
5312 ret = reserve_metadata_bytes(block_rsv, blocksize); 5664 ret = reserve_metadata_bytes(trans, root, block_rsv,
5313 if (ret) 5665 blocksize, 0);
5666 /*
5667 * If we couldn't reserve metadata bytes try and use some from
5668 * the global reserve.
5669 */
5670 if (ret && block_rsv != global_rsv) {
5671 ret = block_rsv_use_bytes(global_rsv, blocksize);
5672 if (!ret)
5673 return global_rsv;
5314 return ERR_PTR(ret); 5674 return ERR_PTR(ret);
5675 } else if (ret) {
5676 return ERR_PTR(ret);
5677 }
5315 return block_rsv; 5678 return block_rsv;
5316 } 5679 }
5317 5680
5318 ret = block_rsv_use_bytes(block_rsv, blocksize); 5681 ret = block_rsv_use_bytes(block_rsv, blocksize);
5319 if (!ret) 5682 if (!ret)
5320 return block_rsv; 5683 return block_rsv;
5321 5684 if (ret) {
5322 WARN_ON(1); 5685 WARN_ON(1);
5323 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", 5686 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5324 block_rsv->size, block_rsv->reserved, 5687 0);
5325 block_rsv->freed[0], block_rsv->freed[1]); 5688 if (!ret) {
5689 spin_lock(&block_rsv->lock);
5690 block_rsv->size += blocksize;
5691 spin_unlock(&block_rsv->lock);
5692 return block_rsv;
5693 } else if (ret && block_rsv != global_rsv) {
5694 ret = block_rsv_use_bytes(global_rsv, blocksize);
5695 if (!ret)
5696 return global_rsv;
5697 }
5698 }
5326 5699
5327 return ERR_PTR(-ENOSPC); 5700 return ERR_PTR(-ENOSPC);
5328} 5701}
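
use_block_rsv() above leans on block_rsv_use_bytes() failing without side effects when the reservation is short. A simplified sketch of that contract, inferred from its uses here (not a verbatim copy of the helper):

static int block_rsv_use_bytes_sketch(struct btrfs_block_rsv *block_rsv,
				      u64 num_bytes)
{
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved >= num_bytes) {
		block_rsv->reserved -= num_bytes;
		if (block_rsv->reserved < block_rsv->size)
			block_rsv->full = 0;	/* no longer topped up */
		ret = 0;
	}
	spin_unlock(&block_rsv->lock);
	return ret;	/* rsv untouched on failure; caller may fall back */
}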
@@ -5422,7 +5795,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5422 u64 generation; 5795 u64 generation;
5423 u64 refs; 5796 u64 refs;
5424 u64 flags; 5797 u64 flags;
5425 u64 last = 0;
5426 u32 nritems; 5798 u32 nritems;
5427 u32 blocksize; 5799 u32 blocksize;
5428 struct btrfs_key key; 5800 struct btrfs_key key;
@@ -5490,7 +5862,6 @@ reada:
5490 generation); 5862 generation);
5491 if (ret) 5863 if (ret)
5492 break; 5864 break;
5493 last = bytenr + blocksize;
5494 nread++; 5865 nread++;
5495 } 5866 }
5496 wc->reada_slot = slot; 5867 wc->reada_slot = slot;
@@ -5666,6 +6037,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5666 if (reada && level == 1) 6037 if (reada && level == 1)
5667 reada_walk_down(trans, root, wc, path); 6038 reada_walk_down(trans, root, wc, path);
5668 next = read_tree_block(root, bytenr, blocksize, generation); 6039 next = read_tree_block(root, bytenr, blocksize, generation);
6040 if (!next)
6041 return -EIO;
5669 btrfs_tree_lock(next); 6042 btrfs_tree_lock(next);
5670 btrfs_set_lock_blocking(next); 6043 btrfs_set_lock_blocking(next);
5671 } 6044 }
@@ -5898,6 +6271,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
5898 BUG_ON(!wc); 6271 BUG_ON(!wc);
5899 6272
5900 trans = btrfs_start_transaction(tree_root, 0); 6273 trans = btrfs_start_transaction(tree_root, 0);
6274 BUG_ON(IS_ERR(trans));
6275
5901 if (block_rsv) 6276 if (block_rsv)
5902 trans->block_rsv = block_rsv; 6277 trans->block_rsv = block_rsv;
5903 6278
@@ -5995,11 +6370,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
5995 6370
5996 btrfs_end_transaction_throttle(trans, tree_root); 6371 btrfs_end_transaction_throttle(trans, tree_root);
5997 trans = btrfs_start_transaction(tree_root, 0); 6372 trans = btrfs_start_transaction(tree_root, 0);
6373 BUG_ON(IS_ERR(trans));
5998 if (block_rsv) 6374 if (block_rsv)
5999 trans->block_rsv = block_rsv; 6375 trans->block_rsv = block_rsv;
6000 } 6376 }
6001 } 6377 }
6002 btrfs_release_path(root, path); 6378 btrfs_release_path(path);
6003 BUG_ON(err); 6379 BUG_ON(err);
6004 6380
6005 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6381 ret = btrfs_del_root(trans, tree_root, &root->root_key);
@@ -6010,9 +6386,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6010 NULL, NULL); 6386 NULL, NULL);
6011 BUG_ON(ret < 0); 6387 BUG_ON(ret < 0);
6012 if (ret > 0) { 6388 if (ret > 0) {
6013 ret = btrfs_del_orphan_item(trans, tree_root, 6389 /* if we fail to delete the orphan item this time
6014 root->root_key.objectid); 6390 * around, it'll get picked up the next time.
6015 BUG_ON(ret); 6391 *
6392 * The most common failure here is just -ENOENT.
6393 */
6394 btrfs_del_orphan_item(trans, tree_root,
6395 root->root_key.objectid);
6016 } 6396 }
6017 } 6397 }
6018 6398
@@ -6050,10 +6430,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6050 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6430 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6051 6431
6052 path = btrfs_alloc_path(); 6432 path = btrfs_alloc_path();
6053 BUG_ON(!path); 6433 if (!path)
6434 return -ENOMEM;
6054 6435
6055 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6436 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6056 BUG_ON(!wc); 6437 if (!wc) {
6438 btrfs_free_path(path);
6439 return -ENOMEM;
6440 }
6057 6441
6058 btrfs_assert_tree_locked(parent); 6442 btrfs_assert_tree_locked(parent);
6059 parent_level = btrfs_header_level(parent); 6443 parent_level = btrfs_header_level(parent);
@@ -6095,1500 +6479,20 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6095 return ret; 6479 return ret;
6096} 6480}
6097 6481
6098#if 0
6099static unsigned long calc_ra(unsigned long start, unsigned long last,
6100 unsigned long nr)
6101{
6102 return min(last, start + nr - 1);
6103}
6104
6105static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6106 u64 len)
6107{
6108 u64 page_start;
6109 u64 page_end;
6110 unsigned long first_index;
6111 unsigned long last_index;
6112 unsigned long i;
6113 struct page *page;
6114 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6115 struct file_ra_state *ra;
6116 struct btrfs_ordered_extent *ordered;
6117 unsigned int total_read = 0;
6118 unsigned int total_dirty = 0;
6119 int ret = 0;
6120
6121 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6122
6123 mutex_lock(&inode->i_mutex);
6124 first_index = start >> PAGE_CACHE_SHIFT;
6125 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6126
6127 /* make sure the dirty trick played by the caller works */
6128 ret = invalidate_inode_pages2_range(inode->i_mapping,
6129 first_index, last_index);
6130 if (ret)
6131 goto out_unlock;
6132
6133 file_ra_state_init(ra, inode->i_mapping);
6134
6135 for (i = first_index ; i <= last_index; i++) {
6136 if (total_read % ra->ra_pages == 0) {
6137 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6138 calc_ra(i, last_index, ra->ra_pages));
6139 }
6140 total_read++;
6141again:
6142 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6143 BUG_ON(1);
6144 page = grab_cache_page(inode->i_mapping, i);
6145 if (!page) {
6146 ret = -ENOMEM;
6147 goto out_unlock;
6148 }
6149 if (!PageUptodate(page)) {
6150 btrfs_readpage(NULL, page);
6151 lock_page(page);
6152 if (!PageUptodate(page)) {
6153 unlock_page(page);
6154 page_cache_release(page);
6155 ret = -EIO;
6156 goto out_unlock;
6157 }
6158 }
6159 wait_on_page_writeback(page);
6160
6161 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6162 page_end = page_start + PAGE_CACHE_SIZE - 1;
6163 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6164
6165 ordered = btrfs_lookup_ordered_extent(inode, page_start);
6166 if (ordered) {
6167 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6168 unlock_page(page);
6169 page_cache_release(page);
6170 btrfs_start_ordered_extent(inode, ordered, 1);
6171 btrfs_put_ordered_extent(ordered);
6172 goto again;
6173 }
6174 set_page_extent_mapped(page);
6175
6176 if (i == first_index)
6177 set_extent_bits(io_tree, page_start, page_end,
6178 EXTENT_BOUNDARY, GFP_NOFS);
6179 btrfs_set_extent_delalloc(inode, page_start, page_end);
6180
6181 set_page_dirty(page);
6182 total_dirty++;
6183
6184 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6185 unlock_page(page);
6186 page_cache_release(page);
6187 }
6188
6189out_unlock:
6190 kfree(ra);
6191 mutex_unlock(&inode->i_mutex);
6192 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6193 return ret;
6194}
6195
6196static noinline int relocate_data_extent(struct inode *reloc_inode,
6197 struct btrfs_key *extent_key,
6198 u64 offset)
6199{
6200 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6201 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6202 struct extent_map *em;
6203 u64 start = extent_key->objectid - offset;
6204 u64 end = start + extent_key->offset - 1;
6205
6206 em = alloc_extent_map(GFP_NOFS);
6207 BUG_ON(!em || IS_ERR(em));
6208
6209 em->start = start;
6210 em->len = extent_key->offset;
6211 em->block_len = extent_key->offset;
6212 em->block_start = extent_key->objectid;
6213 em->bdev = root->fs_info->fs_devices->latest_bdev;
6214 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6215
6216 /* set up extent map to cheat btrfs_readpage */
6217 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6218 while (1) {
6219 int ret;
6220 write_lock(&em_tree->lock);
6221 ret = add_extent_mapping(em_tree, em);
6222 write_unlock(&em_tree->lock);
6223 if (ret != -EEXIST) {
6224 free_extent_map(em);
6225 break;
6226 }
6227 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6228 }
6229 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6230
6231 return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6232}
6233
6234struct btrfs_ref_path {
6235 u64 extent_start;
6236 u64 nodes[BTRFS_MAX_LEVEL];
6237 u64 root_objectid;
6238 u64 root_generation;
6239 u64 owner_objectid;
6240 u32 num_refs;
6241 int lowest_level;
6242 int current_level;
6243 int shared_level;
6244
6245 struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6246 u64 new_nodes[BTRFS_MAX_LEVEL];
6247};
6248
6249struct disk_extent {
6250 u64 ram_bytes;
6251 u64 disk_bytenr;
6252 u64 disk_num_bytes;
6253 u64 offset;
6254 u64 num_bytes;
6255 u8 compression;
6256 u8 encryption;
6257 u16 other_encoding;
6258};
6259
6260static int is_cowonly_root(u64 root_objectid)
6261{
6262 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6263 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6264 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6265 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6266 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6267 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6268 return 1;
6269 return 0;
6270}
6271
6272static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6273 struct btrfs_root *extent_root,
6274 struct btrfs_ref_path *ref_path,
6275 int first_time)
6276{
6277 struct extent_buffer *leaf;
6278 struct btrfs_path *path;
6279 struct btrfs_extent_ref *ref;
6280 struct btrfs_key key;
6281 struct btrfs_key found_key;
6282 u64 bytenr;
6283 u32 nritems;
6284 int level;
6285 int ret = 1;
6286
6287 path = btrfs_alloc_path();
6288 if (!path)
6289 return -ENOMEM;
6290
6291 if (first_time) {
6292 ref_path->lowest_level = -1;
6293 ref_path->current_level = -1;
6294 ref_path->shared_level = -1;
6295 goto walk_up;
6296 }
6297walk_down:
6298 level = ref_path->current_level - 1;
6299 while (level >= -1) {
6300 u64 parent;
6301 if (level < ref_path->lowest_level)
6302 break;
6303
6304 if (level >= 0)
6305 bytenr = ref_path->nodes[level];
6306 else
6307 bytenr = ref_path->extent_start;
6308 BUG_ON(bytenr == 0);
6309
6310 parent = ref_path->nodes[level + 1];
6311 ref_path->nodes[level + 1] = 0;
6312 ref_path->current_level = level;
6313 BUG_ON(parent == 0);
6314
6315 key.objectid = bytenr;
6316 key.offset = parent + 1;
6317 key.type = BTRFS_EXTENT_REF_KEY;
6318
6319 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6320 if (ret < 0)
6321 goto out;
6322 BUG_ON(ret == 0);
6323
6324 leaf = path->nodes[0];
6325 nritems = btrfs_header_nritems(leaf);
6326 if (path->slots[0] >= nritems) {
6327 ret = btrfs_next_leaf(extent_root, path);
6328 if (ret < 0)
6329 goto out;
6330 if (ret > 0)
6331 goto next;
6332 leaf = path->nodes[0];
6333 }
6334
6335 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6336 if (found_key.objectid == bytenr &&
6337 found_key.type == BTRFS_EXTENT_REF_KEY) {
6338 if (level < ref_path->shared_level)
6339 ref_path->shared_level = level;
6340 goto found;
6341 }
6342next:
6343 level--;
6344 btrfs_release_path(extent_root, path);
6345 cond_resched();
6346 }
6347 /* reached lowest level */
6348 ret = 1;
6349 goto out;
6350walk_up:
6351 level = ref_path->current_level;
6352 while (level < BTRFS_MAX_LEVEL - 1) {
6353 u64 ref_objectid;
6354
6355 if (level >= 0)
6356 bytenr = ref_path->nodes[level];
6357 else
6358 bytenr = ref_path->extent_start;
6359
6360 BUG_ON(bytenr == 0);
6361
6362 key.objectid = bytenr;
6363 key.offset = 0;
6364 key.type = BTRFS_EXTENT_REF_KEY;
6365
6366 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6367 if (ret < 0)
6368 goto out;
6369
6370 leaf = path->nodes[0];
6371 nritems = btrfs_header_nritems(leaf);
6372 if (path->slots[0] >= nritems) {
6373 ret = btrfs_next_leaf(extent_root, path);
6374 if (ret < 0)
6375 goto out;
6376 if (ret > 0) {
6377 /* the extent was freed by someone */
6378 if (ref_path->lowest_level == level)
6379 goto out;
6380 btrfs_release_path(extent_root, path);
6381 goto walk_down;
6382 }
6383 leaf = path->nodes[0];
6384 }
6385
6386 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6387 if (found_key.objectid != bytenr ||
6388 found_key.type != BTRFS_EXTENT_REF_KEY) {
6389 /* the extent was freed by someone */
6390 if (ref_path->lowest_level == level) {
6391 ret = 1;
6392 goto out;
6393 }
6394 btrfs_release_path(extent_root, path);
6395 goto walk_down;
6396 }
6397found:
6398 ref = btrfs_item_ptr(leaf, path->slots[0],
6399 struct btrfs_extent_ref);
6400 ref_objectid = btrfs_ref_objectid(leaf, ref);
6401 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6402 if (first_time) {
6403 level = (int)ref_objectid;
6404 BUG_ON(level >= BTRFS_MAX_LEVEL);
6405 ref_path->lowest_level = level;
6406 ref_path->current_level = level;
6407 ref_path->nodes[level] = bytenr;
6408 } else {
6409 WARN_ON(ref_objectid != level);
6410 }
6411 } else {
6412 WARN_ON(level != -1);
6413 }
6414 first_time = 0;
6415
6416 if (ref_path->lowest_level == level) {
6417 ref_path->owner_objectid = ref_objectid;
6418 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6419 }
6420
6421 /*
6422 * the block is a tree root or the block isn't in a
6423 * reference-counted tree.
6424 */
6425 if (found_key.objectid == found_key.offset ||
6426 is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6427 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6428 ref_path->root_generation =
6429 btrfs_ref_generation(leaf, ref);
6430 if (level < 0) {
6431 /* special reference from the tree log */
6432 ref_path->nodes[0] = found_key.offset;
6433 ref_path->current_level = 0;
6434 }
6435 ret = 0;
6436 goto out;
6437 }
6438
6439 level++;
6440 BUG_ON(ref_path->nodes[level] != 0);
6441 ref_path->nodes[level] = found_key.offset;
6442 ref_path->current_level = level;
6443
6444 /*
6445 * the reference was created in the running transaction,
6446 * no need to continue walking up.
6447 */
6448 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6449 ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6450 ref_path->root_generation =
6451 btrfs_ref_generation(leaf, ref);
6452 ret = 0;
6453 goto out;
6454 }
6455
6456 btrfs_release_path(extent_root, path);
6457 cond_resched();
6458 }
6459 /* reached max tree level, but no tree root found. */
6460 BUG();
6461out:
6462 btrfs_free_path(path);
6463 return ret;
6464}
6465
6466static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6467 struct btrfs_root *extent_root,
6468 struct btrfs_ref_path *ref_path,
6469 u64 extent_start)
6470{
6471 memset(ref_path, 0, sizeof(*ref_path));
6472 ref_path->extent_start = extent_start;
6473
6474 return __next_ref_path(trans, extent_root, ref_path, 1);
6475}
6476
6477static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
6478 struct btrfs_root *extent_root,
6479 struct btrfs_ref_path *ref_path)
6480{
6481 return __next_ref_path(trans, extent_root, ref_path, 0);
6482}
6483
6484static noinline int get_new_locations(struct inode *reloc_inode,
6485 struct btrfs_key *extent_key,
6486 u64 offset, int no_fragment,
6487 struct disk_extent **extents,
6488 int *nr_extents)
6489{
6490 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6491 struct btrfs_path *path;
6492 struct btrfs_file_extent_item *fi;
6493 struct extent_buffer *leaf;
6494 struct disk_extent *exts = *extents;
6495 struct btrfs_key found_key;
6496 u64 cur_pos;
6497 u64 last_byte;
6498 u32 nritems;
6499 int nr = 0;
6500 int max = *nr_extents;
6501 int ret;
6502
6503 WARN_ON(!no_fragment && *extents);
6504 if (!exts) {
6505 max = 1;
6506 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6507 if (!exts)
6508 return -ENOMEM;
6509 }
6510
6511 path = btrfs_alloc_path();
6512 BUG_ON(!path);
6513
6514 cur_pos = extent_key->objectid - offset;
6515 last_byte = extent_key->objectid + extent_key->offset;
6516 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
6517 cur_pos, 0);
6518 if (ret < 0)
6519 goto out;
6520 if (ret > 0) {
6521 ret = -ENOENT;
6522 goto out;
6523 }
6524
6525 while (1) {
6526 leaf = path->nodes[0];
6527 nritems = btrfs_header_nritems(leaf);
6528 if (path->slots[0] >= nritems) {
6529 ret = btrfs_next_leaf(root, path);
6530 if (ret < 0)
6531 goto out;
6532 if (ret > 0)
6533 break;
6534 leaf = path->nodes[0];
6535 }
6536
6537 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6538 if (found_key.offset != cur_pos ||
6539 found_key.type != BTRFS_EXTENT_DATA_KEY ||
6540 found_key.objectid != reloc_inode->i_ino)
6541 break;
6542
6543 fi = btrfs_item_ptr(leaf, path->slots[0],
6544 struct btrfs_file_extent_item);
6545 if (btrfs_file_extent_type(leaf, fi) !=
6546 BTRFS_FILE_EXTENT_REG ||
6547 btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
6548 break;
6549
6550 if (nr == max) {
6551 struct disk_extent *old = exts;
6552 max *= 2;
6553 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
6554 memcpy(exts, old, sizeof(*exts) * nr);
6555 if (old != *extents)
6556 kfree(old);
6557 }
6558
6559 exts[nr].disk_bytenr =
6560 btrfs_file_extent_disk_bytenr(leaf, fi);
6561 exts[nr].disk_num_bytes =
6562 btrfs_file_extent_disk_num_bytes(leaf, fi);
6563 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
6564 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
6565 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6566 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
6567 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
6568 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
6569 fi);
6570 BUG_ON(exts[nr].offset > 0);
6571 BUG_ON(exts[nr].compression || exts[nr].encryption);
6572 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
6573
6574 cur_pos += exts[nr].num_bytes;
6575 nr++;
6576
6577 if (cur_pos + offset >= last_byte)
6578 break;
6579
6580 if (no_fragment) {
6581 ret = 1;
6582 goto out;
6583 }
6584 path->slots[0]++;
6585 }
6586
6587 BUG_ON(cur_pos + offset > last_byte);
6588 if (cur_pos + offset < last_byte) {
6589 ret = -ENOENT;
6590 goto out;
6591 }
6592 ret = 0;
6593out:
6594 btrfs_free_path(path);
6595 if (ret) {
6596 if (exts != *extents)
6597 kfree(exts);
6598 } else {
6599 *extents = exts;
6600 *nr_extents = nr;
6601 }
6602 return ret;
6603}
6604
6605static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
6606 struct btrfs_root *root,
6607 struct btrfs_path *path,
6608 struct btrfs_key *extent_key,
6609 struct btrfs_key *leaf_key,
6610 struct btrfs_ref_path *ref_path,
6611 struct disk_extent *new_extents,
6612 int nr_extents)
6613{
6614 struct extent_buffer *leaf;
6615 struct btrfs_file_extent_item *fi;
6616 struct inode *inode = NULL;
6617 struct btrfs_key key;
6618 u64 lock_start = 0;
6619 u64 lock_end = 0;
6620 u64 num_bytes;
6621 u64 ext_offset;
6622 u64 search_end = (u64)-1;
6623 u32 nritems;
6624 int nr_scaned = 0;
6625 int extent_locked = 0;
6626 int extent_type;
6627 int ret;
6628
6629 memcpy(&key, leaf_key, sizeof(key));
6630 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
6631 if (key.objectid < ref_path->owner_objectid ||
6632 (key.objectid == ref_path->owner_objectid &&
6633 key.type < BTRFS_EXTENT_DATA_KEY)) {
6634 key.objectid = ref_path->owner_objectid;
6635 key.type = BTRFS_EXTENT_DATA_KEY;
6636 key.offset = 0;
6637 }
6638 }
6639
6640 while (1) {
6641 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6642 if (ret < 0)
6643 goto out;
6644
6645 leaf = path->nodes[0];
6646 nritems = btrfs_header_nritems(leaf);
6647next:
6648 if (extent_locked && ret > 0) {
6649 /*
6650 * the file extent item was modified by someone
6651 * before the extent got locked.
6652 */
6653 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
6654 lock_end, GFP_NOFS);
6655 extent_locked = 0;
6656 }
6657
6658 if (path->slots[0] >= nritems) {
6659 if (++nr_scaned > 2)
6660 break;
6661
6662 BUG_ON(extent_locked);
6663 ret = btrfs_next_leaf(root, path);
6664 if (ret < 0)
6665 goto out;
6666 if (ret > 0)
6667 break;
6668 leaf = path->nodes[0];
6669 nritems = btrfs_header_nritems(leaf);
6670 }
6671
6672 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6673
6674 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
6675 if ((key.objectid > ref_path->owner_objectid) ||
6676 (key.objectid == ref_path->owner_objectid &&
6677 key.type > BTRFS_EXTENT_DATA_KEY) ||
6678 key.offset >= search_end)
6679 break;
6680 }
6681
6682 if (inode && key.objectid != inode->i_ino) {
6683 BUG_ON(extent_locked);
6684 btrfs_release_path(root, path);
6685 mutex_unlock(&inode->i_mutex);
6686 iput(inode);
6687 inode = NULL;
6688 continue;
6689 }
6690
6691 if (key.type != BTRFS_EXTENT_DATA_KEY) {
6692 path->slots[0]++;
6693 ret = 1;
6694 goto next;
6695 }
6696 fi = btrfs_item_ptr(leaf, path->slots[0],
6697 struct btrfs_file_extent_item);
6698 extent_type = btrfs_file_extent_type(leaf, fi);
6699 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
6700 extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
6701 (btrfs_file_extent_disk_bytenr(leaf, fi) !=
6702 extent_key->objectid)) {
6703 path->slots[0]++;
6704 ret = 1;
6705 goto next;
6706 }
6707
6708 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
6709 ext_offset = btrfs_file_extent_offset(leaf, fi);
6710
6711 if (search_end == (u64)-1) {
6712 search_end = key.offset - ext_offset +
6713 btrfs_file_extent_ram_bytes(leaf, fi);
6714 }
6715
6716 if (!extent_locked) {
6717 lock_start = key.offset;
6718 lock_end = lock_start + num_bytes - 1;
6719 } else {
6720 if (lock_start > key.offset ||
6721 lock_end + 1 < key.offset + num_bytes) {
6722 unlock_extent(&BTRFS_I(inode)->io_tree,
6723 lock_start, lock_end, GFP_NOFS);
6724 extent_locked = 0;
6725 }
6726 }
6727
6728 if (!inode) {
6729 btrfs_release_path(root, path);
6730
6731 inode = btrfs_iget_locked(root->fs_info->sb,
6732 key.objectid, root);
6733 if (inode->i_state & I_NEW) {
6734 BTRFS_I(inode)->root = root;
6735 BTRFS_I(inode)->location.objectid =
6736 key.objectid;
6737 BTRFS_I(inode)->location.type =
6738 BTRFS_INODE_ITEM_KEY;
6739 BTRFS_I(inode)->location.offset = 0;
6740 btrfs_read_locked_inode(inode);
6741 unlock_new_inode(inode);
6742 }
6743 /*
6744 * some code calls btrfs_commit_transaction while
6745 * holding the i_mutex, so we can't use mutex_lock
6746 * here.
6747 */
6748 if (is_bad_inode(inode) ||
6749 !mutex_trylock(&inode->i_mutex)) {
6750 iput(inode);
6751 inode = NULL;
6752 key.offset = (u64)-1;
6753 goto skip;
6754 }
6755 }
6756
6757 if (!extent_locked) {
6758 struct btrfs_ordered_extent *ordered;
6759
6760 btrfs_release_path(root, path);
6761
6762 lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
6763 lock_end, GFP_NOFS);
6764 ordered = btrfs_lookup_first_ordered_extent(inode,
6765 lock_end);
6766 if (ordered &&
6767 ordered->file_offset <= lock_end &&
6768 ordered->file_offset + ordered->len > lock_start) {
6769 unlock_extent(&BTRFS_I(inode)->io_tree,
6770 lock_start, lock_end, GFP_NOFS);
6771 btrfs_start_ordered_extent(inode, ordered, 1);
6772 btrfs_put_ordered_extent(ordered);
6773 key.offset += num_bytes;
6774 goto skip;
6775 }
6776 if (ordered)
6777 btrfs_put_ordered_extent(ordered);
6778
6779 extent_locked = 1;
6780 continue;
6781 }
6782
6783 if (nr_extents == 1) {
6784 /* update extent pointer in place */
6785 btrfs_set_file_extent_disk_bytenr(leaf, fi,
6786 new_extents[0].disk_bytenr);
6787 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
6788 new_extents[0].disk_num_bytes);
6789 btrfs_mark_buffer_dirty(leaf);
6790
6791 btrfs_drop_extent_cache(inode, key.offset,
6792 key.offset + num_bytes - 1, 0);
6793
6794 ret = btrfs_inc_extent_ref(trans, root,
6795 new_extents[0].disk_bytenr,
6796 new_extents[0].disk_num_bytes,
6797 leaf->start,
6798 root->root_key.objectid,
6799 trans->transid,
6800 key.objectid);
6801 BUG_ON(ret);
6802
6803 ret = btrfs_free_extent(trans, root,
6804 extent_key->objectid,
6805 extent_key->offset,
6806 leaf->start,
6807 btrfs_header_owner(leaf),
6808 btrfs_header_generation(leaf),
6809 key.objectid, 0);
6810 BUG_ON(ret);
6811
6812 btrfs_release_path(root, path);
6813 key.offset += num_bytes;
6814 } else {
6815 BUG_ON(1);
6816#if 0
6817 u64 alloc_hint;
6818 u64 extent_len;
6819 int i;
6820 /*
6821 * drop the old extent pointer first, then insert the
6822 * new pointers one by one
6823 */
6824 btrfs_release_path(root, path);
6825 ret = btrfs_drop_extents(trans, root, inode, key.offset,
6826 key.offset + num_bytes,
6827 key.offset, &alloc_hint);
6828 BUG_ON(ret);
6829
6830 for (i = 0; i < nr_extents; i++) {
6831 if (ext_offset >= new_extents[i].num_bytes) {
6832 ext_offset -= new_extents[i].num_bytes;
6833 continue;
6834 }
6835 extent_len = min(new_extents[i].num_bytes -
6836 ext_offset, num_bytes);
6837
6838 ret = btrfs_insert_empty_item(trans, root,
6839 path, &key,
6840 sizeof(*fi));
6841 BUG_ON(ret);
6842
6843 leaf = path->nodes[0];
6844 fi = btrfs_item_ptr(leaf, path->slots[0],
6845 struct btrfs_file_extent_item);
6846 btrfs_set_file_extent_generation(leaf, fi,
6847 trans->transid);
6848 btrfs_set_file_extent_type(leaf, fi,
6849 BTRFS_FILE_EXTENT_REG);
6850 btrfs_set_file_extent_disk_bytenr(leaf, fi,
6851 new_extents[i].disk_bytenr);
6852 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
6853 new_extents[i].disk_num_bytes);
6854 btrfs_set_file_extent_ram_bytes(leaf, fi,
6855 new_extents[i].ram_bytes);
6856
6857 btrfs_set_file_extent_compression(leaf, fi,
6858 new_extents[i].compression);
6859 btrfs_set_file_extent_encryption(leaf, fi,
6860 new_extents[i].encryption);
6861 btrfs_set_file_extent_other_encoding(leaf, fi,
6862 new_extents[i].other_encoding);
6863
6864 btrfs_set_file_extent_num_bytes(leaf, fi,
6865 extent_len);
6866 ext_offset += new_extents[i].offset;
6867 btrfs_set_file_extent_offset(leaf, fi,
6868 ext_offset);
6869 btrfs_mark_buffer_dirty(leaf);
6870
6871 btrfs_drop_extent_cache(inode, key.offset,
6872 key.offset + extent_len - 1, 0);
6873
6874 ret = btrfs_inc_extent_ref(trans, root,
6875 new_extents[i].disk_bytenr,
6876 new_extents[i].disk_num_bytes,
6877 leaf->start,
6878 root->root_key.objectid,
6879 trans->transid, key.objectid);
6880 BUG_ON(ret);
6881 btrfs_release_path(root, path);
6882
6883 inode_add_bytes(inode, extent_len);
6884
6885 ext_offset = 0;
6886 num_bytes -= extent_len;
6887 key.offset += extent_len;
6888
6889 if (num_bytes == 0)
6890 break;
6891 }
6892 BUG_ON(i >= nr_extents);
6893#endif
6894 }
6895
6896 if (extent_locked) {
6897 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
6898 lock_end, GFP_NOFS);
6899 extent_locked = 0;
6900 }
6901skip:
6902 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
6903 key.offset >= search_end)
6904 break;
6905
6906 cond_resched();
6907 }
6908 ret = 0;
6909out:
6910 btrfs_release_path(root, path);
6911 if (inode) {
6912 mutex_unlock(&inode->i_mutex);
6913 if (extent_locked) {
6914 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
6915 lock_end, GFP_NOFS);
6916 }
6917 iput(inode);
6918 }
6919 return ret;
6920}
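
The retry loop in the function above (lock the file range, look for an overlapping ordered extent, and if one is in flight drop the lock, wait, and try again) is a self-contained pattern. A minimal sketch under toy assumptions — range_lock(), find_ordered(), and wait_ordered() are hypothetical stand-ins, not the btrfs io_tree/ordered-extent API:

#include <stdio.h>

/* Hypothetical stand-ins for the io_tree range lock and the
 * ordered-extent lookup; the kernel versions take an inode and a
 * byte range.  This models exactly one in-flight write. */
static int range_locked;
static int ordered_pending = 1;

static void range_lock(void)   { range_locked = 1; }
static void range_unlock(void) { range_locked = 0; }
static int  find_ordered(void) { return ordered_pending; }
static void wait_ordered(void) { ordered_pending = 0; }

/* Lock a file range only once no ordered (in-flight) I/O overlaps
 * it: take the lock, check for an overlapping ordered extent, and if
 * one exists drop the lock, wait for the I/O, then retry. */
static void lock_range_no_ordered(void)
{
	for (;;) {
		range_lock();
		if (!find_ordered())
			return;		/* locked, nothing in flight */
		range_unlock();		/* let the write finish */
		wait_ordered();
	}
}

int main(void)
{
	lock_range_no_ordered();
	printf("locked=%d pending=%d\n", range_locked, ordered_pending);
	return 0;
}
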
6921
6922int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
6923 struct btrfs_root *root,
6924 struct extent_buffer *buf, u64 orig_start)
6925{
6926 int level;
6927 int ret;
6928
6929 BUG_ON(btrfs_header_generation(buf) != trans->transid);
6930 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6931
6932 level = btrfs_header_level(buf);
6933 if (level == 0) {
6934 struct btrfs_leaf_ref *ref;
6935 struct btrfs_leaf_ref *orig_ref;
6936
6937 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
6938 if (!orig_ref)
6939 return -ENOENT;
6940
6941 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
6942 if (!ref) {
6943 btrfs_free_leaf_ref(root, orig_ref);
6944 return -ENOMEM;
6945 }
6946
6947 ref->nritems = orig_ref->nritems;
6948 memcpy(ref->extents, orig_ref->extents,
6949 sizeof(ref->extents[0]) * ref->nritems);
6950
6951 btrfs_free_leaf_ref(root, orig_ref);
6952
6953 ref->root_gen = trans->transid;
6954 ref->bytenr = buf->start;
6955 ref->owner = btrfs_header_owner(buf);
6956 ref->generation = btrfs_header_generation(buf);
6957
6958 ret = btrfs_add_leaf_ref(root, ref, 0);
6959 WARN_ON(ret);
6960 btrfs_free_leaf_ref(root, ref);
6961 }
6962 return 0;
6963}
6964
6965static noinline int invalidate_extent_cache(struct btrfs_root *root,
6966 struct extent_buffer *leaf,
6967 struct btrfs_block_group_cache *group,
6968 struct btrfs_root *target_root)
6969{
6970 struct btrfs_key key;
6971 struct inode *inode = NULL;
6972 struct btrfs_file_extent_item *fi;
6973 struct extent_state *cached_state = NULL;
6974 u64 num_bytes;
6975 u64 skip_objectid = 0;
6976 u32 nritems;
6977 u32 i;
6978
6979 nritems = btrfs_header_nritems(leaf);
6980 for (i = 0; i < nritems; i++) {
6981 btrfs_item_key_to_cpu(leaf, &key, i);
6982 if (key.objectid == skip_objectid ||
6983 key.type != BTRFS_EXTENT_DATA_KEY)
6984 continue;
6985 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
6986 if (btrfs_file_extent_type(leaf, fi) ==
6987 BTRFS_FILE_EXTENT_INLINE)
6988 continue;
6989 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
6990 continue;
6991 if (!inode || inode->i_ino != key.objectid) {
6992 iput(inode);
6993 inode = btrfs_ilookup(target_root->fs_info->sb,
6994 key.objectid, target_root, 1);
6995 }
6996 if (!inode) {
6997 skip_objectid = key.objectid;
6998 continue;
6999 }
7000 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7001
7002 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
7003 key.offset + num_bytes - 1, 0, &cached_state,
7004 GFP_NOFS);
7005 btrfs_drop_extent_cache(inode, key.offset,
7006 key.offset + num_bytes - 1, 1);
7007 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
7008 key.offset + num_bytes - 1, &cached_state,
7009 GFP_NOFS);
7010 cond_resched();
7011 }
7012 iput(inode);
7013 return 0;
7014}
7015
7016static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7017 struct btrfs_root *root,
7018 struct extent_buffer *leaf,
7019 struct btrfs_block_group_cache *group,
7020 struct inode *reloc_inode)
7021{
7022 struct btrfs_key key;
7023 struct btrfs_key extent_key;
7024 struct btrfs_file_extent_item *fi;
7025 struct btrfs_leaf_ref *ref;
7026 struct disk_extent *new_extent;
7027 u64 bytenr;
7028 u64 num_bytes;
7029 u32 nritems;
7030 u32 i;
7031 int ext_index;
7032 int nr_extent;
7033 int ret;
7034
7035 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7036 BUG_ON(!new_extent);
7037
7038 ref = btrfs_lookup_leaf_ref(root, leaf->start);
7039 BUG_ON(!ref);
7040
7041 ext_index = -1;
7042 nritems = btrfs_header_nritems(leaf);
7043 for (i = 0; i < nritems; i++) {
7044 btrfs_item_key_to_cpu(leaf, &key, i);
7045 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
7046 continue;
7047 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7048 if (btrfs_file_extent_type(leaf, fi) ==
7049 BTRFS_FILE_EXTENT_INLINE)
7050 continue;
7051 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7052 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7053 if (bytenr == 0)
7054 continue;
7055
7056 ext_index++;
7057 if (bytenr >= group->key.objectid + group->key.offset ||
7058 bytenr + num_bytes <= group->key.objectid)
7059 continue;
7060
7061 extent_key.objectid = bytenr;
7062 extent_key.offset = num_bytes;
7063 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
7064 nr_extent = 1;
7065 ret = get_new_locations(reloc_inode, &extent_key,
7066 group->key.objectid, 1,
7067 &new_extent, &nr_extent);
7068 if (ret > 0)
7069 continue;
7070 BUG_ON(ret < 0);
7071
7072 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
7073 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
7074 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
7075 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
7076
7077 btrfs_set_file_extent_disk_bytenr(leaf, fi,
7078 new_extent->disk_bytenr);
7079 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7080 new_extent->disk_num_bytes);
7081 btrfs_mark_buffer_dirty(leaf);
7082
7083 ret = btrfs_inc_extent_ref(trans, root,
7084 new_extent->disk_bytenr,
7085 new_extent->disk_num_bytes,
7086 leaf->start,
7087 root->root_key.objectid,
7088 trans->transid, key.objectid);
7089 BUG_ON(ret);
7090
7091 ret = btrfs_free_extent(trans, root,
7092 bytenr, num_bytes, leaf->start,
7093 btrfs_header_owner(leaf),
7094 btrfs_header_generation(leaf),
7095 key.objectid, 0);
7096 BUG_ON(ret);
7097 cond_resched();
7098 }
7099 kfree(new_extent);
7100 BUG_ON(ext_index + 1 != ref->nritems);
7101 btrfs_free_leaf_ref(root, ref);
7102 return 0;
7103}
7104
7105int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7106 struct btrfs_root *root)
7107{
7108 struct btrfs_root *reloc_root;
7109 int ret;
7110
7111 if (root->reloc_root) {
7112 reloc_root = root->reloc_root;
7113 root->reloc_root = NULL;
7114 list_add(&reloc_root->dead_list,
7115 &root->fs_info->dead_reloc_roots);
7116
7117 btrfs_set_root_bytenr(&reloc_root->root_item,
7118 reloc_root->node->start);
7119 btrfs_set_root_level(&reloc_root->root_item,
7120 btrfs_header_level(reloc_root->node));
7121 memset(&reloc_root->root_item.drop_progress, 0,
7122 sizeof(struct btrfs_disk_key));
7123 reloc_root->root_item.drop_level = 0;
7124
7125 ret = btrfs_update_root(trans, root->fs_info->tree_root,
7126 &reloc_root->root_key,
7127 &reloc_root->root_item);
7128 BUG_ON(ret);
7129 }
7130 return 0;
7131}
7132
7133int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7134{
7135 struct btrfs_trans_handle *trans;
7136 struct btrfs_root *reloc_root;
7137 struct btrfs_root *prev_root = NULL;
7138 struct list_head dead_roots;
7139 int ret;
7140 unsigned long nr;
7141
7142 INIT_LIST_HEAD(&dead_roots);
7143 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
7144
7145 while (!list_empty(&dead_roots)) {
7146 reloc_root = list_entry(dead_roots.prev,
7147 struct btrfs_root, dead_list);
7148 list_del_init(&reloc_root->dead_list);
7149
7150 BUG_ON(reloc_root->commit_root != NULL);
7151 while (1) {
7152 trans = btrfs_join_transaction(root, 1);
7153 BUG_ON(!trans);
7154
7155 mutex_lock(&root->fs_info->drop_mutex);
7156 ret = btrfs_drop_snapshot(trans, reloc_root);
7157 if (ret != -EAGAIN)
7158 break;
7159 mutex_unlock(&root->fs_info->drop_mutex);
7160
7161 nr = trans->blocks_used;
7162 ret = btrfs_end_transaction(trans, root);
7163 BUG_ON(ret);
7164 btrfs_btree_balance_dirty(root, nr);
7165 }
7166
7167 free_extent_buffer(reloc_root->node);
7168
7169 ret = btrfs_del_root(trans, root->fs_info->tree_root,
7170 &reloc_root->root_key);
7171 BUG_ON(ret);
7172 mutex_unlock(&root->fs_info->drop_mutex);
7173
7174 nr = trans->blocks_used;
7175 ret = btrfs_end_transaction(trans, root);
7176 BUG_ON(ret);
7177 btrfs_btree_balance_dirty(root, nr);
7178
7179 kfree(prev_root);
7180 prev_root = reloc_root;
7181 }
7182 if (prev_root) {
7183 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
7184 kfree(prev_root);
7185 }
7186 return 0;
7187}
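
btrfs_drop_snapshot above returns -EAGAIN while work remains, so the caller deletes a dead reloc root a chunk at a time, ending the transaction between passes so dirty metadata can be flushed. A toy, self-contained sketch of that control flow (drop_some() and the elided transaction calls are illustrative placeholders, not the btrfs API):

#include <stdio.h>

#define TOY_EAGAIN 11	/* stand-in for the kernel's -EAGAIN */

static int items_left = 3;	/* pretend the dead root has 3 chunks */

/* Delete one chunk of work; report TOY_EAGAIN while more remains. */
static int drop_some(void)
{
	items_left--;
	return items_left > 0 ? -TOY_EAGAIN : 0;
}

int main(void)
{
	int ret;

	do {
		/* join/start transaction (elided) */
		ret = drop_some();
		/* end transaction and balance dirty metadata (elided) */
		printf("pass done, %d chunk(s) left\n", items_left);
	} while (ret == -TOY_EAGAIN);
	return 0;
}
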
7188
7189int btrfs_add_dead_reloc_root(struct btrfs_root *root)
7190{
7191 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
7192 return 0;
7193}
7194
7195int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7196{
7197 struct btrfs_root *reloc_root;
7198 struct btrfs_trans_handle *trans;
7199 struct btrfs_key location;
7200 int found;
7201 int ret;
7202
7203 mutex_lock(&root->fs_info->tree_reloc_mutex);
7204 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
7205 BUG_ON(ret);
7206 found = !list_empty(&root->fs_info->dead_reloc_roots);
7207 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7208
7209 if (found) {
7210 trans = btrfs_start_transaction(root, 1);
7211 BUG_ON(!trans);
7212 ret = btrfs_commit_transaction(trans, root);
7213 BUG_ON(ret);
7214 }
7215
7216 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7217 location.offset = (u64)-1;
7218 location.type = BTRFS_ROOT_ITEM_KEY;
7219
7220 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7221 BUG_ON(!reloc_root);
7222 btrfs_orphan_cleanup(reloc_root);
7223 return 0;
7224}
7225
7226static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7227 struct btrfs_root *root)
7228{
7229 struct btrfs_root *reloc_root;
7230 struct extent_buffer *eb;
7231 struct btrfs_root_item *root_item;
7232 struct btrfs_key root_key;
7233 int ret;
7234
7235 BUG_ON(!root->ref_cows);
7236 if (root->reloc_root)
7237 return 0;
7238
7239 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7240 BUG_ON(!root_item);
7241
7242 ret = btrfs_copy_root(trans, root, root->commit_root,
7243 &eb, BTRFS_TREE_RELOC_OBJECTID);
7244 BUG_ON(ret);
7245
7246 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7247 root_key.offset = root->root_key.objectid;
7248 root_key.type = BTRFS_ROOT_ITEM_KEY;
7249
7250 memcpy(root_item, &root->root_item, sizeof(*root_item));
7251 btrfs_set_root_refs(root_item, 0);
7252 btrfs_set_root_bytenr(root_item, eb->start);
7253 btrfs_set_root_level(root_item, btrfs_header_level(eb));
7254 btrfs_set_root_generation(root_item, trans->transid);
7255
7256 btrfs_tree_unlock(eb);
7257 free_extent_buffer(eb);
7258
7259 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7260 &root_key, root_item);
7261 BUG_ON(ret);
7262 kfree(root_item);
7263
7264 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7265 &root_key);
7266 BUG_ON(!reloc_root);
7267 reloc_root->last_trans = trans->transid;
7268 reloc_root->commit_root = NULL;
7269 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7270
7271 root->reloc_root = reloc_root;
7272 return 0;
7273}
7274
7275/*
7276 * Core function of space balance.
7277 *
7278 * The idea is to use reloc trees to relocate tree blocks in
7279 * reference-counted roots.  There is one reloc tree for each subvol,
7280 * and all reloc trees share the same root key objectid.  Reloc trees
7281 * are snapshots of the latest committed roots of subvols
7282 * (root->commit_root).
7283 *
7284 * Relocating a tree block referenced by a subvol takes two steps:
7285 * COW the block through the subvol's reloc tree, then update the
7286 * block pointer in the subvol to point to the new block.  Since all
7287 * reloc trees share the same root key objectid, special handling for
7288 * tree blocks owned by them is easy: once a block has been COWed in
7289 * one reloc tree, the new block can be reused directly when the same
7290 * block must be COWed again through another reloc tree.  In this way,
7291 * relocated blocks are shared between reloc trees and hence subvols.
7292 */
7293static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
7294 struct btrfs_root *root,
7295 struct btrfs_path *path,
7296 struct btrfs_key *first_key,
7297 struct btrfs_ref_path *ref_path,
7298 struct btrfs_block_group_cache *group,
7299 struct inode *reloc_inode)
7300{
7301 struct btrfs_root *reloc_root;
7302 struct extent_buffer *eb = NULL;
7303 struct btrfs_key *keys;
7304 u64 *nodes;
7305 int level;
7306 int shared_level;
7307 int lowest_level = 0;
7308 int ret;
7309
7310 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
7311 lowest_level = ref_path->owner_objectid;
7312
7313 if (!root->ref_cows) {
7314 path->lowest_level = lowest_level;
7315 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
7316 BUG_ON(ret < 0);
7317 path->lowest_level = 0;
7318 btrfs_release_path(root, path);
7319 return 0;
7320 }
7321
7322 mutex_lock(&root->fs_info->tree_reloc_mutex);
7323 ret = init_reloc_tree(trans, root);
7324 BUG_ON(ret);
7325 reloc_root = root->reloc_root;
7326
7327 shared_level = ref_path->shared_level;
7328 ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
7329
7330 keys = ref_path->node_keys;
7331 nodes = ref_path->new_nodes;
7332 memset(&keys[shared_level + 1], 0,
7333 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
7334 memset(&nodes[shared_level + 1], 0,
7335 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
7336
7337 if (nodes[lowest_level] == 0) {
7338 path->lowest_level = lowest_level;
7339 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7340 0, 1);
7341 BUG_ON(ret);
7342 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
7343 eb = path->nodes[level];
7344 if (!eb || eb == reloc_root->node)
7345 break;
7346 nodes[level] = eb->start;
7347 if (level == 0)
7348 btrfs_item_key_to_cpu(eb, &keys[level], 0);
7349 else
7350 btrfs_node_key_to_cpu(eb, &keys[level], 0);
7351 }
7352 if (nodes[0] &&
7353 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7354 eb = path->nodes[0];
7355 ret = replace_extents_in_leaf(trans, reloc_root, eb,
7356 group, reloc_inode);
7357 BUG_ON(ret);
7358 }
7359 btrfs_release_path(reloc_root, path);
7360 } else {
7361 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
7362 lowest_level);
7363 BUG_ON(ret);
7364 }
7365
7366 /*
7367 * replace tree blocks in the fs tree with tree blocks in
7368 * the reloc tree.
7369 */
7370 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
7371 BUG_ON(ret < 0);
7372
7373 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7374 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7375 0, 0);
7376 BUG_ON(ret);
7377 extent_buffer_get(path->nodes[0]);
7378 eb = path->nodes[0];
7379 btrfs_release_path(reloc_root, path);
7380 ret = invalidate_extent_cache(reloc_root, eb, group, root);
7381 BUG_ON(ret);
7382 free_extent_buffer(eb);
7383 }
7384
7385 mutex_unlock(&root->fs_info->tree_reloc_mutex);
7386 path->lowest_level = 0;
7387 return 0;
7388}
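
The block-sharing property described in the comment before relocate_one_path can be modeled in a few lines: the first reloc tree to COW a block records the old-to-new mapping, and later reloc trees reuse it, so relocated blocks stay shared across subvols. A toy sketch (the fixed-size cache and the bytenr values are illustrative; the kernel keys this off the shared BTRFS_TREE_RELOC_OBJECTID instead):

#include <stdio.h>

struct reloc_map { unsigned long long old_bytenr, new_bytenr; };

static struct reloc_map cache[16];
static int nr_cached;
static unsigned long long next_bytenr = 0x10000;	/* toy allocator */

/* COW @old_bytenr for relocation, reusing an earlier COW if another
 * reloc tree already copied the same block. */
static unsigned long long cow_for_reloc(unsigned long long old_bytenr)
{
	int i;

	for (i = 0; i < nr_cached; i++)
		if (cache[i].old_bytenr == old_bytenr)
			return cache[i].new_bytenr;	/* shared result */

	cache[nr_cached].old_bytenr = old_bytenr;
	cache[nr_cached].new_bytenr = next_bytenr;
	next_bytenr += 0x1000;
	return cache[nr_cached++].new_bytenr;
}

int main(void)
{
	/* two subvols reference the same block at 0x8000 */
	printf("subvol A sees %#llx\n", cow_for_reloc(0x8000));
	printf("subvol B sees %#llx\n", cow_for_reloc(0x8000));
	return 0;
}
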
7389
7390static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7391 struct btrfs_root *root,
7392 struct btrfs_path *path,
7393 struct btrfs_key *first_key,
7394 struct btrfs_ref_path *ref_path)
7395{
7396 int ret;
7397
7398 ret = relocate_one_path(trans, root, path, first_key,
7399 ref_path, NULL, NULL);
7400 BUG_ON(ret);
7401
7402 return 0;
7403}
7404
7405static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7406 struct btrfs_root *extent_root,
7407 struct btrfs_path *path,
7408 struct btrfs_key *extent_key)
7409{
7410 int ret;
7411
7412 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7413 if (ret)
7414 goto out;
7415 ret = btrfs_del_item(trans, extent_root, path);
7416out:
7417 btrfs_release_path(extent_root, path);
7418 return ret;
7419}
7420
7421static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7422 struct btrfs_ref_path *ref_path)
7423{
7424 struct btrfs_key root_key;
7425
7426 root_key.objectid = ref_path->root_objectid;
7427 root_key.type = BTRFS_ROOT_ITEM_KEY;
7428 if (is_cowonly_root(ref_path->root_objectid))
7429 root_key.offset = 0;
7430 else
7431 root_key.offset = (u64)-1;
7432
7433 return btrfs_read_fs_root_no_name(fs_info, &root_key);
7434}
7435
7436static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7437 struct btrfs_path *path,
7438 struct btrfs_key *extent_key,
7439 struct btrfs_block_group_cache *group,
7440 struct inode *reloc_inode, int pass)
7441{
7442 struct btrfs_trans_handle *trans;
7443 struct btrfs_root *found_root;
7444 struct btrfs_ref_path *ref_path = NULL;
7445 struct disk_extent *new_extents = NULL;
7446 int nr_extents = 0;
7447 int loops;
7448 int ret;
7449 int level;
7450 struct btrfs_key first_key;
7451 u64 prev_block = 0;
7452
7453
7454 trans = btrfs_start_transaction(extent_root, 1);
7455 BUG_ON(!trans);
7456
7457 if (extent_key->objectid == 0) {
7458 ret = del_extent_zero(trans, extent_root, path, extent_key);
7459 goto out;
7460 }
7461
7462 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7463 if (!ref_path) {
7464 ret = -ENOMEM;
7465 goto out;
7466 }
7467
7468 for (loops = 0; ; loops++) {
7469 if (loops == 0) {
7470 ret = btrfs_first_ref_path(trans, extent_root, ref_path,
7471 extent_key->objectid);
7472 } else {
7473 ret = btrfs_next_ref_path(trans, extent_root, ref_path);
7474 }
7475 if (ret < 0)
7476 goto out;
7477 if (ret > 0)
7478 break;
7479
7480 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
7481 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
7482 continue;
7483
7484 found_root = read_ref_root(extent_root->fs_info, ref_path);
7485 BUG_ON(!found_root);
7486 /*
7487 * for reference-counted trees, only process reference paths
7488 * rooted at the latest committed root.
7489 */
7490 if (found_root->ref_cows &&
7491 ref_path->root_generation != found_root->root_key.offset)
7492 continue;
7493
7494 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7495 if (pass == 0) {
7496 /*
7497 * copy data extents to new locations
7498 */
7499 u64 group_start = group->key.objectid;
7500 ret = relocate_data_extent(reloc_inode,
7501 extent_key,
7502 group_start);
7503 if (ret < 0)
7504 goto out;
7505 break;
7506 }
7507 level = 0;
7508 } else {
7509 level = ref_path->owner_objectid;
7510 }
7511
7512 if (prev_block != ref_path->nodes[level]) {
7513 struct extent_buffer *eb;
7514 u64 block_start = ref_path->nodes[level];
7515 u64 block_size = btrfs_level_size(found_root, level);
7516
7517 eb = read_tree_block(found_root, block_start,
7518 block_size, 0);
7519 btrfs_tree_lock(eb);
7520 BUG_ON(level != btrfs_header_level(eb));
7521
7522 if (level == 0)
7523 btrfs_item_key_to_cpu(eb, &first_key, 0);
7524 else
7525 btrfs_node_key_to_cpu(eb, &first_key, 0);
7526
7527 btrfs_tree_unlock(eb);
7528 free_extent_buffer(eb);
7529 prev_block = block_start;
7530 }
7531
7532 mutex_lock(&extent_root->fs_info->trans_mutex);
7533 btrfs_record_root_in_trans(found_root);
7534 mutex_unlock(&extent_root->fs_info->trans_mutex);
7535 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7536 /*
7537 * try to update data extent references while
7538 * keeping metadata shared between snapshots.
7539 */
7540 if (pass == 1) {
7541 ret = relocate_one_path(trans, found_root,
7542 path, &first_key, ref_path,
7543 group, reloc_inode);
7544 if (ret < 0)
7545 goto out;
7546 continue;
7547 }
7548 /*
7549 * use fallback method to process the remaining
7550 * references.
7551 */
7552 if (!new_extents) {
7553 u64 group_start = group->key.objectid;
7554 new_extents = kmalloc(sizeof(*new_extents),
7555 GFP_NOFS);
7556 nr_extents = 1;
7557 ret = get_new_locations(reloc_inode,
7558 extent_key,
7559 group_start, 1,
7560 &new_extents,
7561 &nr_extents);
7562 if (ret)
7563 goto out;
7564 }
7565 ret = replace_one_extent(trans, found_root,
7566 path, extent_key,
7567 &first_key, ref_path,
7568 new_extents, nr_extents);
7569 } else {
7570 ret = relocate_tree_block(trans, found_root, path,
7571 &first_key, ref_path);
7572 }
7573 if (ret < 0)
7574 goto out;
7575 }
7576 ret = 0;
7577out:
7578 btrfs_end_transaction(trans, extent_root);
7579 kfree(new_extents);
7580 kfree(ref_path);
7581 return ret;
7582}
7583#endif
7584
7585static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6482static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7586{ 6483{
7587 u64 num_devices; 6484 u64 num_devices;
7588 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6485 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7589 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6486 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7590 6487
7591 num_devices = root->fs_info->fs_devices->rw_devices; 6488 /*
6489 * we add in the count of missing devices because we want
6490 * to make sure that any RAID levels on a degraded FS
6491 * continue to be honored.
6492 */
6493 num_devices = root->fs_info->fs_devices->rw_devices +
6494 root->fs_info->fs_devices->missing_devices;
6495
7592 if (num_devices == 1) { 6496 if (num_devices == 1) {
7593 stripped |= BTRFS_BLOCK_GROUP_DUP; 6497 stripped |= BTRFS_BLOCK_GROUP_DUP;
7594 stripped = flags & ~stripped; 6498 stripped = flags & ~stripped;
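
A quick worked case for the missing-device accounting added above: on a two-device RAID1 filesystem mounted degraded with one device gone, rw_devices is 1, which alone would strip the RAID1 bit; adding missing_devices keeps num_devices at 2 and the RAID1 profile intact. A self-contained sketch (TOY_RAID1 is an arbitrary stand-in, not the real BTRFS_BLOCK_GROUP_RAID1 bit):

#include <stdio.h>

#define TOY_RAID1 0x10ULL	/* stand-in flag bit */

/* Strip mirroring when only one device is usable. */
static unsigned long long reduce_flags(unsigned long long flags,
				       unsigned long long num_devices)
{
	if (num_devices == 1)
		flags &= ~TOY_RAID1;
	return flags;
}

int main(void)
{
	unsigned long long rw = 1, missing = 1;

	/* counting rw devices only: RAID1 is wrongly dropped */
	printf("rw only:      %#llx\n", reduce_flags(TOY_RAID1, rw));
	/* counting missing devices too: RAID1 is preserved */
	printf("rw + missing: %#llx\n", reduce_flags(TOY_RAID1, rw + missing));
	return 0;
}
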
@@ -7636,13 +6540,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
7636 6540
7637 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6541 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
7638 sinfo->bytes_may_use + sinfo->bytes_readonly + 6542 sinfo->bytes_may_use + sinfo->bytes_readonly +
7639 cache->reserved_pinned + num_bytes < sinfo->total_bytes) { 6543 cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
7640 sinfo->bytes_readonly += num_bytes; 6544 sinfo->bytes_readonly += num_bytes;
7641 sinfo->bytes_reserved += cache->reserved_pinned; 6545 sinfo->bytes_reserved += cache->reserved_pinned;
7642 cache->reserved_pinned = 0; 6546 cache->reserved_pinned = 0;
7643 cache->ro = 1; 6547 cache->ro = 1;
7644 ret = 0; 6548 ret = 0;
7645 } 6549 }
6550
7646 spin_unlock(&cache->lock); 6551 spin_unlock(&cache->lock);
7647 spin_unlock(&sinfo->lock); 6552 spin_unlock(&sinfo->lock);
7648 return ret; 6553 return ret;
@@ -7658,18 +6563,20 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
7658 6563
7659 BUG_ON(cache->ro); 6564 BUG_ON(cache->ro);
7660 6565
7661 trans = btrfs_join_transaction(root, 1); 6566 trans = btrfs_join_transaction(root);
7662 BUG_ON(IS_ERR(trans)); 6567 BUG_ON(IS_ERR(trans));
7663 6568
7664 alloc_flags = update_block_group_flags(root, cache->flags); 6569 alloc_flags = update_block_group_flags(root, cache->flags);
7665 if (alloc_flags != cache->flags) 6570 if (alloc_flags != cache->flags)
7666 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 6571 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
6572 CHUNK_ALLOC_FORCE);
7667 6573
7668 ret = set_block_group_ro(cache); 6574 ret = set_block_group_ro(cache);
7669 if (!ret) 6575 if (!ret)
7670 goto out; 6576 goto out;
7671 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 6577 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
7672 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 6578 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
6579 CHUNK_ALLOC_FORCE);
7673 if (ret < 0) 6580 if (ret < 0)
7674 goto out; 6581 goto out;
7675 ret = set_block_group_ro(cache); 6582 ret = set_block_group_ro(cache);
@@ -7678,6 +6585,70 @@ out:
7678 return ret; 6585 return ret;
7679} 6586}
7680 6587
6588int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
6589 struct btrfs_root *root, u64 type)
6590{
6591 u64 alloc_flags = get_alloc_profile(root, type);
6592 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
6593 CHUNK_ALLOC_FORCE);
6594}
6595
6596/*
6597 * helper to account for the unused space of all the readonly block
6598 * groups in the list.  takes mirrors into account.
6599 */
6600static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
6601{
6602 struct btrfs_block_group_cache *block_group;
6603 u64 free_bytes = 0;
6604 int factor;
6605
6606 list_for_each_entry(block_group, groups_list, list) {
6607 spin_lock(&block_group->lock);
6608
6609 if (!block_group->ro) {
6610 spin_unlock(&block_group->lock);
6611 continue;
6612 }
6613
6614 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
6615 BTRFS_BLOCK_GROUP_RAID10 |
6616 BTRFS_BLOCK_GROUP_DUP))
6617 factor = 2;
6618 else
6619 factor = 1;
6620
6621 free_bytes += (block_group->key.offset -
6622 btrfs_block_group_used(&block_group->item)) *
6623 factor;
6624
6625 spin_unlock(&block_group->lock);
6626 }
6627
6628 return free_bytes;
6629}
6630
6631/*
6632 * helper to account for the unused space of all the readonly block
6633 * groups in the space_info.  takes mirrors into account.
6634 */
6635u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
6636{
6637 int i;
6638 u64 free_bytes = 0;
6639
6640 spin_lock(&sinfo->lock);
6641
6642 for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
6643 if (!list_empty(&sinfo->block_groups[i]))
6644 free_bytes += __btrfs_get_ro_block_group_free_space(
6645 &sinfo->block_groups[i]);
6646
6647 spin_unlock(&sinfo->lock);
6648
6649 return free_bytes;
6650}
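
The factor applied above doubles the accounting for mirrored profiles, since each unused byte in a RAID1/RAID10/DUP block group corresponds to two bytes of raw disk. A self-contained sketch of the same arithmetic (the struct and sizes are illustrative):

#include <stdio.h>

struct toy_bg {
	unsigned long long size;	/* block group length */
	unsigned long long used;	/* bytes allocated in it */
	int mirrored;			/* RAID1/RAID10/DUP? */
};

/* Sum the unused space of read-only block groups, counting mirrored
 * groups twice because they occupy two copies on disk. */
static unsigned long long ro_free_space(const struct toy_bg *bg, int n)
{
	unsigned long long free_bytes = 0;
	int i;

	for (i = 0; i < n; i++) {
		int factor = bg[i].mirrored ? 2 : 1;
		free_bytes += (bg[i].size - bg[i].used) * factor;
	}
	return free_bytes;
}

int main(void)
{
	struct toy_bg groups[] = {
		{ 1 << 20, 256 << 10, 1 },	/* RAID1, 768K unused */
		{ 1 << 20, 512 << 10, 0 },	/* single, 512K unused */
	};

	/* 768K * 2 + 512K * 1 = 2048K of raw disk */
	printf("%llu bytes freeable\n", ro_free_space(groups, 2));
	return 0;
}
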
6651
7681int btrfs_set_block_group_rw(struct btrfs_root *root, 6652int btrfs_set_block_group_rw(struct btrfs_root *root,
7682 struct btrfs_block_group_cache *cache) 6653 struct btrfs_block_group_cache *cache)
7683{ 6654{
@@ -7758,7 +6729,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7758 mutex_lock(&root->fs_info->chunk_mutex); 6729 mutex_lock(&root->fs_info->chunk_mutex);
7759 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6730 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
7760 u64 min_free = btrfs_block_group_used(&block_group->item); 6731 u64 min_free = btrfs_block_group_used(&block_group->item);
7761 u64 dev_offset, max_avail; 6732 u64 dev_offset;
7762 6733
7763 /* 6734 /*
7764 * check to make sure we can actually find a chunk with enough 6735 * check to make sure we can actually find a chunk with enough
@@ -7766,7 +6737,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7766 */ 6737 */
7767 if (device->total_bytes > device->bytes_used + min_free) { 6738 if (device->total_bytes > device->bytes_used + min_free) {
7768 ret = find_free_dev_extent(NULL, device, min_free, 6739 ret = find_free_dev_extent(NULL, device, min_free,
7769 &dev_offset, &max_avail); 6740 &dev_offset, NULL);
7770 if (!ret) 6741 if (!ret)
7771 break; 6742 break;
7772 ret = -1; 6743 ret = -1;
@@ -7814,6 +6785,40 @@ out:
7814 return ret; 6785 return ret;
7815} 6786}
7816 6787
6788void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
6789{
6790 struct btrfs_block_group_cache *block_group;
6791 u64 last = 0;
6792
6793 while (1) {
6794 struct inode *inode;
6795
6796 block_group = btrfs_lookup_first_block_group(info, last);
6797 while (block_group) {
6798 spin_lock(&block_group->lock);
6799 if (block_group->iref)
6800 break;
6801 spin_unlock(&block_group->lock);
6802 block_group = next_block_group(info->tree_root,
6803 block_group);
6804 }
6805 if (!block_group) {
6806 if (last == 0)
6807 break;
6808 last = 0;
6809 continue;
6810 }
6811
6812 inode = block_group->inode;
6813 block_group->iref = 0;
6814 block_group->inode = NULL;
6815 spin_unlock(&block_group->lock);
6816 iput(inode);
6817 last = block_group->key.objectid + block_group->key.offset;
6818 btrfs_put_block_group(block_group);
6819 }
6820}
6821
7817int btrfs_free_block_groups(struct btrfs_fs_info *info) 6822int btrfs_free_block_groups(struct btrfs_fs_info *info)
7818{ 6823{
7819 struct btrfs_block_group_cache *block_group; 6824 struct btrfs_block_group_cache *block_group;
@@ -7845,6 +6850,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7845 if (block_group->cached == BTRFS_CACHE_STARTED) 6850 if (block_group->cached == BTRFS_CACHE_STARTED)
7846 wait_block_group_cache_done(block_group); 6851 wait_block_group_cache_done(block_group);
7847 6852
6853 /*
6854 * We haven't cached this block group, which means we could
6855 * possibly have excluded extents on this block group.
6856 */
6857 if (block_group->cached == BTRFS_CACHE_NO)
6858 free_excluded_extents(info->extent_root, block_group);
6859
7848 btrfs_remove_free_space_cache(block_group); 6860 btrfs_remove_free_space_cache(block_group);
7849 btrfs_put_block_group(block_group); 6861 btrfs_put_block_group(block_group);
7850 6862
@@ -7897,6 +6909,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7897 struct btrfs_key key; 6909 struct btrfs_key key;
7898 struct btrfs_key found_key; 6910 struct btrfs_key found_key;
7899 struct extent_buffer *leaf; 6911 struct extent_buffer *leaf;
6912 int need_clear = 0;
6913 u64 cache_gen;
7900 6914
7901 root = info->extent_root; 6915 root = info->extent_root;
7902 key.objectid = 0; 6916 key.objectid = 0;
@@ -7905,6 +6919,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7905 path = btrfs_alloc_path(); 6919 path = btrfs_alloc_path();
7906 if (!path) 6920 if (!path)
7907 return -ENOMEM; 6921 return -ENOMEM;
6922 path->reada = 1;
6923
6924 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
6925 if (cache_gen != 0 &&
6926 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
6927 need_clear = 1;
6928 if (btrfs_test_opt(root, CLEAR_CACHE))
6929 need_clear = 1;
6930 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
6931 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
7908 6932
7909 while (1) { 6933 while (1) {
7910 ret = find_first_block_group(root, path, &key); 6934 ret = find_first_block_group(root, path, &key);
@@ -7912,7 +6936,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7912 break; 6936 break;
7913 if (ret != 0) 6937 if (ret != 0)
7914 goto error; 6938 goto error;
7915
7916 leaf = path->nodes[0]; 6939 leaf = path->nodes[0];
7917 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6940 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7918 cache = kzalloc(sizeof(*cache), GFP_NOFS); 6941 cache = kzalloc(sizeof(*cache), GFP_NOFS);
@@ -7920,21 +6943,22 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7920 ret = -ENOMEM; 6943 ret = -ENOMEM;
7921 goto error; 6944 goto error;
7922 } 6945 }
6946 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
6947 GFP_NOFS);
6948 if (!cache->free_space_ctl) {
6949 kfree(cache);
6950 ret = -ENOMEM;
6951 goto error;
6952 }
7923 6953
7924 atomic_set(&cache->count, 1); 6954 atomic_set(&cache->count, 1);
7925 spin_lock_init(&cache->lock); 6955 spin_lock_init(&cache->lock);
7926 spin_lock_init(&cache->tree_lock);
7927 cache->fs_info = info; 6956 cache->fs_info = info;
7928 INIT_LIST_HEAD(&cache->list); 6957 INIT_LIST_HEAD(&cache->list);
7929 INIT_LIST_HEAD(&cache->cluster_list); 6958 INIT_LIST_HEAD(&cache->cluster_list);
7930 6959
7931 /* 6960 if (need_clear)
7932 * we only want to have 32k of ram per block group for keeping 6961 cache->disk_cache_state = BTRFS_DC_CLEAR;
7933 * track of free space, and if we pass 1/2 of that we want to
7934 * start converting things over to using bitmaps
7935 */
7936 cache->extents_thresh = ((1024 * 32) / 2) /
7937 sizeof(struct btrfs_free_space);
7938 6962
7939 read_extent_buffer(leaf, &cache->item, 6963 read_extent_buffer(leaf, &cache->item,
7940 btrfs_item_ptr_offset(leaf, path->slots[0]), 6964 btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -7942,10 +6966,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7942 memcpy(&cache->key, &found_key, sizeof(found_key)); 6966 memcpy(&cache->key, &found_key, sizeof(found_key));
7943 6967
7944 key.objectid = found_key.objectid + found_key.offset; 6968 key.objectid = found_key.objectid + found_key.offset;
7945 btrfs_release_path(root, path); 6969 btrfs_release_path(path);
7946 cache->flags = btrfs_block_group_flags(&cache->item); 6970 cache->flags = btrfs_block_group_flags(&cache->item);
7947 cache->sectorsize = root->sectorsize; 6971 cache->sectorsize = root->sectorsize;
7948 6972
6973 btrfs_init_free_space_ctl(cache);
6974
6975 /*
6976 * We need to exclude the super stripes now so that the space
6977 * info has super bytes accounted for, otherwise we'll think
6978 * we have more space than we actually do.
6979 */
6980 exclude_super_stripes(root, cache);
6981
7949 /* 6982 /*
7950 * check for two cases, either we are full, and therefore 6983 * check for two cases, either we are full, and therefore
7951 * don't need to bother with the caching work since we won't 6984 * don't need to bother with the caching work since we won't
@@ -7954,12 +6987,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7954 * time, particularly in the full case. 6987 * time, particularly in the full case.
7955 */ 6988 */
7956 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 6989 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7957 exclude_super_stripes(root, cache);
7958 cache->last_byte_to_unpin = (u64)-1; 6990 cache->last_byte_to_unpin = (u64)-1;
7959 cache->cached = BTRFS_CACHE_FINISHED; 6991 cache->cached = BTRFS_CACHE_FINISHED;
7960 free_excluded_extents(root, cache); 6992 free_excluded_extents(root, cache);
7961 } else if (btrfs_block_group_used(&cache->item) == 0) { 6993 } else if (btrfs_block_group_used(&cache->item) == 0) {
7962 exclude_super_stripes(root, cache);
7963 cache->last_byte_to_unpin = (u64)-1; 6994 cache->last_byte_to_unpin = (u64)-1;
7964 cache->cached = BTRFS_CACHE_FINISHED; 6995 cache->cached = BTRFS_CACHE_FINISHED;
7965 add_new_free_space(cache, root->fs_info, 6996 add_new_free_space(cache, root->fs_info,
@@ -8027,25 +7058,26 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8027 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7058 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8028 if (!cache) 7059 if (!cache)
8029 return -ENOMEM; 7060 return -ENOMEM;
7061 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
7062 GFP_NOFS);
7063 if (!cache->free_space_ctl) {
7064 kfree(cache);
7065 return -ENOMEM;
7066 }
8030 7067
8031 cache->key.objectid = chunk_offset; 7068 cache->key.objectid = chunk_offset;
8032 cache->key.offset = size; 7069 cache->key.offset = size;
8033 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7070 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8034 cache->sectorsize = root->sectorsize; 7071 cache->sectorsize = root->sectorsize;
7072 cache->fs_info = root->fs_info;
8035 7073
8036 /*
8037 * we only want to have 32k of ram per block group for keeping track
8038 * of free space, and if we pass 1/2 of that we want to start
8039 * converting things over to using bitmaps
8040 */
8041 cache->extents_thresh = ((1024 * 32) / 2) /
8042 sizeof(struct btrfs_free_space);
8043 atomic_set(&cache->count, 1); 7074 atomic_set(&cache->count, 1);
8044 spin_lock_init(&cache->lock); 7075 spin_lock_init(&cache->lock);
8045 spin_lock_init(&cache->tree_lock);
8046 INIT_LIST_HEAD(&cache->list); 7076 INIT_LIST_HEAD(&cache->list);
8047 INIT_LIST_HEAD(&cache->cluster_list); 7077 INIT_LIST_HEAD(&cache->cluster_list);
8048 7078
7079 btrfs_init_free_space_ctl(cache);
7080
8049 btrfs_set_block_group_used(&cache->item, bytes_used); 7081 btrfs_set_block_group_used(&cache->item, bytes_used);
8050 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7082 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8051 cache->flags = type; 7083 cache->flags = type;
@@ -8088,8 +7120,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8088 struct btrfs_path *path; 7120 struct btrfs_path *path;
8089 struct btrfs_block_group_cache *block_group; 7121 struct btrfs_block_group_cache *block_group;
8090 struct btrfs_free_cluster *cluster; 7122 struct btrfs_free_cluster *cluster;
7123 struct btrfs_root *tree_root = root->fs_info->tree_root;
8091 struct btrfs_key key; 7124 struct btrfs_key key;
7125 struct inode *inode;
8092 int ret; 7126 int ret;
7127 int factor;
8093 7128
8094 root = root->fs_info->extent_root; 7129 root = root->fs_info->extent_root;
8095 7130
@@ -8097,7 +7132,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8097 BUG_ON(!block_group); 7132 BUG_ON(!block_group);
8098 BUG_ON(!block_group->ro); 7133 BUG_ON(!block_group->ro);
8099 7134
7135 /*
7136 * Free the reserved super bytes from this block group before
7137 * remove it.
7138 */
7139 free_excluded_extents(root, block_group);
7140
8100 memcpy(&key, &block_group->key, sizeof(key)); 7141 memcpy(&key, &block_group->key, sizeof(key));
7142 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
7143 BTRFS_BLOCK_GROUP_RAID1 |
7144 BTRFS_BLOCK_GROUP_RAID10))
7145 factor = 2;
7146 else
7147 factor = 1;
8101 7148
8102 /* make sure this block group isn't part of an allocation cluster */ 7149 /* make sure this block group isn't part of an allocation cluster */
8103 cluster = &root->fs_info->data_alloc_cluster; 7150 cluster = &root->fs_info->data_alloc_cluster;
@@ -8117,6 +7164,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8117 path = btrfs_alloc_path(); 7164 path = btrfs_alloc_path();
8118 BUG_ON(!path); 7165 BUG_ON(!path);
8119 7166
7167 inode = lookup_free_space_inode(root, block_group, path);
7168 if (!IS_ERR(inode)) {
7169 btrfs_orphan_add(trans, inode);
7170 clear_nlink(inode);
7171 /* One for the block groups ref */
7172 spin_lock(&block_group->lock);
7173 if (block_group->iref) {
7174 block_group->iref = 0;
7175 block_group->inode = NULL;
7176 spin_unlock(&block_group->lock);
7177 iput(inode);
7178 } else {
7179 spin_unlock(&block_group->lock);
7180 }
7181 /* One for our lookup ref */
7182 iput(inode);
7183 }
7184
7185 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
7186 key.offset = block_group->key.objectid;
7187 key.type = 0;
7188
7189 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
7190 if (ret < 0)
7191 goto out;
7192 if (ret > 0)
7193 btrfs_release_path(path);
7194 if (ret == 0) {
7195 ret = btrfs_del_item(trans, tree_root, path);
7196 if (ret)
7197 goto out;
7198 btrfs_release_path(path);
7199 }
7200
8120 spin_lock(&root->fs_info->block_group_cache_lock); 7201 spin_lock(&root->fs_info->block_group_cache_lock);
8121 rb_erase(&block_group->cache_node, 7202 rb_erase(&block_group->cache_node,
8122 &root->fs_info->block_group_cache_tree); 7203 &root->fs_info->block_group_cache_tree);
@@ -8138,8 +7219,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8138 spin_lock(&block_group->space_info->lock); 7219 spin_lock(&block_group->space_info->lock);
8139 block_group->space_info->total_bytes -= block_group->key.offset; 7220 block_group->space_info->total_bytes -= block_group->key.offset;
8140 block_group->space_info->bytes_readonly -= block_group->key.offset; 7221 block_group->space_info->bytes_readonly -= block_group->key.offset;
7222 block_group->space_info->disk_total -= block_group->key.offset * factor;
8141 spin_unlock(&block_group->space_info->lock); 7223 spin_unlock(&block_group->space_info->lock);
8142 7224
7225 memcpy(&key, &block_group->key, sizeof(key));
7226
8143 btrfs_clear_space_info_full(root->fs_info); 7227 btrfs_clear_space_info_full(root->fs_info);
8144 7228
8145 btrfs_put_block_group(block_group); 7229 btrfs_put_block_group(block_group);
@@ -8156,3 +7240,100 @@ out:
8156 btrfs_free_path(path); 7240 btrfs_free_path(path);
8157 return ret; 7241 return ret;
8158} 7242}
7243
7244int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
7245{
7246 struct btrfs_space_info *space_info;
7247 struct btrfs_super_block *disk_super;
7248 u64 features;
7249 u64 flags;
7250 int mixed = 0;
7251 int ret;
7252
7253 disk_super = &fs_info->super_copy;
7254 if (!btrfs_super_root(disk_super))
7255 return 1;
7256
7257 features = btrfs_super_incompat_flags(disk_super);
7258 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
7259 mixed = 1;
7260
7261 flags = BTRFS_BLOCK_GROUP_SYSTEM;
7262 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7263 if (ret)
7264 goto out;
7265
7266 if (mixed) {
7267 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
7268 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7269 } else {
7270 flags = BTRFS_BLOCK_GROUP_METADATA;
7271 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7272 if (ret)
7273 goto out;
7274
7275 flags = BTRFS_BLOCK_GROUP_DATA;
7276 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
7277 }
7278out:
7279 return ret;
7280}
7281
7282int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
7283{
7284 return unpin_extent_range(root, start, end);
7285}
7286
7287int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
7288 u64 num_bytes, u64 *actual_bytes)
7289{
7290 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
7291}
7292
7293int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
7294{
7295 struct btrfs_fs_info *fs_info = root->fs_info;
7296 struct btrfs_block_group_cache *cache = NULL;
7297 u64 group_trimmed;
7298 u64 start;
7299 u64 end;
7300 u64 trimmed = 0;
7301 int ret = 0;
7302
7303 cache = btrfs_lookup_block_group(fs_info, range->start);
7304
7305 while (cache) {
7306 if (cache->key.objectid >= (range->start + range->len)) {
7307 btrfs_put_block_group(cache);
7308 break;
7309 }
7310
7311 start = max(range->start, cache->key.objectid);
7312 end = min(range->start + range->len,
7313 cache->key.objectid + cache->key.offset);
7314
7315 if (end - start >= range->minlen) {
7316 if (!block_group_cache_done(cache)) {
7317 ret = cache_block_group(cache, NULL, root, 0);
7318 if (!ret)
7319 wait_block_group_cache_done(cache);
7320 }
7321 ret = btrfs_trim_block_group(cache,
7322 &group_trimmed,
7323 start,
7324 end,
7325 range->minlen);
7326
7327 trimmed += group_trimmed;
7328 if (ret) {
7329 btrfs_put_block_group(cache);
7330 break;
7331 }
7332 }
7333
7334 cache = next_block_group(fs_info->tree_root, cache);
7335 }
7336
7337 range->len = trimmed;
7338 return ret;
7339}
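
btrfs_trim_fs is the filesystem-side worker behind the generic FITRIM ioctl: the caller passes a struct fstrim_range, and len is rewritten with the number of bytes trimmed, exactly as the last lines above do. A minimal userspace caller, assuming a btrfs mount at /mnt (the path and minlen are examples):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range;
	int fd = open("/mnt", O_RDONLY);	/* example mount point */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&range, 0, sizeof(range));
	range.len = (unsigned long long)-1;	/* trim the whole fs */
	range.minlen = 4096;			/* skip sub-4K holes */

	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");
	else
		printf("%llu bytes trimmed\n",
		       (unsigned long long)range.len);
	return 0;
}
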