Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 519 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 415 insertions(+), 104 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 293da650873f..0a5d796c9f7e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -19,7 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
-#include <linux/version.h>
+#include <linux/sort.h>
 #include "compat.h"
 #include "hash.h"
 #include "crc32c.h"
@@ -30,7 +30,6 @@
 #include "volumes.h"
 #include "locking.h"
 #include "ref-cache.h"
-#include "compat.h"
 
 #define PENDING_EXTENT_INSERT 0
 #define PENDING_EXTENT_DELETE 1
@@ -326,10 +325,8 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 				   u64 flags)
 {
 	struct list_head *head = &info->space_info;
-	struct list_head *cur;
 	struct btrfs_space_info *found;
-	list_for_each(cur, head) {
-		found = list_entry(cur, struct btrfs_space_info, list);
+	list_for_each_entry(found, head, list) {
 		if (found->flags == flags)
 			return found;
 	}
@@ -1326,8 +1323,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root)
 {
-	finish_current_insert(trans, root->fs_info->extent_root, 1);
-	del_pending_extents(trans, root->fs_info->extent_root, 1);
+	u64 start;
+	u64 end;
+	int ret;
+
+	while(1) {
+		finish_current_insert(trans, root->fs_info->extent_root, 1);
+		del_pending_extents(trans, root->fs_info->extent_root, 1);
+
+		/* is there more work to do? */
+		ret = find_first_extent_bit(&root->fs_info->pending_del,
+					    0, &start, &end, EXTENT_WRITEBACK);
+		if (!ret)
+			continue;
+		ret = find_first_extent_bit(&root->fs_info->extent_ins,
+					    0, &start, &end, EXTENT_WRITEBACK);
+		if (!ret)
+			continue;
+		break;
+	}
 	return 0;
 }
 
@@ -1525,15 +1539,55 @@ out:
 	return ret;
 }
 
-int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *orig_buf, struct extent_buffer *buf,
-		  u32 *nr_extents)
+/* when a block goes through cow, we update the reference counts of
+ * everything that block points to.  The internal pointers of the block
+ * can be in just about any order, and it is likely to have clusters of
+ * things that are close together and clusters of things that are not.
+ *
+ * To help reduce the seeks that come with updating all of these reference
+ * counts, sort them by byte number before actual updates are done.
+ *
+ * struct refsort is used to match byte number to slot in the btree block.
+ * we sort based on the byte number and then use the slot to actually
+ * find the item.
+ *
+ * struct refsort is smaller than struct btrfs_item and smaller than
+ * struct btrfs_key_ptr.  Since we're currently limited to the page size
+ * for a btree block, there's no way for a kmalloc of refsorts for a
+ * single node to be bigger than a page.
+ */
+struct refsort {
+	u64 bytenr;
+	u32 slot;
+};
+
+/*
+ * for passing into sort()
+ */
+static int refsort_cmp(const void *a_void, const void *b_void)
+{
+	const struct refsort *a = a_void;
+	const struct refsort *b = b_void;
+
+	if (a->bytenr < b->bytenr)
+		return -1;
+	if (a->bytenr > b->bytenr)
+		return 1;
+	return 0;
+}
+
+
+noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct extent_buffer *orig_buf,
+			   struct extent_buffer *buf, u32 *nr_extents)
 {
 	u64 bytenr;
 	u64 ref_root;
 	u64 orig_root;
 	u64 ref_generation;
 	u64 orig_generation;
+	struct refsort *sorted;
 	u32 nritems;
 	u32 nr_file_extents = 0;
 	struct btrfs_key key;
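
[Editor's note: refsort_cmp returns -1/0/1 via explicit comparisons rather than subtracting the two byte numbers, because a u64 difference truncated to the comparator's int return could report the wrong order. A runnable user-space sketch of the same idea, using libc qsort() where the kernel uses the in-place heapsort sort() from the newly included <linux/sort.h>; the sample byte numbers are made up:]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct refsort {
	uint64_t bytenr;	/* disk byte this slot references */
	uint32_t slot;		/* slot in the btree block */
};

/* same shape as the kernel comparator above */
static int refsort_cmp(const void *a_void, const void *b_void)
{
	const struct refsort *a = a_void;
	const struct refsort *b = b_void;

	if (a->bytenr < b->bytenr)
		return -1;
	if (a->bytenr > b->bytenr)
		return 1;
	return 0;
}

int main(void)
{
	/* pointers as they appear in a node: essentially random order */
	struct refsort sorted[] = {
		{ 71303168, 0 }, { 12582912, 1 },
		{ 71307264, 2 }, { 12587008, 3 },
	};
	size_t n = sizeof(sorted) / sizeof(sorted[0]);
	size_t i;

	qsort(sorted, n, sizeof(struct refsort), refsort_cmp);

	/* updates now walk the extent tree in ascending disk order */
	for (i = 0; i < n; i++)
		printf("bytenr %" PRIu64 " came from slot %u\n",
		       sorted[i].bytenr, sorted[i].slot);
	return 0;
}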
@@ -1542,6 +1596,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	int level;
 	int ret = 0;
 	int faili = 0;
+	int refi = 0;
+	int slot;
 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
 			    u64, u64, u64, u64, u64, u64, u64, u64);
 
@@ -1553,6 +1609,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	nritems = btrfs_header_nritems(buf);
 	level = btrfs_header_level(buf);
 
+	sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
+	BUG_ON(!sorted);
+
 	if (root->ref_cows) {
 		process_func = __btrfs_inc_extent_ref;
 	} else {
@@ -1565,6 +1624,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		process_func = __btrfs_update_extent_ref;
 	}
 
+	/*
+	 * we make two passes through the items.  In the first pass we
+	 * only record the byte number and slot.  Then we sort based on
+	 * byte number and do the actual work based on the sorted results
+	 */
 	for (i = 0; i < nritems; i++) {
 		cond_resched();
 		if (level == 0) {
@@ -1581,6 +1645,32 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 				continue;
 
 			nr_file_extents++;
+			sorted[refi].bytenr = bytenr;
+			sorted[refi].slot = i;
+			refi++;
+		} else {
+			bytenr = btrfs_node_blockptr(buf, i);
+			sorted[refi].bytenr = bytenr;
+			sorted[refi].slot = i;
+			refi++;
+		}
+	}
+	/*
+	 * if refi == 0, we didn't actually put anything into the sorted
+	 * array and we're done
+	 */
+	if (refi == 0)
+		goto out;
+
+	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+	for (i = 0; i < refi; i++) {
+		cond_resched();
+		slot = sorted[i].slot;
+		bytenr = sorted[i].bytenr;
+
+		if (level == 0) {
+			btrfs_item_key_to_cpu(buf, &key, slot);
 
 			ret = process_func(trans, root, bytenr,
 					   orig_buf->start, buf->start,
@@ -1589,25 +1679,25 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 					   key.objectid);
 
 			if (ret) {
-				faili = i;
+				faili = slot;
 				WARN_ON(1);
 				goto fail;
 			}
 		} else {
-			bytenr = btrfs_node_blockptr(buf, i);
 			ret = process_func(trans, root, bytenr,
 					   orig_buf->start, buf->start,
 					   orig_root, ref_root,
 					   orig_generation, ref_generation,
 					   level - 1);
 			if (ret) {
-				faili = i;
+				faili = slot;
 				WARN_ON(1);
 				goto fail;
 			}
 		}
 	}
 out:
+	kfree(sorted);
 	if (nr_extents) {
 		if (level == 0)
 			*nr_extents = nr_file_extents;
@@ -1616,6 +1706,7 @@ out:
 	}
 	return 0;
 fail:
+	kfree(sorted);
 	WARN_ON(1);
 	return ret;
 }
@@ -2137,13 +2228,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
 	u64 end;
 	u64 priv;
 	u64 search = 0;
-	u64 skipped = 0;
 	struct btrfs_fs_info *info = extent_root->fs_info;
 	struct btrfs_path *path;
 	struct pending_extent_op *extent_op, *tmp;
 	struct list_head insert_list, update_list;
 	int ret;
-	int num_inserts = 0, max_inserts;
+	int num_inserts = 0, max_inserts, restart = 0;
 
 	path = btrfs_alloc_path();
 	INIT_LIST_HEAD(&insert_list);
@@ -2159,18 +2249,19 @@ again:
 		ret = find_first_extent_bit(&info->extent_ins, search, &start,
 					    &end, EXTENT_WRITEBACK);
 		if (ret) {
-			if (skipped && all && !num_inserts) {
-				skipped = 0;
+			if (restart && !num_inserts &&
+			    list_empty(&update_list)) {
+				restart = 0;
 				search = 0;
 				continue;
 			}
-			mutex_unlock(&info->extent_ins_mutex);
 			break;
 		}
 
 		ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
 		if (!ret) {
-			skipped = 1;
+			if (all)
+				restart = 1;
 			search = end + 1;
 			if (need_resched()) {
 				mutex_unlock(&info->extent_ins_mutex);
@@ -2189,7 +2280,7 @@ again:
 			list_add_tail(&extent_op->list, &insert_list);
 			search = end + 1;
 			if (num_inserts == max_inserts) {
-				mutex_unlock(&info->extent_ins_mutex);
+				restart = 1;
 				break;
 			}
 		} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
@@ -2205,7 +2296,6 @@ again:
 	 * somebody marked this thing for deletion then just unlock it and be
 	 * done, the free_extents will handle it
 	 */
-	mutex_lock(&info->extent_ins_mutex);
 	list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
 		clear_extent_bits(&info->extent_ins, extent_op->bytenr,
 				  extent_op->bytenr + extent_op->num_bytes - 1,
@@ -2227,6 +2317,10 @@ again:
 	if (!list_empty(&update_list)) {
 		ret = update_backrefs(trans, extent_root, path, &update_list);
 		BUG_ON(ret);
+
+		/* we may have COW'ed new blocks, so let's start over */
+		if (all)
+			restart = 1;
 	}
 
 	/*
@@ -2234,9 +2328,9 @@ again:
 	 * need to make sure everything is cleaned then reset everything and
 	 * go back to the beginning
 	 */
-	if (!num_inserts && all && skipped) {
+	if (!num_inserts && restart) {
 		search = 0;
-		skipped = 0;
+		restart = 0;
 		INIT_LIST_HEAD(&update_list);
 		INIT_LIST_HEAD(&insert_list);
 		goto again;
@@ -2293,27 +2387,19 @@ again:
 	BUG_ON(ret);
 
 	/*
-	 * if we broke out of the loop in order to insert stuff because we hit
-	 * the maximum number of inserts at a time we can handle, then loop
-	 * back and pick up where we left off
+	 * if restart is set for whatever reason we need to go back and start
+	 * searching through the pending list again.
+	 *
+	 * We just inserted some extents, which could have resulted in new
+	 * blocks being allocated, which would result in new blocks needing
+	 * updates, so if all is set we _must_ restart to get the updated
+	 * blocks.
 	 */
-	if (num_inserts == max_inserts) {
-		INIT_LIST_HEAD(&insert_list);
-		INIT_LIST_HEAD(&update_list);
-		num_inserts = 0;
-		goto again;
-	}
-
-	/*
-	 * again, if we need to make absolutely sure there are no more pending
-	 * extent operations left and we know that we skipped some, go back to
-	 * the beginning and do it all again
-	 */
-	if (all && skipped) {
+	if (restart || all) {
 		INIT_LIST_HEAD(&insert_list);
 		INIT_LIST_HEAD(&update_list);
 		search = 0;
-		skipped = 0;
+		restart = 0;
 		num_inserts = 0;
 		goto again;
 	}
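
[Editor's note: taken together, the finish_current_insert() hunks above replace the old skipped counter with a restart flag, raised when a batch fills up, when an extent can't be locked, or when backref updates may have COW'ed new blocks; only a clean pass with the flag clear ends the loop. A toy user-space model of that bookkeeping, with hypothetical names and none of the locking (illustrative only, not btrfs code):]

#include <stdbool.h>
#include <stdio.h>

#define NR_OPS		10
#define MAX_BATCH	4

/* toy pending-op table; nonzero means work remains at that slot */
static int pending[NR_OPS] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };

/* flush at most MAX_BATCH ops starting at *search; raise *restart
 * when we stop early so the caller knows a rescan is required */
static int flush_batch(int *search, bool *restart)
{
	int batched = 0;

	while (*search < NR_OPS) {
		int i = (*search)++;

		if (!pending[i])
			continue;
		pending[i] = 0;			/* "insert" the extent */
		if (++batched == MAX_BATCH) {
			*restart = true;
			break;
		}
	}
	return batched;
}

int main(void)
{
	int search = 0;
	bool restart = false;

	while (1) {
		int n = flush_batch(&search, &restart);

		printf("flushed %d ops\n", n);
		if (!restart && n == 0)
			break;			/* clean empty pass: done */
		if (restart) {
			search = 0;		/* rescan from the start */
			restart = false;
		}
	}
	return 0;
}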
@@ -2547,6 +2633,7 @@ again:
 		if (ret) {
 			if (all && skipped && !nr) {
 				search = 0;
+				skipped = 0;
 				continue;
 			}
 			mutex_unlock(&info->extent_ins_mutex);
@@ -2633,6 +2720,8 @@ again:
 		goto again;
 	}
 
+	if (!err)
+		finish_current_insert(trans, extent_root, 0);
 	return err;
 }
 
@@ -2700,13 +2789,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	/* if metadata always pin */
 	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
-			struct btrfs_block_group_cache *cache;
-
-			/* btrfs_free_reserved_extent */
-			cache = btrfs_lookup_block_group(root->fs_info, bytenr);
-			BUG_ON(!cache);
-			btrfs_add_free_space(cache, bytenr, num_bytes);
-			put_block_group(cache);
+			mutex_lock(&root->fs_info->pinned_mutex);
+			btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+			mutex_unlock(&root->fs_info->pinned_mutex);
 			update_reserved_extents(root, bytenr, num_bytes, 0);
 			return 0;
 		}
@@ -2787,7 +2872,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 
 	if (data & BTRFS_BLOCK_GROUP_METADATA) {
 		last_ptr = &root->fs_info->last_alloc;
-		empty_cluster = 64 * 1024;
+		if (!btrfs_test_opt(root, SSD))
+			empty_cluster = 64 * 1024;
 	}
 
 	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
@@ -3014,7 +3100,6 @@ loop_check:
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
 {
 	struct btrfs_block_group_cache *cache;
-	struct list_head *l;
 
 	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
 	       (unsigned long long)(info->total_bytes - info->bytes_used -
@@ -3022,8 +3107,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
 	       (info->full) ? "" : "not ");
 
 	down_read(&info->groups_sem);
-	list_for_each(l, &info->block_groups) {
-		cache = list_entry(l, struct btrfs_block_group_cache, list);
+	list_for_each_entry(cache, &info->block_groups, list) {
 		spin_lock(&cache->lock);
 		printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
 		       "%llu pinned %llu reserved\n",
@@ -3332,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
 
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 					    struct btrfs_root *root,
-					    u64 bytenr, u32 blocksize)
+					    u64 bytenr, u32 blocksize,
+					    int level)
 {
 	struct extent_buffer *buf;
 
@@ -3340,9 +3425,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
+	btrfs_set_buffer_lockdep_class(buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
+
+	btrfs_set_lock_blocking(buf);
 	btrfs_set_buffer_uptodate(buf);
+
 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
 		set_extent_dirty(&root->dirty_log_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
@@ -3351,6 +3440,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 			 buf->start + buf->len - 1, GFP_NOFS);
 	}
 	trans->blocks_used++;
+	/* this returns a buffer locked for blocking */
 	return buf;
 }
 
@@ -3379,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 		return ERR_PTR(ret);
 	}
 
-	buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
+	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
+				    blocksize, level);
 	return buf;
 }
 
@@ -3388,36 +3479,73 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 {
 	u64 leaf_owner;
 	u64 leaf_generation;
+	struct refsort *sorted;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
 	int i;
 	int nritems;
 	int ret;
+	int refi = 0;
+	int slot;
 
 	BUG_ON(!btrfs_is_leaf(leaf));
 	nritems = btrfs_header_nritems(leaf);
 	leaf_owner = btrfs_header_owner(leaf);
 	leaf_generation = btrfs_header_generation(leaf);
 
+	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
+	/* we do this loop twice.  The first time we build a list
+	 * of the extents we have a reference on, then we sort the list
+	 * by bytenr.  The second time around we actually do the
+	 * extent freeing.
+	 */
 	for (i = 0; i < nritems; i++) {
 		u64 disk_bytenr;
 		cond_resched();
 
 		btrfs_item_key_to_cpu(leaf, &key, i);
+
+		/* only extents have references, skip everything else */
 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 			continue;
+
 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+
+		/* inline extents live in the btree, they don't have refs */
 		if (btrfs_file_extent_type(leaf, fi) ==
 		    BTRFS_FILE_EXTENT_INLINE)
 			continue;
-		/*
-		 * FIXME make sure to insert a trans record that
-		 * repeats the snapshot del on crash
-		 */
+
 		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+
+		/* holes don't have refs */
 		if (disk_bytenr == 0)
 			continue;
 
+		sorted[refi].bytenr = disk_bytenr;
+		sorted[refi].slot = i;
+		refi++;
+	}
+
+	if (refi == 0)
+		goto out;
+
+	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+	for (i = 0; i < refi; i++) {
+		u64 disk_bytenr;
+
+		disk_bytenr = sorted[i].bytenr;
+		slot = sorted[i].slot;
+
+		cond_resched();
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
 		ret = __btrfs_free_extent(trans, root, disk_bytenr,
 				btrfs_file_extent_disk_num_bytes(leaf, fi),
 				leaf->start, leaf_owner, leaf_generation,
@@ -3428,6 +3556,8 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 		wake_up(&root->fs_info->transaction_throttle);
 		cond_resched();
 	}
+out:
+	kfree(sorted);
 	return 0;
 }
 
@@ -3437,9 +3567,25 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 {
 	int i;
 	int ret;
-	struct btrfs_extent_info *info = ref->extents;
+	struct btrfs_extent_info *info;
+	struct refsort *sorted;
+
+	if (ref->nritems == 0)
+		return 0;
+
+	sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
+	for (i = 0; i < ref->nritems; i++) {
+		sorted[i].bytenr = ref->extents[i].bytenr;
+		sorted[i].slot = i;
+	}
+	sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
 
+	/*
+	 * the items in the ref were sorted when the ref was inserted
+	 * into the ref cache, so this is already in order
+	 */
 	for (i = 0; i < ref->nritems; i++) {
+		info = ref->extents + sorted[i].slot;
 		ret = __btrfs_free_extent(trans, root, info->bytenr,
 					  info->num_bytes, ref->bytenr,
 					  ref->owner, ref->generation,
@@ -3453,6 +3599,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 		info++;
 	}
 
+	kfree(sorted);
 	return 0;
 }
 
@@ -3497,6 +3644,152 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start,
 }
 
 /*
+ * this is used while deleting old snapshots, and it drops the refs
+ * on a whole subtree starting from a level 1 node.
+ *
+ * The idea is to sort all the leaf pointers, and then drop the
+ * ref on all the leaves in order.  Most of the time the leaves
+ * will have ref cache entries, so no leaf IOs will be required to
+ * find the extents they have references on.
+ *
+ * For each leaf, any references it has are also dropped in order
+ *
+ * This ends up dropping the references in something close to optimal
+ * order for reading and modifying the extent allocation tree.
+ */
+static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path)
+{
+	u64 bytenr;
+	u64 root_owner;
+	u64 root_gen;
+	struct extent_buffer *eb = path->nodes[1];
+	struct extent_buffer *leaf;
+	struct btrfs_leaf_ref *ref;
+	struct refsort *sorted = NULL;
+	int nritems = btrfs_header_nritems(eb);
+	int ret;
+	int i;
+	int refi = 0;
+	int slot = path->slots[1];
+	u32 blocksize = btrfs_level_size(root, 0);
+	u32 refs;
+
+	if (nritems == 0)
+		goto out;
+
+	root_owner = btrfs_header_owner(eb);
+	root_gen = btrfs_header_generation(eb);
+	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
+
+	/*
+	 * step one, sort all the leaf pointers so we don't scribble
+	 * randomly into the extent allocation tree
+	 */
+	for (i = slot; i < nritems; i++) {
+		sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
+		sorted[refi].slot = i;
+		refi++;
+	}
+
+	/*
+	 * nritems won't be zero, but if we're picking up drop_snapshot
+	 * after a crash, slot might be > 0, so double check things
+	 * just in case.
+	 */
+	if (refi == 0)
+		goto out;
+
+	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+	/*
+	 * the first loop frees everything the leaves point to
+	 */
+	for (i = 0; i < refi; i++) {
+		u64 ptr_gen;
+
+		bytenr = sorted[i].bytenr;
+
+		/*
+		 * check the reference count on this leaf.  If it is > 1
+		 * we just decrement it below and don't update any
+		 * of the refs the leaf points to.
+		 */
+		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
+		BUG_ON(ret);
+		if (refs != 1)
+			continue;
+
+		ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
+
+		/*
+		 * the leaf only had one reference, which means the
+		 * only thing pointing to this leaf is the snapshot
+		 * we're deleting.  It isn't possible for the reference
+		 * count to increase again later
+		 *
+		 * The reference cache is checked for the leaf,
+		 * and if found we'll be able to drop any refs held by
+		 * the leaf without needing to read it in.
+		 */
+		ref = btrfs_lookup_leaf_ref(root, bytenr);
+		if (ref && ref->generation != ptr_gen) {
+			btrfs_free_leaf_ref(root, ref);
+			ref = NULL;
+		}
+		if (ref) {
+			ret = cache_drop_leaf_ref(trans, root, ref);
+			BUG_ON(ret);
+			btrfs_remove_leaf_ref(root, ref);
+			btrfs_free_leaf_ref(root, ref);
+		} else {
+			/*
+			 * the leaf wasn't in the reference cache, so
+			 * we have to read it.
+			 */
+			leaf = read_tree_block(root, bytenr, blocksize,
+					       ptr_gen);
+			ret = btrfs_drop_leaf_ref(trans, root, leaf);
+			BUG_ON(ret);
+			free_extent_buffer(leaf);
+		}
+		atomic_inc(&root->fs_info->throttle_gen);
+		wake_up(&root->fs_info->transaction_throttle);
+		cond_resched();
+	}
+
+	/*
+	 * run through the loop again to free the refs on the leaves.
+	 * This is faster than doing it in the loop above because
+	 * the leaves are likely to be clustered together.  We end up
+	 * working in nice chunks on the extent allocation tree.
+	 */
+	for (i = 0; i < refi; i++) {
+		bytenr = sorted[i].bytenr;
+		ret = __btrfs_free_extent(trans, root, bytenr,
+					blocksize, eb->start,
+					root_owner, root_gen, 0, 1);
+		BUG_ON(ret);
+
+		atomic_inc(&root->fs_info->throttle_gen);
+		wake_up(&root->fs_info->transaction_throttle);
+		cond_resched();
+	}
+out:
+	kfree(sorted);
+
+	/*
+	 * update the path to show we've processed the entire level 1
+	 * node.  This will get saved into the root's drop_snapshot_progress
+	 * field so these drops are not repeated again if this transaction
+	 * commits.
+	 */
+	path->slots[1] = nritems;
+	return 0;
+}
+
+/*
  * helper function for drop_snapshot, this walks down the tree dropping ref
  * counts as it goes.
  */
@@ -3511,7 +3804,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 	struct extent_buffer *next;
 	struct extent_buffer *cur;
 	struct extent_buffer *parent;
-	struct btrfs_leaf_ref *ref;
 	u32 blocksize;
 	int ret;
 	u32 refs;
@@ -3538,17 +3830,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		if (path->slots[*level] >=
 		    btrfs_header_nritems(cur))
 			break;
+
+		/* the new code goes down to level 1 and does all the
+		 * leaves pointed to by that node in bulk.  So, this check
+		 * for level 0 will always be false.
+		 *
+		 * But, the disk format allows the drop_snapshot_progress
+		 * field in the root to leave things in a state where
+		 * a leaf will need cleaning up here.  If someone crashes
+		 * with the old code and then boots with the new code,
+		 * we might find a leaf here.
+		 */
 		if (*level == 0) {
 			ret = btrfs_drop_leaf_ref(trans, root, cur);
 			BUG_ON(ret);
 			break;
 		}
+
+		/*
+		 * once we get to level one, process the whole node
+		 * at once, including everything below it.
+		 */
+		if (*level == 1) {
+			ret = drop_level_one_refs(trans, root, path);
+			BUG_ON(ret);
+			break;
+		}
+
 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
 		blocksize = btrfs_level_size(root, *level - 1);
 
 		ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
 		BUG_ON(ret);
+
+		/*
+		 * if there is more than one reference, we don't need
+		 * to read that node to drop any references it has.  We
+		 * just drop the ref we hold on that node and move on to the
+		 * next slot in this level.
+		 */
 		if (refs != 1) {
 			parent = path->nodes[*level];
 			root_owner = btrfs_header_owner(parent);
@@ -3567,46 +3888,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 
 			continue;
 		}
+
 		/*
-		 * at this point, we have a single ref, and since the
-		 * only place referencing this extent is a dead root
-		 * the reference count should never go higher.
-		 * So, we don't need to check it again
+		 * we need to keep freeing things in the next level down.
+		 * read the block and loop around to process it
 		 */
-		if (*level == 1) {
-			ref = btrfs_lookup_leaf_ref(root, bytenr);
-			if (ref && ref->generation != ptr_gen) {
-				btrfs_free_leaf_ref(root, ref);
-				ref = NULL;
-			}
-			if (ref) {
-				ret = cache_drop_leaf_ref(trans, root, ref);
-				BUG_ON(ret);
-				btrfs_remove_leaf_ref(root, ref);
-				btrfs_free_leaf_ref(root, ref);
-				*level = 0;
-				break;
-			}
-		}
-		next = btrfs_find_tree_block(root, bytenr, blocksize);
-		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
-			free_extent_buffer(next);
-
-			next = read_tree_block(root, bytenr, blocksize,
-					       ptr_gen);
-			cond_resched();
-#if 0
-			/*
-			 * this is a debugging check and can go away
-			 * the ref should never go all the way down to 1
-			 * at this point
-			 */
-			ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
-						&refs);
-			BUG_ON(ret);
-			WARN_ON(refs != 1);
-#endif
-		}
+		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
 		WARN_ON(*level <= 0);
 		if (path->nodes[*level-1])
 			free_extent_buffer(path->nodes[*level-1]);
@@ -3631,11 +3918,16 @@ out:
 	root_owner = btrfs_header_owner(parent);
 	root_gen = btrfs_header_generation(parent);
 
+	/*
+	 * cleanup and free the reference on the last node
+	 * we processed
+	 */
 	ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
 				  parent->start, root_owner, root_gen,
 				  *level, 1);
 	free_extent_buffer(path->nodes[*level]);
 	path->nodes[*level] = NULL;
+
 	*level += 1;
 	BUG_ON(ret);
 
@@ -3687,6 +3979,7 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
 
 	next = read_tree_block(root, bytenr, blocksize, ptr_gen);
 	btrfs_tree_lock(next);
+	btrfs_set_lock_blocking(next);
 
 	ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
 				      &refs);
@@ -3754,6 +4047,13 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
 			struct extent_buffer *node;
 			struct btrfs_disk_key disk_key;
+
+			/*
+			 * there is more work to do in this level.
+			 * Update the drop_progress marker to reflect
+			 * the work we've done so far, and then bump
+			 * the slot number
+			 */
 			node = path->nodes[i];
 			path->slots[i]++;
 			*level = i;
@@ -3765,6 +4065,11 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 			return 0;
 		} else {
 			struct extent_buffer *parent;
+
+			/*
+			 * this whole node is done, free our reference
+			 * on it and go up one level
+			 */
 			if (path->nodes[*level] == root->node)
 				parent = path->nodes[*level];
 			else
@@ -4444,7 +4749,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
 	u64 lock_end = 0;
 	u64 num_bytes;
 	u64 ext_offset;
-	u64 first_pos;
+	u64 search_end = (u64)-1;
 	u32 nritems;
 	int nr_scaned = 0;
 	int extent_locked = 0;
@@ -4452,7 +4757,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
 	int ret;
 
 	memcpy(&key, leaf_key, sizeof(key));
-	first_pos = INT_LIMIT(loff_t) - extent_key->offset;
 	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
 		if (key.objectid < ref_path->owner_objectid ||
 		    (key.objectid == ref_path->owner_objectid &&
@@ -4501,7 +4805,7 @@ next:
 			if ((key.objectid > ref_path->owner_objectid) ||
 			    (key.objectid == ref_path->owner_objectid &&
 			     key.type > BTRFS_EXTENT_DATA_KEY) ||
-			    (key.offset >= first_pos + extent_key->offset))
+			    key.offset >= search_end)
 				break;
 		}
 
@@ -4534,8 +4838,10 @@ next:
 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 		ext_offset = btrfs_file_extent_offset(leaf, fi);
 
-		if (first_pos > key.offset - ext_offset)
-			first_pos = key.offset - ext_offset;
+		if (search_end == (u64)-1) {
+			search_end = key.offset - ext_offset +
+				btrfs_file_extent_ram_bytes(leaf, fi);
+		}
 
 		if (!extent_locked) {
 			lock_start = key.offset;
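
[Editor's note: the hunk above changes the early-exit bound from a first_pos derived from INT_LIMIT(loff_t) to a search_end computed from the extent itself: the file position where the extent's data runs out, past which no file extent item can reference it. A worked example of that arithmetic with made-up numbers:]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical values: a file extent item at file offset 1 MiB
	 * that points 256 KiB into an extent holding 1 MiB of data */
	uint64_t key_offset = 1024 * 1024;	/* file position of the item */
	uint64_t ext_offset = 256 * 1024;	/* offset into the extent */
	uint64_t ram_bytes  = 1024 * 1024;	/* uncompressed extent size */

	/* the extent's data spans file range [768 KiB, 1792 KiB), so
	 * the search can stop once key.offset reaches 1792 KiB */
	uint64_t search_end = key_offset - ext_offset + ram_bytes;

	printf("stop searching at file offset %" PRIu64 "\n", search_end);
	return 0;
}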
@@ -4724,7 +5030,7 @@ next:
 		}
 skip:
 		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
-		    key.offset >= first_pos + extent_key->offset)
+		    key.offset >= search_end)
 			break;
 
 		cond_resched();
@@ -4778,6 +5084,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
 	ref->bytenr = buf->start;
 	ref->owner = btrfs_header_owner(buf);
 	ref->generation = btrfs_header_generation(buf);
+
 	ret = btrfs_add_leaf_ref(root, ref, 0);
 	WARN_ON(ret);
 	btrfs_free_leaf_ref(root, ref);
@@ -5351,7 +5658,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
 			prev_block = block_start;
 		}
 
+		mutex_lock(&extent_root->fs_info->trans_mutex);
 		btrfs_record_root_in_trans(found_root);
+		mutex_unlock(&extent_root->fs_info->trans_mutex);
 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 			/*
 			 * try to update data extent references while
@@ -5957,9 +6266,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
-	btrfs_remove_free_space_cache(block_group);
+	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+	btrfs_remove_free_space_cache(block_group);
 	down_write(&block_group->space_info->groups_sem);
 	list_del(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);