diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 519 |
1 files changed, 415 insertions, 104 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 293da650873f..0a5d796c9f7e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/version.h> | 22 | #include <linux/sort.h> |
23 | #include "compat.h" | 23 | #include "compat.h" |
24 | #include "hash.h" | 24 | #include "hash.h" |
25 | #include "crc32c.h" | 25 | #include "crc32c.h" |
@@ -30,7 +30,6 @@ | |||
30 | #include "volumes.h" | 30 | #include "volumes.h" |
31 | #include "locking.h" | 31 | #include "locking.h" |
32 | #include "ref-cache.h" | 32 | #include "ref-cache.h" |
33 | #include "compat.h" | ||
34 | 33 | ||
35 | #define PENDING_EXTENT_INSERT 0 | 34 | #define PENDING_EXTENT_INSERT 0 |
36 | #define PENDING_EXTENT_DELETE 1 | 35 | #define PENDING_EXTENT_DELETE 1 |
@@ -326,10 +325,8 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
326 | u64 flags) | 325 | u64 flags) |
327 | { | 326 | { |
328 | struct list_head *head = &info->space_info; | 327 | struct list_head *head = &info->space_info; |
329 | struct list_head *cur; | ||
330 | struct btrfs_space_info *found; | 328 | struct btrfs_space_info *found; |
331 | list_for_each(cur, head) { | 329 | list_for_each_entry(found, head, list) { |
332 | found = list_entry(cur, struct btrfs_space_info, list); | ||
333 | if (found->flags == flags) | 330 | if (found->flags == flags) |
334 | return found; | 331 | return found; |
335 | } | 332 | } |
@@ -1326,8 +1323,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1326 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 1323 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, |
1327 | struct btrfs_root *root) | 1324 | struct btrfs_root *root) |
1328 | { | 1325 | { |
1329 | finish_current_insert(trans, root->fs_info->extent_root, 1); | 1326 | u64 start; |
1330 | del_pending_extents(trans, root->fs_info->extent_root, 1); | 1327 | u64 end; |
1328 | int ret; | ||
1329 | |||
1330 | while(1) { | ||
1331 | finish_current_insert(trans, root->fs_info->extent_root, 1); | ||
1332 | del_pending_extents(trans, root->fs_info->extent_root, 1); | ||
1333 | |||
1334 | /* is there more work to do? */ | ||
1335 | ret = find_first_extent_bit(&root->fs_info->pending_del, | ||
1336 | 0, &start, &end, EXTENT_WRITEBACK); | ||
1337 | if (!ret) | ||
1338 | continue; | ||
1339 | ret = find_first_extent_bit(&root->fs_info->extent_ins, | ||
1340 | 0, &start, &end, EXTENT_WRITEBACK); | ||
1341 | if (!ret) | ||
1342 | continue; | ||
1343 | break; | ||
1344 | } | ||
1331 | return 0; | 1345 | return 0; |
1332 | } | 1346 | } |
1333 | 1347 | ||
@@ -1525,15 +1539,55 @@ out: | |||
1525 | return ret; | 1539 | return ret; |
1526 | } | 1540 | } |
1527 | 1541 | ||
1528 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1542 | /* when a block goes through cow, we update the reference counts of |
1529 | struct extent_buffer *orig_buf, struct extent_buffer *buf, | 1543 | * everything that block points to. The internal pointers of the block |
1530 | u32 *nr_extents) | 1544 | * can be in just about any order, and it is likely to have clusters of |
1545 | * things that are close together and clusters of things that are not. | ||
1546 | * | ||
1547 | * To help reduce the seeks that come with updating all of these reference | ||
1548 | * counts, sort them by byte number before actual updates are done. | ||
1549 | * | ||
1550 | * struct refsort is used to match byte number to slot in the btree block. | ||
1551 | * we sort based on the byte number and then use the slot to actually | ||
1552 | * find the item. | ||
1553 | * | ||
1554 | * struct refsort is smaller than struct btrfs_item and smaller than | ||
1555 | * struct btrfs_key_ptr. Since we're currently limited to the page size | ||
1556 | * for a btree block, there's no way for a kmalloc of refsorts for a | ||
1557 | * single node to be bigger than a page. | ||
1558 | */ | ||
1559 | struct refsort { | ||
1560 | u64 bytenr; | ||
1561 | u32 slot; | ||
1562 | }; | ||
1563 | |||
1564 | /* | ||
1565 | * for passing into sort() | ||
1566 | */ | ||
1567 | static int refsort_cmp(const void *a_void, const void *b_void) | ||
1568 | { | ||
1569 | const struct refsort *a = a_void; | ||
1570 | const struct refsort *b = b_void; | ||
1571 | |||
1572 | if (a->bytenr < b->bytenr) | ||
1573 | return -1; | ||
1574 | if (a->bytenr > b->bytenr) | ||
1575 | return 1; | ||
1576 | return 0; | ||
1577 | } | ||
1578 | |||
1579 | |||
1580 | noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | ||
1581 | struct btrfs_root *root, | ||
1582 | struct extent_buffer *orig_buf, | ||
1583 | struct extent_buffer *buf, u32 *nr_extents) | ||
1531 | { | 1584 | { |
1532 | u64 bytenr; | 1585 | u64 bytenr; |
1533 | u64 ref_root; | 1586 | u64 ref_root; |
1534 | u64 orig_root; | 1587 | u64 orig_root; |
1535 | u64 ref_generation; | 1588 | u64 ref_generation; |
1536 | u64 orig_generation; | 1589 | u64 orig_generation; |
1590 | struct refsort *sorted; | ||
1537 | u32 nritems; | 1591 | u32 nritems; |
1538 | u32 nr_file_extents = 0; | 1592 | u32 nr_file_extents = 0; |
1539 | struct btrfs_key key; | 1593 | struct btrfs_key key; |
@@ -1542,6 +1596,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1542 | int level; | 1596 | int level; |
1543 | int ret = 0; | 1597 | int ret = 0; |
1544 | int faili = 0; | 1598 | int faili = 0; |
1599 | int refi = 0; | ||
1600 | int slot; | ||
1545 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 1601 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1546 | u64, u64, u64, u64, u64, u64, u64, u64); | 1602 | u64, u64, u64, u64, u64, u64, u64, u64); |
1547 | 1603 | ||
@@ -1553,6 +1609,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1553 | nritems = btrfs_header_nritems(buf); | 1609 | nritems = btrfs_header_nritems(buf); |
1554 | level = btrfs_header_level(buf); | 1610 | level = btrfs_header_level(buf); |
1555 | 1611 | ||
1612 | sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); | ||
1613 | BUG_ON(!sorted); | ||
1614 | |||
1556 | if (root->ref_cows) { | 1615 | if (root->ref_cows) { |
1557 | process_func = __btrfs_inc_extent_ref; | 1616 | process_func = __btrfs_inc_extent_ref; |
1558 | } else { | 1617 | } else { |
@@ -1565,6 +1624,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1565 | process_func = __btrfs_update_extent_ref; | 1624 | process_func = __btrfs_update_extent_ref; |
1566 | } | 1625 | } |
1567 | 1626 | ||
1627 | /* | ||
1628 | * we make two passes through the items. In the first pass we | ||
1629 | * only record the byte number and slot. Then we sort based on | ||
1630 | * byte number and do the actual work based on the sorted results | ||
1631 | */ | ||
1568 | for (i = 0; i < nritems; i++) { | 1632 | for (i = 0; i < nritems; i++) { |
1569 | cond_resched(); | 1633 | cond_resched(); |
1570 | if (level == 0) { | 1634 | if (level == 0) { |
@@ -1581,6 +1645,32 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1581 | continue; | 1645 | continue; |
1582 | 1646 | ||
1583 | nr_file_extents++; | 1647 | nr_file_extents++; |
1648 | sorted[refi].bytenr = bytenr; | ||
1649 | sorted[refi].slot = i; | ||
1650 | refi++; | ||
1651 | } else { | ||
1652 | bytenr = btrfs_node_blockptr(buf, i); | ||
1653 | sorted[refi].bytenr = bytenr; | ||
1654 | sorted[refi].slot = i; | ||
1655 | refi++; | ||
1656 | } | ||
1657 | } | ||
1658 | /* | ||
1659 | * if refi == 0, we didn't actually put anything into the sorted | ||
1660 | * array and we're done | ||
1661 | */ | ||
1662 | if (refi == 0) | ||
1663 | goto out; | ||
1664 | |||
1665 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
1666 | |||
1667 | for (i = 0; i < refi; i++) { | ||
1668 | cond_resched(); | ||
1669 | slot = sorted[i].slot; | ||
1670 | bytenr = sorted[i].bytenr; | ||
1671 | |||
1672 | if (level == 0) { | ||
1673 | btrfs_item_key_to_cpu(buf, &key, slot); | ||
1584 | 1674 | ||
1585 | ret = process_func(trans, root, bytenr, | 1675 | ret = process_func(trans, root, bytenr, |
1586 | orig_buf->start, buf->start, | 1676 | orig_buf->start, buf->start, |
@@ -1589,25 +1679,25 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1589 | key.objectid); | 1679 | key.objectid); |
1590 | 1680 | ||
1591 | if (ret) { | 1681 | if (ret) { |
1592 | faili = i; | 1682 | faili = slot; |
1593 | WARN_ON(1); | 1683 | WARN_ON(1); |
1594 | goto fail; | 1684 | goto fail; |
1595 | } | 1685 | } |
1596 | } else { | 1686 | } else { |
1597 | bytenr = btrfs_node_blockptr(buf, i); | ||
1598 | ret = process_func(trans, root, bytenr, | 1687 | ret = process_func(trans, root, bytenr, |
1599 | orig_buf->start, buf->start, | 1688 | orig_buf->start, buf->start, |
1600 | orig_root, ref_root, | 1689 | orig_root, ref_root, |
1601 | orig_generation, ref_generation, | 1690 | orig_generation, ref_generation, |
1602 | level - 1); | 1691 | level - 1); |
1603 | if (ret) { | 1692 | if (ret) { |
1604 | faili = i; | 1693 | faili = slot; |
1605 | WARN_ON(1); | 1694 | WARN_ON(1); |
1606 | goto fail; | 1695 | goto fail; |
1607 | } | 1696 | } |
1608 | } | 1697 | } |
1609 | } | 1698 | } |
1610 | out: | 1699 | out: |
1700 | kfree(sorted); | ||
1611 | if (nr_extents) { | 1701 | if (nr_extents) { |
1612 | if (level == 0) | 1702 | if (level == 0) |
1613 | *nr_extents = nr_file_extents; | 1703 | *nr_extents = nr_file_extents; |
@@ -1616,6 +1706,7 @@ out: | |||
1616 | } | 1706 | } |
1617 | return 0; | 1707 | return 0; |
1618 | fail: | 1708 | fail: |
1709 | kfree(sorted); | ||
1619 | WARN_ON(1); | 1710 | WARN_ON(1); |
1620 | return ret; | 1711 | return ret; |
1621 | } | 1712 | } |
@@ -2137,13 +2228,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, | |||
2137 | u64 end; | 2228 | u64 end; |
2138 | u64 priv; | 2229 | u64 priv; |
2139 | u64 search = 0; | 2230 | u64 search = 0; |
2140 | u64 skipped = 0; | ||
2141 | struct btrfs_fs_info *info = extent_root->fs_info; | 2231 | struct btrfs_fs_info *info = extent_root->fs_info; |
2142 | struct btrfs_path *path; | 2232 | struct btrfs_path *path; |
2143 | struct pending_extent_op *extent_op, *tmp; | 2233 | struct pending_extent_op *extent_op, *tmp; |
2144 | struct list_head insert_list, update_list; | 2234 | struct list_head insert_list, update_list; |
2145 | int ret; | 2235 | int ret; |
2146 | int num_inserts = 0, max_inserts; | 2236 | int num_inserts = 0, max_inserts, restart = 0; |
2147 | 2237 | ||
2148 | path = btrfs_alloc_path(); | 2238 | path = btrfs_alloc_path(); |
2149 | INIT_LIST_HEAD(&insert_list); | 2239 | INIT_LIST_HEAD(&insert_list); |
@@ -2159,18 +2249,19 @@ again: | |||
2159 | ret = find_first_extent_bit(&info->extent_ins, search, &start, | 2249 | ret = find_first_extent_bit(&info->extent_ins, search, &start, |
2160 | &end, EXTENT_WRITEBACK); | 2250 | &end, EXTENT_WRITEBACK); |
2161 | if (ret) { | 2251 | if (ret) { |
2162 | if (skipped && all && !num_inserts) { | 2252 | if (restart && !num_inserts && |
2163 | skipped = 0; | 2253 | list_empty(&update_list)) { |
2254 | restart = 0; | ||
2164 | search = 0; | 2255 | search = 0; |
2165 | continue; | 2256 | continue; |
2166 | } | 2257 | } |
2167 | mutex_unlock(&info->extent_ins_mutex); | ||
2168 | break; | 2258 | break; |
2169 | } | 2259 | } |
2170 | 2260 | ||
2171 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); | 2261 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); |
2172 | if (!ret) { | 2262 | if (!ret) { |
2173 | skipped = 1; | 2263 | if (all) |
2264 | restart = 1; | ||
2174 | search = end + 1; | 2265 | search = end + 1; |
2175 | if (need_resched()) { | 2266 | if (need_resched()) { |
2176 | mutex_unlock(&info->extent_ins_mutex); | 2267 | mutex_unlock(&info->extent_ins_mutex); |
@@ -2189,7 +2280,7 @@ again: | |||
2189 | list_add_tail(&extent_op->list, &insert_list); | 2280 | list_add_tail(&extent_op->list, &insert_list); |
2190 | search = end + 1; | 2281 | search = end + 1; |
2191 | if (num_inserts == max_inserts) { | 2282 | if (num_inserts == max_inserts) { |
2192 | mutex_unlock(&info->extent_ins_mutex); | 2283 | restart = 1; |
2193 | break; | 2284 | break; |
2194 | } | 2285 | } |
2195 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | 2286 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { |
@@ -2205,7 +2296,6 @@ again: | |||
2205 | * somebody marked this thing for deletion then just unlock it and be | 2296 | * somebody marked this thing for deletion then just unlock it and be |
2206 | * done, the free_extents will handle it | 2297 | * done, the free_extents will handle it |
2207 | */ | 2298 | */ |
2208 | mutex_lock(&info->extent_ins_mutex); | ||
2209 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { | 2299 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { |
2210 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | 2300 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, |
2211 | extent_op->bytenr + extent_op->num_bytes - 1, | 2301 | extent_op->bytenr + extent_op->num_bytes - 1, |
@@ -2227,6 +2317,10 @@ again: | |||
2227 | if (!list_empty(&update_list)) { | 2317 | if (!list_empty(&update_list)) { |
2228 | ret = update_backrefs(trans, extent_root, path, &update_list); | 2318 | ret = update_backrefs(trans, extent_root, path, &update_list); |
2229 | BUG_ON(ret); | 2319 | BUG_ON(ret); |
2320 | |||
2321 | /* we may have COW'ed new blocks, so let's start over */ | ||
2322 | if (all) | ||
2323 | restart = 1; | ||
2230 | } | 2324 | } |
2231 | 2325 | ||
2232 | /* | 2326 | /* |
@@ -2234,9 +2328,9 @@ again: | |||
2234 | * need to make sure everything is cleaned then reset everything and | 2328 | * need to make sure everything is cleaned then reset everything and |
2235 | * go back to the beginning | 2329 | * go back to the beginning |
2236 | */ | 2330 | */ |
2237 | if (!num_inserts && all && skipped) { | 2331 | if (!num_inserts && restart) { |
2238 | search = 0; | 2332 | search = 0; |
2239 | skipped = 0; | 2333 | restart = 0; |
2240 | INIT_LIST_HEAD(&update_list); | 2334 | INIT_LIST_HEAD(&update_list); |
2241 | INIT_LIST_HEAD(&insert_list); | 2335 | INIT_LIST_HEAD(&insert_list); |
2242 | goto again; | 2336 | goto again; |
@@ -2293,27 +2387,19 @@ again: | |||
2293 | BUG_ON(ret); | 2387 | BUG_ON(ret); |
2294 | 2388 | ||
2295 | /* | 2389 | /* |
2296 | * if we broke out of the loop in order to insert stuff because we hit | 2390 | * if restart is set for whatever reason we need to go back and start |
2297 | * the maximum number of inserts at a time we can handle, then loop | 2391 | * searching through the pending list again. |
2298 | * back and pick up where we left off | 2392 | * |
2299 | */ | 2393 | * We just inserted some extents, which could have resulted in new |
2300 | if (num_inserts == max_inserts) { | 2394 | * blocks being allocated, which would result in new blocks needing |
2301 | INIT_LIST_HEAD(&insert_list); | 2395 | * updates, so if all is set we _must_ restart to get the updated |
2302 | INIT_LIST_HEAD(&update_list); | 2396 | * blocks. |
2303 | num_inserts = 0; | ||
2304 | goto again; | ||
2305 | } | ||
2306 | |||
2307 | /* | ||
2308 | * again, if we need to make absolutely sure there are no more pending | ||
2309 | * extent operations left and we know that we skipped some, go back to | ||
2310 | * the beginning and do it all again | ||
2311 | */ | 2397 | */ |
2312 | if (all && skipped) { | 2398 | if (restart || all) { |
2313 | INIT_LIST_HEAD(&insert_list); | 2399 | INIT_LIST_HEAD(&insert_list); |
2314 | INIT_LIST_HEAD(&update_list); | 2400 | INIT_LIST_HEAD(&update_list); |
2315 | search = 0; | 2401 | search = 0; |
2316 | skipped = 0; | 2402 | restart = 0; |
2317 | num_inserts = 0; | 2403 | num_inserts = 0; |
2318 | goto again; | 2404 | goto again; |
2319 | } | 2405 | } |
@@ -2547,6 +2633,7 @@ again: | |||
2547 | if (ret) { | 2633 | if (ret) { |
2548 | if (all && skipped && !nr) { | 2634 | if (all && skipped && !nr) { |
2549 | search = 0; | 2635 | search = 0; |
2636 | skipped = 0; | ||
2550 | continue; | 2637 | continue; |
2551 | } | 2638 | } |
2552 | mutex_unlock(&info->extent_ins_mutex); | 2639 | mutex_unlock(&info->extent_ins_mutex); |
@@ -2633,6 +2720,8 @@ again: | |||
2633 | goto again; | 2720 | goto again; |
2634 | } | 2721 | } |
2635 | 2722 | ||
2723 | if (!err) | ||
2724 | finish_current_insert(trans, extent_root, 0); | ||
2636 | return err; | 2725 | return err; |
2637 | } | 2726 | } |
2638 | 2727 | ||
@@ -2700,13 +2789,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2700 | /* if metadata always pin */ | 2789 | /* if metadata always pin */ |
2701 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | 2790 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { |
2702 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 2791 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
2703 | struct btrfs_block_group_cache *cache; | 2792 | mutex_lock(&root->fs_info->pinned_mutex); |
2704 | 2793 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | |
2705 | /* btrfs_free_reserved_extent */ | 2794 | mutex_unlock(&root->fs_info->pinned_mutex); |
2706 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
2707 | BUG_ON(!cache); | ||
2708 | btrfs_add_free_space(cache, bytenr, num_bytes); | ||
2709 | put_block_group(cache); | ||
2710 | update_reserved_extents(root, bytenr, num_bytes, 0); | 2795 | update_reserved_extents(root, bytenr, num_bytes, 0); |
2711 | return 0; | 2796 | return 0; |
2712 | } | 2797 | } |
@@ -2787,7 +2872,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
2787 | 2872 | ||
2788 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 2873 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
2789 | last_ptr = &root->fs_info->last_alloc; | 2874 | last_ptr = &root->fs_info->last_alloc; |
2790 | empty_cluster = 64 * 1024; | 2875 | if (!btrfs_test_opt(root, SSD)) |
2876 | empty_cluster = 64 * 1024; | ||
2791 | } | 2877 | } |
2792 | 2878 | ||
2793 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | 2879 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) |
@@ -3014,7 +3100,6 @@ loop_check: | |||
3014 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 3100 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) |
3015 | { | 3101 | { |
3016 | struct btrfs_block_group_cache *cache; | 3102 | struct btrfs_block_group_cache *cache; |
3017 | struct list_head *l; | ||
3018 | 3103 | ||
3019 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 3104 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
3020 | (unsigned long long)(info->total_bytes - info->bytes_used - | 3105 | (unsigned long long)(info->total_bytes - info->bytes_used - |
@@ -3022,8 +3107,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3022 | (info->full) ? "" : "not "); | 3107 | (info->full) ? "" : "not "); |
3023 | 3108 | ||
3024 | down_read(&info->groups_sem); | 3109 | down_read(&info->groups_sem); |
3025 | list_for_each(l, &info->block_groups) { | 3110 | list_for_each_entry(cache, &info->block_groups, list) { |
3026 | cache = list_entry(l, struct btrfs_block_group_cache, list); | ||
3027 | spin_lock(&cache->lock); | 3111 | spin_lock(&cache->lock); |
3028 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 3112 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
3029 | "%llu pinned %llu reserved\n", | 3113 | "%llu pinned %llu reserved\n", |
@@ -3332,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
3332 | 3416 | ||
3333 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 3417 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
3334 | struct btrfs_root *root, | 3418 | struct btrfs_root *root, |
3335 | u64 bytenr, u32 blocksize) | 3419 | u64 bytenr, u32 blocksize, |
3420 | int level) | ||
3336 | { | 3421 | { |
3337 | struct extent_buffer *buf; | 3422 | struct extent_buffer *buf; |
3338 | 3423 | ||
@@ -3340,9 +3425,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3340 | if (!buf) | 3425 | if (!buf) |
3341 | return ERR_PTR(-ENOMEM); | 3426 | return ERR_PTR(-ENOMEM); |
3342 | btrfs_set_header_generation(buf, trans->transid); | 3427 | btrfs_set_header_generation(buf, trans->transid); |
3428 | btrfs_set_buffer_lockdep_class(buf, level); | ||
3343 | btrfs_tree_lock(buf); | 3429 | btrfs_tree_lock(buf); |
3344 | clean_tree_block(trans, root, buf); | 3430 | clean_tree_block(trans, root, buf); |
3431 | |||
3432 | btrfs_set_lock_blocking(buf); | ||
3345 | btrfs_set_buffer_uptodate(buf); | 3433 | btrfs_set_buffer_uptodate(buf); |
3434 | |||
3346 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 3435 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
3347 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 3436 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
3348 | buf->start + buf->len - 1, GFP_NOFS); | 3437 | buf->start + buf->len - 1, GFP_NOFS); |
@@ -3351,6 +3440,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3351 | buf->start + buf->len - 1, GFP_NOFS); | 3440 | buf->start + buf->len - 1, GFP_NOFS); |
3352 | } | 3441 | } |
3353 | trans->blocks_used++; | 3442 | trans->blocks_used++; |
3443 | /* this returns a buffer locked for blocking */ | ||
3354 | return buf; | 3444 | return buf; |
3355 | } | 3445 | } |
3356 | 3446 | ||
@@ -3379,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
3379 | return ERR_PTR(ret); | 3469 | return ERR_PTR(ret); |
3380 | } | 3470 | } |
3381 | 3471 | ||
3382 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); | 3472 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
3473 | blocksize, level); | ||
3383 | return buf; | 3474 | return buf; |
3384 | } | 3475 | } |
3385 | 3476 | ||
@@ -3388,36 +3479,73 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3388 | { | 3479 | { |
3389 | u64 leaf_owner; | 3480 | u64 leaf_owner; |
3390 | u64 leaf_generation; | 3481 | u64 leaf_generation; |
3482 | struct refsort *sorted; | ||
3391 | struct btrfs_key key; | 3483 | struct btrfs_key key; |
3392 | struct btrfs_file_extent_item *fi; | 3484 | struct btrfs_file_extent_item *fi; |
3393 | int i; | 3485 | int i; |
3394 | int nritems; | 3486 | int nritems; |
3395 | int ret; | 3487 | int ret; |
3488 | int refi = 0; | ||
3489 | int slot; | ||
3396 | 3490 | ||
3397 | BUG_ON(!btrfs_is_leaf(leaf)); | 3491 | BUG_ON(!btrfs_is_leaf(leaf)); |
3398 | nritems = btrfs_header_nritems(leaf); | 3492 | nritems = btrfs_header_nritems(leaf); |
3399 | leaf_owner = btrfs_header_owner(leaf); | 3493 | leaf_owner = btrfs_header_owner(leaf); |
3400 | leaf_generation = btrfs_header_generation(leaf); | 3494 | leaf_generation = btrfs_header_generation(leaf); |
3401 | 3495 | ||
3496 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
3497 | /* we do this loop twice. The first time we build a list | ||
3498 | * of the extents we have a reference on, then we sort the list | ||
3499 | * by bytenr. The second time around we actually do the | ||
3500 | * extent freeing. | ||
3501 | */ | ||
3402 | for (i = 0; i < nritems; i++) { | 3502 | for (i = 0; i < nritems; i++) { |
3403 | u64 disk_bytenr; | 3503 | u64 disk_bytenr; |
3404 | cond_resched(); | 3504 | cond_resched(); |
3405 | 3505 | ||
3406 | btrfs_item_key_to_cpu(leaf, &key, i); | 3506 | btrfs_item_key_to_cpu(leaf, &key, i); |
3507 | |||
3508 | /* only extents have references, skip everything else */ | ||
3407 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | 3509 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) |
3408 | continue; | 3510 | continue; |
3511 | |||
3409 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | 3512 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); |
3513 | |||
3514 | /* inline extents live in the btree, they don't have refs */ | ||
3410 | if (btrfs_file_extent_type(leaf, fi) == | 3515 | if (btrfs_file_extent_type(leaf, fi) == |
3411 | BTRFS_FILE_EXTENT_INLINE) | 3516 | BTRFS_FILE_EXTENT_INLINE) |
3412 | continue; | 3517 | continue; |
3413 | /* | 3518 | |
3414 | * FIXME make sure to insert a trans record that | ||
3415 | * repeats the snapshot del on crash | ||
3416 | */ | ||
3417 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 3519 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
3520 | |||
3521 | /* holes don't have refs */ | ||
3418 | if (disk_bytenr == 0) | 3522 | if (disk_bytenr == 0) |
3419 | continue; | 3523 | continue; |
3420 | 3524 | ||
3525 | sorted[refi].bytenr = disk_bytenr; | ||
3526 | sorted[refi].slot = i; | ||
3527 | refi++; | ||
3528 | } | ||
3529 | |||
3530 | if (refi == 0) | ||
3531 | goto out; | ||
3532 | |||
3533 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
3534 | |||
3535 | for (i = 0; i < refi; i++) { | ||
3536 | u64 disk_bytenr; | ||
3537 | |||
3538 | disk_bytenr = sorted[i].bytenr; | ||
3539 | slot = sorted[i].slot; | ||
3540 | |||
3541 | cond_resched(); | ||
3542 | |||
3543 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
3544 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
3545 | continue; | ||
3546 | |||
3547 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
3548 | |||
3421 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 3549 | ret = __btrfs_free_extent(trans, root, disk_bytenr, |
3422 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 3550 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
3423 | leaf->start, leaf_owner, leaf_generation, | 3551 | leaf->start, leaf_owner, leaf_generation, |
@@ -3428,6 +3556,8 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3428 | wake_up(&root->fs_info->transaction_throttle); | 3556 | wake_up(&root->fs_info->transaction_throttle); |
3429 | cond_resched(); | 3557 | cond_resched(); |
3430 | } | 3558 | } |
3559 | out: | ||
3560 | kfree(sorted); | ||
3431 | return 0; | 3561 | return 0; |
3432 | } | 3562 | } |
3433 | 3563 | ||
@@ -3437,9 +3567,25 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3437 | { | 3567 | { |
3438 | int i; | 3568 | int i; |
3439 | int ret; | 3569 | int ret; |
3440 | struct btrfs_extent_info *info = ref->extents; | 3570 | struct btrfs_extent_info *info; |
3571 | struct refsort *sorted; | ||
3572 | |||
3573 | if (ref->nritems == 0) | ||
3574 | return 0; | ||
3575 | |||
3576 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
3577 | for (i = 0; i < ref->nritems; i++) { | ||
3578 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
3579 | sorted[i].slot = i; | ||
3580 | } | ||
3581 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
3441 | 3582 | ||
3583 | /* | ||
3584 | * the items in the ref were sorted when the ref was inserted | ||
3585 | * into the ref cache, so this is already in order | ||
3586 | */ | ||
3442 | for (i = 0; i < ref->nritems; i++) { | 3587 | for (i = 0; i < ref->nritems; i++) { |
3588 | info = ref->extents + sorted[i].slot; | ||
3443 | ret = __btrfs_free_extent(trans, root, info->bytenr, | 3589 | ret = __btrfs_free_extent(trans, root, info->bytenr, |
3444 | info->num_bytes, ref->bytenr, | 3590 | info->num_bytes, ref->bytenr, |
3445 | ref->owner, ref->generation, | 3591 | ref->owner, ref->generation, |
@@ -3453,6 +3599,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3453 | info++; | 3599 | info++; |
3454 | } | 3600 | } |
3455 | 3601 | ||
3602 | kfree(sorted); | ||
3456 | return 0; | 3603 | return 0; |
3457 | } | 3604 | } |
3458 | 3605 | ||
@@ -3497,6 +3644,152 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, | |||
3497 | } | 3644 | } |
3498 | 3645 | ||
3499 | /* | 3646 | /* |
3647 | * this is used while deleting old snapshots, and it drops the refs | ||
3648 | * on a whole subtree starting from a level 1 node. | ||
3649 | * | ||
3650 | * The idea is to sort all the leaf pointers, and then drop the | ||
3651 | * ref on all the leaves in order. Most of the time the leaves | ||
3652 | * will have ref cache entries, so no leaf IOs will be required to | ||
3653 | * find the extents they have references on. | ||
3654 | * | ||
3655 | * For each leaf, any references it has are also dropped in order | ||
3656 | * | ||
3657 | * This ends up dropping the references in something close to optimal | ||
3658 | * order for reading and modifying the extent allocation tree. | ||
3659 | */ | ||
3660 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
3661 | struct btrfs_root *root, | ||
3662 | struct btrfs_path *path) | ||
3663 | { | ||
3664 | u64 bytenr; | ||
3665 | u64 root_owner; | ||
3666 | u64 root_gen; | ||
3667 | struct extent_buffer *eb = path->nodes[1]; | ||
3668 | struct extent_buffer *leaf; | ||
3669 | struct btrfs_leaf_ref *ref; | ||
3670 | struct refsort *sorted = NULL; | ||
3671 | int nritems = btrfs_header_nritems(eb); | ||
3672 | int ret; | ||
3673 | int i; | ||
3674 | int refi = 0; | ||
3675 | int slot = path->slots[1]; | ||
3676 | u32 blocksize = btrfs_level_size(root, 0); | ||
3677 | u32 refs; | ||
3678 | |||
3679 | if (nritems == 0) | ||
3680 | goto out; | ||
3681 | |||
3682 | root_owner = btrfs_header_owner(eb); | ||
3683 | root_gen = btrfs_header_generation(eb); | ||
3684 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
3685 | |||
3686 | /* | ||
3687 | * step one, sort all the leaf pointers so we don't scribble | ||
3688 | * randomly into the extent allocation tree | ||
3689 | */ | ||
3690 | for (i = slot; i < nritems; i++) { | ||
3691 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | ||
3692 | sorted[refi].slot = i; | ||
3693 | refi++; | ||
3694 | } | ||
3695 | |||
3696 | /* | ||
3697 | * nritems won't be zero, but if we're picking up drop_snapshot | ||
3698 | * after a crash, slot might be > 0, so double check things | ||
3699 | * just in case. | ||
3700 | */ | ||
3701 | if (refi == 0) | ||
3702 | goto out; | ||
3703 | |||
3704 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
3705 | |||
3706 | /* | ||
3707 | * the first loop frees everything the leaves point to | ||
3708 | */ | ||
3709 | for (i = 0; i < refi; i++) { | ||
3710 | u64 ptr_gen; | ||
3711 | |||
3712 | bytenr = sorted[i].bytenr; | ||
3713 | |||
3714 | /* | ||
3715 | * check the reference count on this leaf. If it is > 1 | ||
3716 | * we just decrement it below and don't update any | ||
3717 | * of the refs the leaf points to. | ||
3718 | */ | ||
3719 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | ||
3720 | BUG_ON(ret); | ||
3721 | if (refs != 1) | ||
3722 | continue; | ||
3723 | |||
3724 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | ||
3725 | |||
3726 | /* | ||
3727 | * the leaf only had one reference, which means the | ||
3728 | * only thing pointing to this leaf is the snapshot | ||
3729 | * we're deleting. It isn't possible for the reference | ||
3730 | * count to increase again later | ||
3731 | * | ||
3732 | * The reference cache is checked for the leaf, | ||
3733 | * and if found we'll be able to drop any refs held by | ||
3734 | * the leaf without needing to read it in. | ||
3735 | */ | ||
3736 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
3737 | if (ref && ref->generation != ptr_gen) { | ||
3738 | btrfs_free_leaf_ref(root, ref); | ||
3739 | ref = NULL; | ||
3740 | } | ||
3741 | if (ref) { | ||
3742 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
3743 | BUG_ON(ret); | ||
3744 | btrfs_remove_leaf_ref(root, ref); | ||
3745 | btrfs_free_leaf_ref(root, ref); | ||
3746 | } else { | ||
3747 | /* | ||
3748 | * the leaf wasn't in the reference cache, so | ||
3749 | * we have to read it. | ||
3750 | */ | ||
3751 | leaf = read_tree_block(root, bytenr, blocksize, | ||
3752 | ptr_gen); | ||
3753 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
3754 | BUG_ON(ret); | ||
3755 | free_extent_buffer(leaf); | ||
3756 | } | ||
3757 | atomic_inc(&root->fs_info->throttle_gen); | ||
3758 | wake_up(&root->fs_info->transaction_throttle); | ||
3759 | cond_resched(); | ||
3760 | } | ||
3761 | |||
3762 | /* | ||
3763 | * run through the loop again to free the refs on the leaves. | ||
3764 | * This is faster than doing it in the loop above because | ||
3765 | * the leaves are likely to be clustered together. We end up | ||
3766 | * working in nice chunks on the extent allocation tree. | ||
3767 | */ | ||
3768 | for (i = 0; i < refi; i++) { | ||
3769 | bytenr = sorted[i].bytenr; | ||
3770 | ret = __btrfs_free_extent(trans, root, bytenr, | ||
3771 | blocksize, eb->start, | ||
3772 | root_owner, root_gen, 0, 1); | ||
3773 | BUG_ON(ret); | ||
3774 | |||
3775 | atomic_inc(&root->fs_info->throttle_gen); | ||
3776 | wake_up(&root->fs_info->transaction_throttle); | ||
3777 | cond_resched(); | ||
3778 | } | ||
3779 | out: | ||
3780 | kfree(sorted); | ||
3781 | |||
3782 | /* | ||
3783 | * update the path to show we've processed the entire level 1 | ||
3784 | * node. This will get saved into the root's drop_snapshot_progress | ||
3785 | * field so these drops are not repeated again if this transaction | ||
3786 | * commits. | ||
3787 | */ | ||
3788 | path->slots[1] = nritems; | ||
3789 | return 0; | ||
3790 | } | ||
3791 | |||
3792 | /* | ||
3500 | * helper function for drop_snapshot, this walks down the tree dropping ref | 3793 | * helper function for drop_snapshot, this walks down the tree dropping ref |
3501 | * counts as it goes. | 3794 | * counts as it goes. |
3502 | */ | 3795 | */ |
@@ -3511,7 +3804,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3511 | struct extent_buffer *next; | 3804 | struct extent_buffer *next; |
3512 | struct extent_buffer *cur; | 3805 | struct extent_buffer *cur; |
3513 | struct extent_buffer *parent; | 3806 | struct extent_buffer *parent; |
3514 | struct btrfs_leaf_ref *ref; | ||
3515 | u32 blocksize; | 3807 | u32 blocksize; |
3516 | int ret; | 3808 | int ret; |
3517 | u32 refs; | 3809 | u32 refs; |
@@ -3538,17 +3830,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3538 | if (path->slots[*level] >= | 3830 | if (path->slots[*level] >= |
3539 | btrfs_header_nritems(cur)) | 3831 | btrfs_header_nritems(cur)) |
3540 | break; | 3832 | break; |
3833 | |||
3834 | /* the new code goes down to level 1 and does all the | ||
3835 | * leaves pointed to that node in bulk. So, this check | ||
3836 | * for level 0 will always be false. | ||
3837 | * | ||
3838 | * But, the disk format allows the drop_snapshot_progress | ||
3839 | * field in the root to leave things in a state where | ||
3840 | * a leaf will need cleaning up here. If someone crashes | ||
3841 | * with the old code and then boots with the new code, | ||
3842 | * we might find a leaf here. | ||
3843 | */ | ||
3541 | if (*level == 0) { | 3844 | if (*level == 0) { |
3542 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 3845 | ret = btrfs_drop_leaf_ref(trans, root, cur); |
3543 | BUG_ON(ret); | 3846 | BUG_ON(ret); |
3544 | break; | 3847 | break; |
3545 | } | 3848 | } |
3849 | |||
3850 | /* | ||
3851 | * once we get to level one, process the whole node | ||
3852 | * at once, including everything below it. | ||
3853 | */ | ||
3854 | if (*level == 1) { | ||
3855 | ret = drop_level_one_refs(trans, root, path); | ||
3856 | BUG_ON(ret); | ||
3857 | break; | ||
3858 | } | ||
3859 | |||
3546 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | 3860 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); |
3547 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 3861 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
3548 | blocksize = btrfs_level_size(root, *level - 1); | 3862 | blocksize = btrfs_level_size(root, *level - 1); |
3549 | 3863 | ||
3550 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3864 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); |
3551 | BUG_ON(ret); | 3865 | BUG_ON(ret); |
3866 | |||
3867 | /* | ||
3868 | * if there is more than one reference, we don't need | ||
3869 | * to read that node to drop any references it has. We | ||
3870 | * just drop the ref we hold on that node and move on to the | ||
3871 | * next slot in this level. | ||
3872 | */ | ||
3552 | if (refs != 1) { | 3873 | if (refs != 1) { |
3553 | parent = path->nodes[*level]; | 3874 | parent = path->nodes[*level]; |
3554 | root_owner = btrfs_header_owner(parent); | 3875 | root_owner = btrfs_header_owner(parent); |
@@ -3567,46 +3888,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3567 | 3888 | ||
3568 | continue; | 3889 | continue; |
3569 | } | 3890 | } |
3891 | |||
3570 | /* | 3892 | /* |
3571 | * at this point, we have a single ref, and since the | 3893 | * we need to keep freeing things in the next level down. |
3572 | * only place referencing this extent is a dead root | 3894 | * read the block and loop around to process it |
3573 | * the reference count should never go higher. | ||
3574 | * So, we don't need to check it again | ||
3575 | */ | 3895 | */ |
3576 | if (*level == 1) { | 3896 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); |
3577 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
3578 | if (ref && ref->generation != ptr_gen) { | ||
3579 | btrfs_free_leaf_ref(root, ref); | ||
3580 | ref = NULL; | ||
3581 | } | ||
3582 | if (ref) { | ||
3583 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
3584 | BUG_ON(ret); | ||
3585 | btrfs_remove_leaf_ref(root, ref); | ||
3586 | btrfs_free_leaf_ref(root, ref); | ||
3587 | *level = 0; | ||
3588 | break; | ||
3589 | } | ||
3590 | } | ||
3591 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
3592 | if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { | ||
3593 | free_extent_buffer(next); | ||
3594 | |||
3595 | next = read_tree_block(root, bytenr, blocksize, | ||
3596 | ptr_gen); | ||
3597 | cond_resched(); | ||
3598 | #if 0 | ||
3599 | /* | ||
3600 | * this is a debugging check and can go away | ||
3601 | * the ref should never go all the way down to 1 | ||
3602 | * at this point | ||
3603 | */ | ||
3604 | ret = lookup_extent_ref(NULL, root, bytenr, blocksize, | ||
3605 | &refs); | ||
3606 | BUG_ON(ret); | ||
3607 | WARN_ON(refs != 1); | ||
3608 | #endif | ||
3609 | } | ||
3610 | WARN_ON(*level <= 0); | 3897 | WARN_ON(*level <= 0); |
3611 | if (path->nodes[*level-1]) | 3898 | if (path->nodes[*level-1]) |
3612 | free_extent_buffer(path->nodes[*level-1]); | 3899 | free_extent_buffer(path->nodes[*level-1]); |
@@ -3631,11 +3918,16 @@ out: | |||
3631 | root_owner = btrfs_header_owner(parent); | 3918 | root_owner = btrfs_header_owner(parent); |
3632 | root_gen = btrfs_header_generation(parent); | 3919 | root_gen = btrfs_header_generation(parent); |
3633 | 3920 | ||
3921 | /* | ||
3922 | * cleanup and free the reference on the last node | ||
3923 | * we processed | ||
3924 | */ | ||
3634 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | 3925 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, |
3635 | parent->start, root_owner, root_gen, | 3926 | parent->start, root_owner, root_gen, |
3636 | *level, 1); | 3927 | *level, 1); |
3637 | free_extent_buffer(path->nodes[*level]); | 3928 | free_extent_buffer(path->nodes[*level]); |
3638 | path->nodes[*level] = NULL; | 3929 | path->nodes[*level] = NULL; |
3930 | |||
3639 | *level += 1; | 3931 | *level += 1; |
3640 | BUG_ON(ret); | 3932 | BUG_ON(ret); |
3641 | 3933 | ||
@@ -3687,6 +3979,7 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3687 | 3979 | ||
3688 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 3980 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); |
3689 | btrfs_tree_lock(next); | 3981 | btrfs_tree_lock(next); |
3982 | btrfs_set_lock_blocking(next); | ||
3690 | 3983 | ||
3691 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | 3984 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, |
3692 | &refs); | 3985 | &refs); |
@@ -3754,6 +4047,13 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3754 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 4047 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { |
3755 | struct extent_buffer *node; | 4048 | struct extent_buffer *node; |
3756 | struct btrfs_disk_key disk_key; | 4049 | struct btrfs_disk_key disk_key; |
4050 | |||
4051 | /* | ||
4052 | * there is more work to do in this level. | ||
4053 | * Update the drop_progress marker to reflect | ||
4054 | * the work we've done so far, and then bump | ||
4055 | * the slot number | ||
4056 | */ | ||
3757 | node = path->nodes[i]; | 4057 | node = path->nodes[i]; |
3758 | path->slots[i]++; | 4058 | path->slots[i]++; |
3759 | *level = i; | 4059 | *level = i; |
@@ -3765,6 +4065,11 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3765 | return 0; | 4065 | return 0; |
3766 | } else { | 4066 | } else { |
3767 | struct extent_buffer *parent; | 4067 | struct extent_buffer *parent; |
4068 | |||
4069 | /* | ||
4070 | * this whole node is done, free our reference | ||
4071 | * on it and go up one level | ||
4072 | */ | ||
3768 | if (path->nodes[*level] == root->node) | 4073 | if (path->nodes[*level] == root->node) |
3769 | parent = path->nodes[*level]; | 4074 | parent = path->nodes[*level]; |
3770 | else | 4075 | else |
@@ -4444,7 +4749,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, | |||
4444 | u64 lock_end = 0; | 4749 | u64 lock_end = 0; |
4445 | u64 num_bytes; | 4750 | u64 num_bytes; |
4446 | u64 ext_offset; | 4751 | u64 ext_offset; |
4447 | u64 first_pos; | 4752 | u64 search_end = (u64)-1; |
4448 | u32 nritems; | 4753 | u32 nritems; |
4449 | int nr_scaned = 0; | 4754 | int nr_scaned = 0; |
4450 | int extent_locked = 0; | 4755 | int extent_locked = 0; |
@@ -4452,7 +4757,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, | |||
4452 | int ret; | 4757 | int ret; |
4453 | 4758 | ||
4454 | memcpy(&key, leaf_key, sizeof(key)); | 4759 | memcpy(&key, leaf_key, sizeof(key)); |
4455 | first_pos = INT_LIMIT(loff_t) - extent_key->offset; | ||
4456 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | 4760 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { |
4457 | if (key.objectid < ref_path->owner_objectid || | 4761 | if (key.objectid < ref_path->owner_objectid || |
4458 | (key.objectid == ref_path->owner_objectid && | 4762 | (key.objectid == ref_path->owner_objectid && |
@@ -4501,7 +4805,7 @@ next: | |||
4501 | if ((key.objectid > ref_path->owner_objectid) || | 4805 | if ((key.objectid > ref_path->owner_objectid) || |
4502 | (key.objectid == ref_path->owner_objectid && | 4806 | (key.objectid == ref_path->owner_objectid && |
4503 | key.type > BTRFS_EXTENT_DATA_KEY) || | 4807 | key.type > BTRFS_EXTENT_DATA_KEY) || |
4504 | (key.offset >= first_pos + extent_key->offset)) | 4808 | key.offset >= search_end) |
4505 | break; | 4809 | break; |
4506 | } | 4810 | } |
4507 | 4811 | ||
@@ -4534,8 +4838,10 @@ next: | |||
4534 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | 4838 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); |
4535 | ext_offset = btrfs_file_extent_offset(leaf, fi); | 4839 | ext_offset = btrfs_file_extent_offset(leaf, fi); |
4536 | 4840 | ||
4537 | if (first_pos > key.offset - ext_offset) | 4841 | if (search_end == (u64)-1) { |
4538 | first_pos = key.offset - ext_offset; | 4842 | search_end = key.offset - ext_offset + |
4843 | btrfs_file_extent_ram_bytes(leaf, fi); | ||
4844 | } | ||
4539 | 4845 | ||
4540 | if (!extent_locked) { | 4846 | if (!extent_locked) { |
4541 | lock_start = key.offset; | 4847 | lock_start = key.offset; |
@@ -4724,7 +5030,7 @@ next: | |||
4724 | } | 5030 | } |
4725 | skip: | 5031 | skip: |
4726 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && | 5032 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && |
4727 | key.offset >= first_pos + extent_key->offset) | 5033 | key.offset >= search_end) |
4728 | break; | 5034 | break; |
4729 | 5035 | ||
4730 | cond_resched(); | 5036 | cond_resched(); |
@@ -4778,6 +5084,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | |||
4778 | ref->bytenr = buf->start; | 5084 | ref->bytenr = buf->start; |
4779 | ref->owner = btrfs_header_owner(buf); | 5085 | ref->owner = btrfs_header_owner(buf); |
4780 | ref->generation = btrfs_header_generation(buf); | 5086 | ref->generation = btrfs_header_generation(buf); |
5087 | |||
4781 | ret = btrfs_add_leaf_ref(root, ref, 0); | 5088 | ret = btrfs_add_leaf_ref(root, ref, 0); |
4782 | WARN_ON(ret); | 5089 | WARN_ON(ret); |
4783 | btrfs_free_leaf_ref(root, ref); | 5090 | btrfs_free_leaf_ref(root, ref); |
@@ -5351,7 +5658,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
5351 | prev_block = block_start; | 5658 | prev_block = block_start; |
5352 | } | 5659 | } |
5353 | 5660 | ||
5661 | mutex_lock(&extent_root->fs_info->trans_mutex); | ||
5354 | btrfs_record_root_in_trans(found_root); | 5662 | btrfs_record_root_in_trans(found_root); |
5663 | mutex_unlock(&extent_root->fs_info->trans_mutex); | ||
5355 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 5664 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
5356 | /* | 5665 | /* |
5357 | * try to update data extent references while | 5666 | * try to update data extent references while |
@@ -5957,9 +6266,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
5957 | path = btrfs_alloc_path(); | 6266 | path = btrfs_alloc_path(); |
5958 | BUG_ON(!path); | 6267 | BUG_ON(!path); |
5959 | 6268 | ||
5960 | btrfs_remove_free_space_cache(block_group); | 6269 | spin_lock(&root->fs_info->block_group_cache_lock); |
5961 | rb_erase(&block_group->cache_node, | 6270 | rb_erase(&block_group->cache_node, |
5962 | &root->fs_info->block_group_cache_tree); | 6271 | &root->fs_info->block_group_cache_tree); |
6272 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
6273 | btrfs_remove_free_space_cache(block_group); | ||
5963 | down_write(&block_group->space_info->groups_sem); | 6274 | down_write(&block_group->space_info->groups_sem); |
5964 | list_del(&block_group->list); | 6275 | list_del(&block_group->list); |
5965 | up_write(&block_group->space_info->groups_sem); | 6276 | up_write(&block_group->space_info->groups_sem); |