Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c | 818
1 file changed, 695 insertions(+), 123 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 293da650873..fefe83ad205 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -19,7 +19,8 @@ | |||
19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/version.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | ||
23 | #include "compat.h" | 24 | #include "compat.h" |
24 | #include "hash.h" | 25 | #include "hash.h" |
25 | #include "crc32c.h" | 26 | #include "crc32c.h" |
@@ -30,7 +31,6 @@ | |||
30 | #include "volumes.h" | 31 | #include "volumes.h" |
31 | #include "locking.h" | 32 | #include "locking.h" |
32 | #include "ref-cache.h" | 33 | #include "ref-cache.h" |
33 | #include "compat.h" | ||
34 | 34 | ||
35 | #define PENDING_EXTENT_INSERT 0 | 35 | #define PENDING_EXTENT_INSERT 0 |
36 | #define PENDING_EXTENT_DELETE 1 | 36 | #define PENDING_EXTENT_DELETE 1 |
@@ -61,6 +61,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
61 | u64 bytenr, u64 num_bytes, int alloc, | 61 | u64 bytenr, u64 num_bytes, int alloc, |
62 | int mark_free); | 62 | int mark_free); |
63 | 63 | ||
64 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *extent_root, u64 alloc_bytes, | ||
66 | u64 flags, int force); | ||
67 | |||
64 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | 68 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
65 | { | 69 | { |
66 | return (cache->flags & bits) == bits; | 70 | return (cache->flags & bits) == bits; |
@@ -326,16 +330,34 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
326 | u64 flags) | 330 | u64 flags) |
327 | { | 331 | { |
328 | struct list_head *head = &info->space_info; | 332 | struct list_head *head = &info->space_info; |
329 | struct list_head *cur; | ||
330 | struct btrfs_space_info *found; | 333 | struct btrfs_space_info *found; |
331 | list_for_each(cur, head) { | 334 | |
332 | found = list_entry(cur, struct btrfs_space_info, list); | 335 | rcu_read_lock(); |
333 | if (found->flags == flags) | 336 | list_for_each_entry_rcu(found, head, list) { |
337 | if (found->flags == flags) { | ||
338 | rcu_read_unlock(); | ||
334 | return found; | 339 | return found; |
340 | } | ||
335 | } | 341 | } |
342 | rcu_read_unlock(); | ||
336 | return NULL; | 343 | return NULL; |
337 | } | 344 | } |
338 | 345 | ||
346 | /* | ||
347 | * after adding space to the filesystem, we need to clear the full flags | ||
348 | * on all the space infos. | ||
349 | */ | ||
350 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | ||
351 | { | ||
352 | struct list_head *head = &info->space_info; | ||
353 | struct btrfs_space_info *found; | ||
354 | |||
355 | rcu_read_lock(); | ||
356 | list_for_each_entry_rcu(found, head, list) | ||
357 | found->full = 0; | ||
358 | rcu_read_unlock(); | ||
359 | } | ||
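
The list walk above is the RCU read side; later in this patch the writer
publishes new entries with list_add_rcu() and unmount frees them only after a
synchronize_rcu(). A condensed kernel-style sketch of that lifecycle (the
struct here is a simplified stand-in, but the RCU primitives are the real
kernel API):

	#include <linux/list.h>
	#include <linux/rculist.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct demo_info {
		u64 flags;
		struct list_head list;
	};

	/* reader: lockless, safe against a concurrent list_add_rcu() */
	static struct demo_info *demo_find(struct list_head *head, u64 flags)
	{
		struct demo_info *found;

		rcu_read_lock();
		list_for_each_entry_rcu(found, head, list) {
			if (found->flags == flags) {
				rcu_read_unlock();
				return found;
			}
		}
		rcu_read_unlock();
		return NULL;
	}

	/* writer: initialize every field before publishing the entry */
	static void demo_publish(struct list_head *head,
				 struct demo_info *info, u64 flags)
	{
		info->flags = flags;			/* set up first... */
		list_add_rcu(&info->list, head);	/* ...then publish */
	}

	/* teardown: wait out readers before kfree(), as unmount does */
	static void demo_teardown(struct list_head *head)
	{
		struct demo_info *info;

		synchronize_rcu();
		while (!list_empty(head)) {
			info = list_entry(head->next, struct demo_info, list);
			list_del(&info->list);
			kfree(info);
		}
	}
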
360 | |||
339 | static u64 div_factor(u64 num, int factor) | 361 | static u64 div_factor(u64 num, int factor) |
340 | { | 362 | { |
341 | if (factor == 10) | 363 | if (factor == 10) |
@@ -1326,8 +1348,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1326 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 1348 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, |
1327 | struct btrfs_root *root) | 1349 | struct btrfs_root *root) |
1328 | { | 1350 | { |
1329 | finish_current_insert(trans, root->fs_info->extent_root, 1); | 1351 | u64 start; |
1330 | del_pending_extents(trans, root->fs_info->extent_root, 1); | 1352 | u64 end; |
1353 | int ret; | ||
1354 | |||
1355 | while (1) { | ||
1356 | finish_current_insert(trans, root->fs_info->extent_root, 1); | ||
1357 | del_pending_extents(trans, root->fs_info->extent_root, 1); | ||
1358 | |||
1359 | /* is there more work to do? */ | ||
1360 | ret = find_first_extent_bit(&root->fs_info->pending_del, | ||
1361 | 0, &start, &end, EXTENT_WRITEBACK); | ||
1362 | if (!ret) | ||
1363 | continue; | ||
1364 | ret = find_first_extent_bit(&root->fs_info->extent_ins, | ||
1365 | 0, &start, &end, EXTENT_WRITEBACK); | ||
1366 | if (!ret) | ||
1367 | continue; | ||
1368 | break; | ||
1369 | } | ||
1331 | return 0; | 1370 | return 0; |
1332 | } | 1371 | } |
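
Finishing pending inserts can queue more deletes and vice versa, so the
function now drains both trees until a full pass finds nothing pending. The
same drain-until-stable shape as a runnable userspace sketch, with two toy
work counters standing in for the pending extent trees:

	#include <stdio.h>

	/* toy queues: running one phase may generate work for the other */
	static int pending_ins = 3, pending_del = 2;

	static void run_inserts(void)
	{
		if (pending_ins) {
			pending_ins--;
			pending_del++;	/* inserts can queue deletes */
		}
	}

	static void run_deletes(void)
	{
		if (pending_del)
			pending_del--;
	}

	int main(void)
	{
		/* run both phases, then re-check both queues; stop only
		 * once a full pass found nothing left pending */
		while (1) {
			run_inserts();
			run_deletes();
			if (pending_ins)
				continue;
			if (pending_del)
				continue;
			break;
		}
		printf("drained: ins=%d del=%d\n", pending_ins, pending_del);
		return 0;
	}
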
1333 | 1372 | ||
@@ -1525,15 +1564,55 @@ out: | |||
1525 | return ret; | 1564 | return ret; |
1526 | } | 1565 | } |
1527 | 1566 | ||
1528 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1567 | /* when a block goes through cow, we update the reference counts of |
1529 | struct extent_buffer *orig_buf, struct extent_buffer *buf, | 1568 | * everything that block points to. The internal pointers of the block |
1530 | u32 *nr_extents) | 1569 | * can be in just about any order, and it is likely to have clusters of |
1570 | * things that are close together and clusters of things that are not. | ||
1571 | * | ||
1572 | * To help reduce the seeks that come with updating all of these reference | ||
1573 | * counts, sort them by byte number before actual updates are done. | ||
1574 | * | ||
1575 | * struct refsort is used to match byte number to slot in the btree block. | ||
1576 | * we sort based on the byte number and then use the slot to actually | ||
1577 | * find the item. | ||
1578 | * | ||
1579 | * struct refsort is smaller than struct btrfs_item and smaller than | ||
1580 | * struct btrfs_key_ptr. Since we're currently limited to the page size | ||
1581 | * for a btree block, there's no way for a kmalloc of refsorts for a | ||
1582 | * single node to be bigger than a page. | ||
1583 | */ | ||
1584 | struct refsort { | ||
1585 | u64 bytenr; | ||
1586 | u32 slot; | ||
1587 | }; | ||
1588 | |||
1589 | /* | ||
1590 | * for passing into sort() | ||
1591 | */ | ||
1592 | static int refsort_cmp(const void *a_void, const void *b_void) | ||
1593 | { | ||
1594 | const struct refsort *a = a_void; | ||
1595 | const struct refsort *b = b_void; | ||
1596 | |||
1597 | if (a->bytenr < b->bytenr) | ||
1598 | return -1; | ||
1599 | if (a->bytenr > b->bytenr) | ||
1600 | return 1; | ||
1601 | return 0; | ||
1602 | } | ||
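
refsort_cmp compares explicitly instead of returning a->bytenr - b->bytenr:
the u64 difference would be truncated to int and could flip sign for large
byte numbers. A runnable userspace analogue; qsort(3) takes the same
comparator signature as the kernel's sort(), which only adds an optional swap
callback:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct refsort {
		uint64_t bytenr;
		uint32_t slot;
	};

	/* same shape as above: explicit compares, no subtraction, so
	 * 64-bit byte numbers can never wrap or truncate the result */
	static int refsort_cmp(const void *a_void, const void *b_void)
	{
		const struct refsort *a = a_void;
		const struct refsort *b = b_void;

		if (a->bytenr < b->bytenr)
			return -1;
		if (a->bytenr > b->bytenr)
			return 1;
		return 0;
	}

	int main(void)
	{
		struct refsort refs[] = {
			{ 1ULL << 40, 0 }, { 4096, 1 }, { 1ULL << 33, 2 },
		};
		size_t n = sizeof(refs) / sizeof(refs[0]);

		qsort(refs, n, sizeof(refs[0]), refsort_cmp);
		for (size_t i = 0; i < n; i++)
			printf("bytenr=%llu slot=%u\n",
			       (unsigned long long)refs[i].bytenr,
			       (unsigned)refs[i].slot);
		return 0;
	}
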
1603 | |||
1604 | |||
1605 | noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | ||
1606 | struct btrfs_root *root, | ||
1607 | struct extent_buffer *orig_buf, | ||
1608 | struct extent_buffer *buf, u32 *nr_extents) | ||
1531 | { | 1609 | { |
1532 | u64 bytenr; | 1610 | u64 bytenr; |
1533 | u64 ref_root; | 1611 | u64 ref_root; |
1534 | u64 orig_root; | 1612 | u64 orig_root; |
1535 | u64 ref_generation; | 1613 | u64 ref_generation; |
1536 | u64 orig_generation; | 1614 | u64 orig_generation; |
1615 | struct refsort *sorted; | ||
1537 | u32 nritems; | 1616 | u32 nritems; |
1538 | u32 nr_file_extents = 0; | 1617 | u32 nr_file_extents = 0; |
1539 | struct btrfs_key key; | 1618 | struct btrfs_key key; |
@@ -1542,6 +1621,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1542 | int level; | 1621 | int level; |
1543 | int ret = 0; | 1622 | int ret = 0; |
1544 | int faili = 0; | 1623 | int faili = 0; |
1624 | int refi = 0; | ||
1625 | int slot; | ||
1545 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 1626 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1546 | u64, u64, u64, u64, u64, u64, u64, u64); | 1627 | u64, u64, u64, u64, u64, u64, u64, u64); |
1547 | 1628 | ||
@@ -1553,6 +1634,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1553 | nritems = btrfs_header_nritems(buf); | 1634 | nritems = btrfs_header_nritems(buf); |
1554 | level = btrfs_header_level(buf); | 1635 | level = btrfs_header_level(buf); |
1555 | 1636 | ||
1637 | sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); | ||
1638 | BUG_ON(!sorted); | ||
1639 | |||
1556 | if (root->ref_cows) { | 1640 | if (root->ref_cows) { |
1557 | process_func = __btrfs_inc_extent_ref; | 1641 | process_func = __btrfs_inc_extent_ref; |
1558 | } else { | 1642 | } else { |
@@ -1565,6 +1649,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1565 | process_func = __btrfs_update_extent_ref; | 1649 | process_func = __btrfs_update_extent_ref; |
1566 | } | 1650 | } |
1567 | 1651 | ||
1652 | /* | ||
1653 | * we make two passes through the items. In the first pass we | ||
1654 | * only record the byte number and slot. Then we sort based on | ||
1655 | * byte number and do the actual work based on the sorted results | ||
1656 | */ | ||
1568 | for (i = 0; i < nritems; i++) { | 1657 | for (i = 0; i < nritems; i++) { |
1569 | cond_resched(); | 1658 | cond_resched(); |
1570 | if (level == 0) { | 1659 | if (level == 0) { |
@@ -1581,6 +1670,32 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1581 | continue; | 1670 | continue; |
1582 | 1671 | ||
1583 | nr_file_extents++; | 1672 | nr_file_extents++; |
1673 | sorted[refi].bytenr = bytenr; | ||
1674 | sorted[refi].slot = i; | ||
1675 | refi++; | ||
1676 | } else { | ||
1677 | bytenr = btrfs_node_blockptr(buf, i); | ||
1678 | sorted[refi].bytenr = bytenr; | ||
1679 | sorted[refi].slot = i; | ||
1680 | refi++; | ||
1681 | } | ||
1682 | } | ||
1683 | /* | ||
1684 | * if refi == 0, we didn't actually put anything into the sorted | ||
1685 | * array and we're done | ||
1686 | */ | ||
1687 | if (refi == 0) | ||
1688 | goto out; | ||
1689 | |||
1690 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
1691 | |||
1692 | for (i = 0; i < refi; i++) { | ||
1693 | cond_resched(); | ||
1694 | slot = sorted[i].slot; | ||
1695 | bytenr = sorted[i].bytenr; | ||
1696 | |||
1697 | if (level == 0) { | ||
1698 | btrfs_item_key_to_cpu(buf, &key, slot); | ||
1584 | 1699 | ||
1585 | ret = process_func(trans, root, bytenr, | 1700 | ret = process_func(trans, root, bytenr, |
1586 | orig_buf->start, buf->start, | 1701 | orig_buf->start, buf->start, |
@@ -1589,25 +1704,25 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1589 | key.objectid); | 1704 | key.objectid); |
1590 | 1705 | ||
1591 | if (ret) { | 1706 | if (ret) { |
1592 | faili = i; | 1707 | faili = slot; |
1593 | WARN_ON(1); | 1708 | WARN_ON(1); |
1594 | goto fail; | 1709 | goto fail; |
1595 | } | 1710 | } |
1596 | } else { | 1711 | } else { |
1597 | bytenr = btrfs_node_blockptr(buf, i); | ||
1598 | ret = process_func(trans, root, bytenr, | 1712 | ret = process_func(trans, root, bytenr, |
1599 | orig_buf->start, buf->start, | 1713 | orig_buf->start, buf->start, |
1600 | orig_root, ref_root, | 1714 | orig_root, ref_root, |
1601 | orig_generation, ref_generation, | 1715 | orig_generation, ref_generation, |
1602 | level - 1); | 1716 | level - 1); |
1603 | if (ret) { | 1717 | if (ret) { |
1604 | faili = i; | 1718 | faili = slot; |
1605 | WARN_ON(1); | 1719 | WARN_ON(1); |
1606 | goto fail; | 1720 | goto fail; |
1607 | } | 1721 | } |
1608 | } | 1722 | } |
1609 | } | 1723 | } |
1610 | out: | 1724 | out: |
1725 | kfree(sorted); | ||
1611 | if (nr_extents) { | 1726 | if (nr_extents) { |
1612 | if (level == 0) | 1727 | if (level == 0) |
1613 | *nr_extents = nr_file_extents; | 1728 | *nr_extents = nr_file_extents; |
@@ -1616,6 +1731,7 @@ out: | |||
1616 | } | 1731 | } |
1617 | return 0; | 1732 | return 0; |
1618 | fail: | 1733 | fail: |
1734 | kfree(sorted); | ||
1619 | WARN_ON(1); | 1735 | WARN_ON(1); |
1620 | return ret; | 1736 | return ret; |
1621 | } | 1737 | } |
@@ -1808,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
1808 | if (!found) | 1924 | if (!found) |
1809 | return -ENOMEM; | 1925 | return -ENOMEM; |
1810 | 1926 | ||
1811 | list_add(&found->list, &info->space_info); | ||
1812 | INIT_LIST_HEAD(&found->block_groups); | 1927 | INIT_LIST_HEAD(&found->block_groups); |
1813 | init_rwsem(&found->groups_sem); | 1928 | init_rwsem(&found->groups_sem); |
1814 | spin_lock_init(&found->lock); | 1929 | spin_lock_init(&found->lock); |
@@ -1818,9 +1933,11 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
1818 | found->bytes_pinned = 0; | 1933 | found->bytes_pinned = 0; |
1819 | found->bytes_reserved = 0; | 1934 | found->bytes_reserved = 0; |
1820 | found->bytes_readonly = 0; | 1935 | found->bytes_readonly = 0; |
1936 | found->bytes_delalloc = 0; | ||
1821 | found->full = 0; | 1937 | found->full = 0; |
1822 | found->force_alloc = 0; | 1938 | found->force_alloc = 0; |
1823 | *space_info = found; | 1939 | *space_info = found; |
1940 | list_add_rcu(&found->list, &info->space_info); | ||
1824 | return 0; | 1941 | return 0; |
1825 | } | 1942 | } |
1826 | 1943 | ||
@@ -1881,6 +1998,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
1881 | return flags; | 1998 | return flags; |
1882 | } | 1999 | } |
1883 | 2000 | ||
2001 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | ||
2002 | { | ||
2003 | struct btrfs_fs_info *info = root->fs_info; | ||
2004 | u64 alloc_profile; | ||
2005 | |||
2006 | if (data) { | ||
2007 | alloc_profile = info->avail_data_alloc_bits & | ||
2008 | info->data_alloc_profile; | ||
2009 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
2010 | } else if (root == root->fs_info->chunk_root) { | ||
2011 | alloc_profile = info->avail_system_alloc_bits & | ||
2012 | info->system_alloc_profile; | ||
2013 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
2014 | } else { | ||
2015 | alloc_profile = info->avail_metadata_alloc_bits & | ||
2016 | info->metadata_alloc_profile; | ||
2017 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
2018 | } | ||
2019 | |||
2020 | return btrfs_reduce_alloc_profile(root, data); | ||
2021 | } | ||
2022 | |||
2023 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
2024 | { | ||
2025 | u64 alloc_target; | ||
2026 | |||
2027 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2028 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
2029 | alloc_target); | ||
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * for now this just makes sure we have at least 5% of our metadata space free | ||
2034 | * for use. | ||
2035 | */ | ||
2036 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | ||
2037 | { | ||
2038 | struct btrfs_fs_info *info = root->fs_info; | ||
2039 | struct btrfs_space_info *meta_sinfo; | ||
2040 | u64 alloc_target, thresh; | ||
2041 | int committed = 0, ret; | ||
2042 | |||
2043 | /* get the space info for where the metadata will live */ | ||
2044 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2045 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2046 | |||
2047 | again: | ||
2048 | spin_lock(&meta_sinfo->lock); | ||
2049 | if (!meta_sinfo->full) | ||
2050 | thresh = meta_sinfo->total_bytes * 80; | ||
2051 | else | ||
2052 | thresh = meta_sinfo->total_bytes * 95; | ||
2053 | |||
2054 | do_div(thresh, 100); | ||
2055 | |||
2056 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2057 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | ||
2058 | struct btrfs_trans_handle *trans; | ||
2059 | if (!meta_sinfo->full) { | ||
2060 | meta_sinfo->force_alloc = 1; | ||
2061 | spin_unlock(&meta_sinfo->lock); | ||
2062 | |||
2063 | trans = btrfs_start_transaction(root, 1); | ||
2064 | if (!trans) | ||
2065 | return -ENOMEM; | ||
2066 | |||
2067 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2068 | 2 * 1024 * 1024, alloc_target, 0); | ||
2069 | btrfs_end_transaction(trans, root); | ||
2070 | goto again; | ||
2071 | } | ||
2072 | spin_unlock(&meta_sinfo->lock); | ||
2073 | |||
2074 | if (!committed) { | ||
2075 | committed = 1; | ||
2076 | trans = btrfs_join_transaction(root, 1); | ||
2077 | if (!trans) | ||
2078 | return -ENOMEM; | ||
2079 | ret = btrfs_commit_transaction(trans, root); | ||
2080 | if (ret) | ||
2081 | return ret; | ||
2082 | goto again; | ||
2083 | } | ||
2084 | return -ENOSPC; | ||
2085 | } | ||
2086 | spin_unlock(&meta_sinfo->lock); | ||
2087 | |||
2088 | return 0; | ||
2089 | } | ||
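
The threshold is plain integer math: 80% of total_bytes normally, 95% once
the space info is marked full. do_div() is used above because a straight
u64/u32 divide is not available on every 32-bit architecture; a userspace
analogue of the same calculation:

	#include <stdint.h>
	#include <stdio.h>

	/* multiply first, then divide; total_bytes * 95 stays far below
	 * U64_MAX for any realistic filesystem size */
	static uint64_t metadata_thresh(uint64_t total_bytes, int full)
	{
		uint64_t thresh = total_bytes * (full ? 95 : 80);

		return thresh / 100;
	}

	int main(void)
	{
		uint64_t total = 4ULL << 30;	/* 4 GiB metadata space */

		printf("not full: force chunk alloc past %llu bytes\n",
		       (unsigned long long)metadata_thresh(total, 0));
		printf("full:     -ENOSPC past %llu bytes\n",
		       (unsigned long long)metadata_thresh(total, 1));
		return 0;
	}
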
2090 | |||
2091 | /* | ||
2092 | * This will check the space that the inode allocates from to make sure we have | ||
2093 | * enough space for bytes. | ||
2094 | */ | ||
2095 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | ||
2096 | u64 bytes) | ||
2097 | { | ||
2098 | struct btrfs_space_info *data_sinfo; | ||
2099 | int ret = 0, committed = 0; | ||
2100 | |||
2101 | /* make sure bytes are sectorsize aligned */ | ||
2102 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
2103 | |||
2104 | data_sinfo = BTRFS_I(inode)->space_info; | ||
2105 | again: | ||
2106 | /* make sure we have enough space to handle the data first */ | ||
2107 | spin_lock(&data_sinfo->lock); | ||
2108 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | ||
2109 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | ||
2110 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | ||
2111 | data_sinfo->bytes_may_use < bytes) { | ||
2112 | struct btrfs_trans_handle *trans; | ||
2113 | |||
2114 | /* | ||
2115 | * if we don't have enough free bytes in this space then we need | ||
2116 | * to alloc a new chunk. | ||
2117 | */ | ||
2118 | if (!data_sinfo->full) { | ||
2119 | u64 alloc_target; | ||
2120 | |||
2121 | data_sinfo->force_alloc = 1; | ||
2122 | spin_unlock(&data_sinfo->lock); | ||
2123 | |||
2124 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2125 | trans = btrfs_start_transaction(root, 1); | ||
2126 | if (!trans) | ||
2127 | return -ENOMEM; | ||
2128 | |||
2129 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2130 | bytes + 2 * 1024 * 1024, | ||
2131 | alloc_target, 0); | ||
2132 | btrfs_end_transaction(trans, root); | ||
2133 | if (ret) | ||
2134 | return ret; | ||
2135 | goto again; | ||
2136 | } | ||
2137 | spin_unlock(&data_sinfo->lock); | ||
2138 | |||
2139 | /* commit the current transaction and try again */ | ||
2140 | if (!committed) { | ||
2141 | committed = 1; | ||
2142 | trans = btrfs_join_transaction(root, 1); | ||
2143 | if (!trans) | ||
2144 | return -ENOMEM; | ||
2145 | ret = btrfs_commit_transaction(trans, root); | ||
2146 | if (ret) | ||
2147 | return ret; | ||
2148 | goto again; | ||
2149 | } | ||
2150 | |||
2151 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | ||
2152 | ", %llu bytes_used, %llu bytes_reserved, " | ||
2153 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" | ||
2154 | "%llu total\n", bytes, data_sinfo->bytes_delalloc, | ||
2155 | data_sinfo->bytes_used, data_sinfo->bytes_reserved, | ||
2156 | data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, | ||
2157 | data_sinfo->bytes_may_use, data_sinfo->total_bytes); | ||
2158 | return -ENOSPC; | ||
2159 | } | ||
2160 | data_sinfo->bytes_may_use += bytes; | ||
2161 | BTRFS_I(inode)->reserved_bytes += bytes; | ||
2162 | spin_unlock(&data_sinfo->lock); | ||
2163 | |||
2164 | return btrfs_check_metadata_free_space(root); | ||
2165 | } | ||
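
The alignment at the top of the function is the usual power-of-two round-up:
add sectorsize - 1, then mask off the low bits. A minimal standalone check of
that identity:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* round bytes up to the next multiple of sectorsize; only valid
	 * because sectorsize is a power of two, matching the mask trick
	 * used in btrfs_check_data_free_space() above */
	static uint64_t align_up(uint64_t bytes, uint64_t sectorsize)
	{
		assert((sectorsize & (sectorsize - 1)) == 0);
		return (bytes + sectorsize - 1) & ~(sectorsize - 1);
	}

	int main(void)
	{
		printf("%llu\n", (unsigned long long)align_up(1, 4096));
		printf("%llu\n", (unsigned long long)align_up(4096, 4096));
		printf("%llu\n", (unsigned long long)align_up(4097, 4096));
		return 0;	/* prints 4096, 4096, 8192 */
	}
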
2166 | |||
2167 | /* | ||
2168 | * if there was an error for whatever reason after calling | ||
2169 | * btrfs_check_data_free_space, call this so we can clean up the counters. | ||
2170 | */ | ||
2171 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | ||
2172 | struct inode *inode, u64 bytes) | ||
2173 | { | ||
2174 | struct btrfs_space_info *data_sinfo; | ||
2175 | |||
2176 | /* make sure bytes are sectorsize aligned */ | ||
2177 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
2178 | |||
2179 | data_sinfo = BTRFS_I(inode)->space_info; | ||
2180 | spin_lock(&data_sinfo->lock); | ||
2181 | data_sinfo->bytes_may_use -= bytes; | ||
2182 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
2183 | spin_unlock(&data_sinfo->lock); | ||
2184 | } | ||
2185 | |||
2186 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
2187 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
2188 | u64 bytes) | ||
2189 | { | ||
2190 | struct btrfs_space_info *data_sinfo; | ||
2191 | |||
2192 | /* get the space info for where this inode will be storing its data */ | ||
2193 | data_sinfo = BTRFS_I(inode)->space_info; | ||
2194 | |||
2195 | /* make sure we have enough space to handle the data first */ | ||
2196 | spin_lock(&data_sinfo->lock); | ||
2197 | data_sinfo->bytes_delalloc += bytes; | ||
2198 | |||
2199 | /* | ||
2200 | * we are adding a delalloc extent without calling | ||
2201 | * btrfs_check_data_free_space first. This happens on a weird | ||
2202 | * writepage condition, but shouldn't hurt our accounting | ||
2203 | */ | ||
2204 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
2205 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
2206 | BTRFS_I(inode)->reserved_bytes = 0; | ||
2207 | } else { | ||
2208 | data_sinfo->bytes_may_use -= bytes; | ||
2209 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
2210 | } | ||
2211 | |||
2212 | spin_unlock(&data_sinfo->lock); | ||
2213 | } | ||
2214 | |||
2215 | /* called when we are clearing a delalloc extent from the inode's io_tree */ | ||
2216 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
2217 | u64 bytes) | ||
2218 | { | ||
2219 | struct btrfs_space_info *info; | ||
2220 | |||
2221 | info = BTRFS_I(inode)->space_info; | ||
2222 | |||
2223 | spin_lock(&info->lock); | ||
2224 | info->bytes_delalloc -= bytes; | ||
2225 | spin_unlock(&info->lock); | ||
2226 | } | ||
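
Taken together, the three hooks keep two counters consistent: bytes_may_use
tracks space reserved up front, bytes_delalloc tracks dirty-but-unwritten
data, and btrfs_delalloc_reserve_space moves bytes from one to the other. A
compressed toy model of that flow (locking and the space_info lookup omitted;
names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* two counters the hooks above maintain, plus the per-inode
	 * record of how much was reserved ahead of time */
	struct space { uint64_t may_use, delalloc; };
	struct ino   { uint64_t reserved; };

	static void reserve(struct space *s, struct ino *i, uint64_t b)
	{
		s->may_use += b;	/* btrfs_check_data_free_space */
		i->reserved += b;
	}

	static void add_delalloc(struct space *s, struct ino *i, uint64_t b)
	{
		s->delalloc += b;	/* btrfs_delalloc_reserve_space */
		if (b > i->reserved) {	/* writepage with no reservation */
			s->may_use -= i->reserved;
			i->reserved = 0;
		} else {
			s->may_use -= b;
			i->reserved -= b;
		}
	}

	static void clear_delalloc(struct space *s, uint64_t b)
	{
		s->delalloc -= b;	/* btrfs_delalloc_free_space */
	}

	int main(void)
	{
		struct space s = { 0, 0 };
		struct ino   i = { 0 };

		reserve(&s, &i, 8192);
		add_delalloc(&s, &i, 8192);
		clear_delalloc(&s, 8192);
		printf("may_use=%llu delalloc=%llu reserved=%llu\n",
		       (unsigned long long)s.may_use,
		       (unsigned long long)s.delalloc,
		       (unsigned long long)i.reserved);
		return 0;	/* all three end at zero */
	}
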
2227 | |||
1884 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 2228 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1885 | struct btrfs_root *extent_root, u64 alloc_bytes, | 2229 | struct btrfs_root *extent_root, u64 alloc_bytes, |
1886 | u64 flags, int force) | 2230 | u64 flags, int force) |
@@ -2137,13 +2481,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, | |||
2137 | u64 end; | 2481 | u64 end; |
2138 | u64 priv; | 2482 | u64 priv; |
2139 | u64 search = 0; | 2483 | u64 search = 0; |
2140 | u64 skipped = 0; | ||
2141 | struct btrfs_fs_info *info = extent_root->fs_info; | 2484 | struct btrfs_fs_info *info = extent_root->fs_info; |
2142 | struct btrfs_path *path; | 2485 | struct btrfs_path *path; |
2143 | struct pending_extent_op *extent_op, *tmp; | 2486 | struct pending_extent_op *extent_op, *tmp; |
2144 | struct list_head insert_list, update_list; | 2487 | struct list_head insert_list, update_list; |
2145 | int ret; | 2488 | int ret; |
2146 | int num_inserts = 0, max_inserts; | 2489 | int num_inserts = 0, max_inserts, restart = 0; |
2147 | 2490 | ||
2148 | path = btrfs_alloc_path(); | 2491 | path = btrfs_alloc_path(); |
2149 | INIT_LIST_HEAD(&insert_list); | 2492 | INIT_LIST_HEAD(&insert_list); |
@@ -2159,18 +2502,19 @@ again: | |||
2159 | ret = find_first_extent_bit(&info->extent_ins, search, &start, | 2502 | ret = find_first_extent_bit(&info->extent_ins, search, &start, |
2160 | &end, EXTENT_WRITEBACK); | 2503 | &end, EXTENT_WRITEBACK); |
2161 | if (ret) { | 2504 | if (ret) { |
2162 | if (skipped && all && !num_inserts) { | 2505 | if (restart && !num_inserts && |
2163 | skipped = 0; | 2506 | list_empty(&update_list)) { |
2507 | restart = 0; | ||
2164 | search = 0; | 2508 | search = 0; |
2165 | continue; | 2509 | continue; |
2166 | } | 2510 | } |
2167 | mutex_unlock(&info->extent_ins_mutex); | ||
2168 | break; | 2511 | break; |
2169 | } | 2512 | } |
2170 | 2513 | ||
2171 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); | 2514 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); |
2172 | if (!ret) { | 2515 | if (!ret) { |
2173 | skipped = 1; | 2516 | if (all) |
2517 | restart = 1; | ||
2174 | search = end + 1; | 2518 | search = end + 1; |
2175 | if (need_resched()) { | 2519 | if (need_resched()) { |
2176 | mutex_unlock(&info->extent_ins_mutex); | 2520 | mutex_unlock(&info->extent_ins_mutex); |
@@ -2189,7 +2533,7 @@ again: | |||
2189 | list_add_tail(&extent_op->list, &insert_list); | 2533 | list_add_tail(&extent_op->list, &insert_list); |
2190 | search = end + 1; | 2534 | search = end + 1; |
2191 | if (num_inserts == max_inserts) { | 2535 | if (num_inserts == max_inserts) { |
2192 | mutex_unlock(&info->extent_ins_mutex); | 2536 | restart = 1; |
2193 | break; | 2537 | break; |
2194 | } | 2538 | } |
2195 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | 2539 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { |
@@ -2205,7 +2549,6 @@ again: | |||
2205 | * somebody marked this thing for deletion then just unlock it and be | 2549 | * somebody marked this thing for deletion then just unlock it and be |
2206 | * done, the free_extents will handle it | 2550 | * done, the free_extents will handle it |
2207 | */ | 2551 | */ |
2208 | mutex_lock(&info->extent_ins_mutex); | ||
2209 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { | 2552 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { |
2210 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | 2553 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, |
2211 | extent_op->bytenr + extent_op->num_bytes - 1, | 2554 | extent_op->bytenr + extent_op->num_bytes - 1, |
@@ -2227,6 +2570,10 @@ again: | |||
2227 | if (!list_empty(&update_list)) { | 2570 | if (!list_empty(&update_list)) { |
2228 | ret = update_backrefs(trans, extent_root, path, &update_list); | 2571 | ret = update_backrefs(trans, extent_root, path, &update_list); |
2229 | BUG_ON(ret); | 2572 | BUG_ON(ret); |
2573 | |||
2574 | /* we may have COW'ed new blocks, so let's start over */ | ||
2575 | if (all) | ||
2576 | restart = 1; | ||
2230 | } | 2577 | } |
2231 | 2578 | ||
2232 | /* | 2579 | /* |
@@ -2234,9 +2581,9 @@ again: | |||
2234 | * need to make sure everything is cleaned then reset everything and | 2581 | * need to make sure everything is cleaned then reset everything and |
2235 | * go back to the beginning | 2582 | * go back to the beginning |
2236 | */ | 2583 | */ |
2237 | if (!num_inserts && all && skipped) { | 2584 | if (!num_inserts && restart) { |
2238 | search = 0; | 2585 | search = 0; |
2239 | skipped = 0; | 2586 | restart = 0; |
2240 | INIT_LIST_HEAD(&update_list); | 2587 | INIT_LIST_HEAD(&update_list); |
2241 | INIT_LIST_HEAD(&insert_list); | 2588 | INIT_LIST_HEAD(&insert_list); |
2242 | goto again; | 2589 | goto again; |
@@ -2293,27 +2640,19 @@ again: | |||
2293 | BUG_ON(ret); | 2640 | BUG_ON(ret); |
2294 | 2641 | ||
2295 | /* | 2642 | /* |
2296 | * if we broke out of the loop in order to insert stuff because we hit | 2643 | * if restart is set for whatever reason we need to go back and start |
2297 | * the maximum number of inserts at a time we can handle, then loop | 2644 | * searching through the pending list again. |
2298 | * back and pick up where we left off | 2645 | * |
2299 | */ | 2646 | * We just inserted some extents, which could have resulted in new |
2300 | if (num_inserts == max_inserts) { | 2647 | * blocks being allocated, which would result in new blocks needing |
2301 | INIT_LIST_HEAD(&insert_list); | 2648 | * updates, so if all is set we _must_ restart to get the updated |
2302 | INIT_LIST_HEAD(&update_list); | 2649 | * blocks. |
2303 | num_inserts = 0; | ||
2304 | goto again; | ||
2305 | } | ||
2306 | |||
2307 | /* | ||
2308 | * again, if we need to make absolutely sure there are no more pending | ||
2309 | * extent operations left and we know that we skipped some, go back to | ||
2310 | * the beginning and do it all again | ||
2311 | */ | 2650 | */ |
2312 | if (all && skipped) { | 2651 | if (restart || all) { |
2313 | INIT_LIST_HEAD(&insert_list); | 2652 | INIT_LIST_HEAD(&insert_list); |
2314 | INIT_LIST_HEAD(&update_list); | 2653 | INIT_LIST_HEAD(&update_list); |
2315 | search = 0; | 2654 | search = 0; |
2316 | skipped = 0; | 2655 | restart = 0; |
2317 | num_inserts = 0; | 2656 | num_inserts = 0; |
2318 | goto again; | 2657 | goto again; |
2319 | } | 2658 | } |
@@ -2547,6 +2886,7 @@ again: | |||
2547 | if (ret) { | 2886 | if (ret) { |
2548 | if (all && skipped && !nr) { | 2887 | if (all && skipped && !nr) { |
2549 | search = 0; | 2888 | search = 0; |
2889 | skipped = 0; | ||
2550 | continue; | 2890 | continue; |
2551 | } | 2891 | } |
2552 | mutex_unlock(&info->extent_ins_mutex); | 2892 | mutex_unlock(&info->extent_ins_mutex); |
@@ -2633,6 +2973,8 @@ again: | |||
2633 | goto again; | 2973 | goto again; |
2634 | } | 2974 | } |
2635 | 2975 | ||
2976 | if (!err) | ||
2977 | finish_current_insert(trans, extent_root, 0); | ||
2636 | return err; | 2978 | return err; |
2637 | } | 2979 | } |
2638 | 2980 | ||
@@ -2700,13 +3042,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
2700 | /* if metadata always pin */ | 3042 | /* if metadata always pin */ |
2701 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | 3043 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { |
2702 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 3044 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
2703 | struct btrfs_block_group_cache *cache; | 3045 | mutex_lock(&root->fs_info->pinned_mutex); |
2704 | 3046 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | |
2705 | /* btrfs_free_reserved_extent */ | 3047 | mutex_unlock(&root->fs_info->pinned_mutex); |
2706 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
2707 | BUG_ON(!cache); | ||
2708 | btrfs_add_free_space(cache, bytenr, num_bytes); | ||
2709 | put_block_group(cache); | ||
2710 | update_reserved_extents(root, bytenr, num_bytes, 0); | 3048 | update_reserved_extents(root, bytenr, num_bytes, 0); |
2711 | return 0; | 3049 | return 0; |
2712 | } | 3050 | } |
@@ -2787,7 +3125,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
2787 | 3125 | ||
2788 | if (data & BTRFS_BLOCK_GROUP_METADATA) { | 3126 | if (data & BTRFS_BLOCK_GROUP_METADATA) { |
2789 | last_ptr = &root->fs_info->last_alloc; | 3127 | last_ptr = &root->fs_info->last_alloc; |
2790 | empty_cluster = 64 * 1024; | 3128 | if (!btrfs_test_opt(root, SSD)) |
3129 | empty_cluster = 64 * 1024; | ||
2791 | } | 3130 | } |
2792 | 3131 | ||
2793 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) | 3132 | if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) |
@@ -3014,16 +3353,18 @@ loop_check: | |||
3014 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 3353 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) |
3015 | { | 3354 | { |
3016 | struct btrfs_block_group_cache *cache; | 3355 | struct btrfs_block_group_cache *cache; |
3017 | struct list_head *l; | ||
3018 | 3356 | ||
3019 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 3357 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
3020 | (unsigned long long)(info->total_bytes - info->bytes_used - | 3358 | (unsigned long long)(info->total_bytes - info->bytes_used - |
3021 | info->bytes_pinned - info->bytes_reserved), | 3359 | info->bytes_pinned - info->bytes_reserved), |
3022 | (info->full) ? "" : "not "); | 3360 | (info->full) ? "" : "not "); |
3361 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | ||
3362 | " may_use=%llu, used=%llu\n", info->total_bytes, | ||
3363 | info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, | ||
3364 | info->bytes_used); | ||
3023 | 3365 | ||
3024 | down_read(&info->groups_sem); | 3366 | down_read(&info->groups_sem); |
3025 | list_for_each(l, &info->block_groups) { | 3367 | list_for_each_entry(cache, &info->block_groups, list) { |
3026 | cache = list_entry(l, struct btrfs_block_group_cache, list); | ||
3027 | spin_lock(&cache->lock); | 3368 | spin_lock(&cache->lock); |
3028 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 3369 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
3029 | "%llu pinned %llu reserved\n", | 3370 | "%llu pinned %llu reserved\n", |
@@ -3047,24 +3388,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3047 | { | 3388 | { |
3048 | int ret; | 3389 | int ret; |
3049 | u64 search_start = 0; | 3390 | u64 search_start = 0; |
3050 | u64 alloc_profile; | ||
3051 | struct btrfs_fs_info *info = root->fs_info; | 3391 | struct btrfs_fs_info *info = root->fs_info; |
3052 | 3392 | ||
3053 | if (data) { | 3393 | data = btrfs_get_alloc_profile(root, data); |
3054 | alloc_profile = info->avail_data_alloc_bits & | ||
3055 | info->data_alloc_profile; | ||
3056 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
3057 | } else if (root == root->fs_info->chunk_root) { | ||
3058 | alloc_profile = info->avail_system_alloc_bits & | ||
3059 | info->system_alloc_profile; | ||
3060 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
3061 | } else { | ||
3062 | alloc_profile = info->avail_metadata_alloc_bits & | ||
3063 | info->metadata_alloc_profile; | ||
3064 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
3065 | } | ||
3066 | again: | 3394 | again: |
3067 | data = btrfs_reduce_alloc_profile(root, data); | ||
3068 | /* | 3395 | /* |
3069 | * the only place that sets empty_size is btrfs_realloc_node, which | 3396 | * the only place that sets empty_size is btrfs_realloc_node, which |
3070 | * is not called recursively on allocations | 3397 | * is not called recursively on allocations |
@@ -3332,7 +3659,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
3332 | 3659 | ||
3333 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 3660 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
3334 | struct btrfs_root *root, | 3661 | struct btrfs_root *root, |
3335 | u64 bytenr, u32 blocksize) | 3662 | u64 bytenr, u32 blocksize, |
3663 | int level) | ||
3336 | { | 3664 | { |
3337 | struct extent_buffer *buf; | 3665 | struct extent_buffer *buf; |
3338 | 3666 | ||
@@ -3340,9 +3668,13 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3340 | if (!buf) | 3668 | if (!buf) |
3341 | return ERR_PTR(-ENOMEM); | 3669 | return ERR_PTR(-ENOMEM); |
3342 | btrfs_set_header_generation(buf, trans->transid); | 3670 | btrfs_set_header_generation(buf, trans->transid); |
3671 | btrfs_set_buffer_lockdep_class(buf, level); | ||
3343 | btrfs_tree_lock(buf); | 3672 | btrfs_tree_lock(buf); |
3344 | clean_tree_block(trans, root, buf); | 3673 | clean_tree_block(trans, root, buf); |
3674 | |||
3675 | btrfs_set_lock_blocking(buf); | ||
3345 | btrfs_set_buffer_uptodate(buf); | 3676 | btrfs_set_buffer_uptodate(buf); |
3677 | |||
3346 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 3678 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
3347 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 3679 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
3348 | buf->start + buf->len - 1, GFP_NOFS); | 3680 | buf->start + buf->len - 1, GFP_NOFS); |
@@ -3351,6 +3683,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3351 | buf->start + buf->len - 1, GFP_NOFS); | 3683 | buf->start + buf->len - 1, GFP_NOFS); |
3352 | } | 3684 | } |
3353 | trans->blocks_used++; | 3685 | trans->blocks_used++; |
3686 | /* this returns a buffer locked for blocking */ | ||
3354 | return buf; | 3687 | return buf; |
3355 | } | 3688 | } |
3356 | 3689 | ||
@@ -3379,7 +3712,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
3379 | return ERR_PTR(ret); | 3712 | return ERR_PTR(ret); |
3380 | } | 3713 | } |
3381 | 3714 | ||
3382 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); | 3715 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
3716 | blocksize, level); | ||
3383 | return buf; | 3717 | return buf; |
3384 | } | 3718 | } |
3385 | 3719 | ||
@@ -3388,36 +3722,73 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3388 | { | 3722 | { |
3389 | u64 leaf_owner; | 3723 | u64 leaf_owner; |
3390 | u64 leaf_generation; | 3724 | u64 leaf_generation; |
3725 | struct refsort *sorted; | ||
3391 | struct btrfs_key key; | 3726 | struct btrfs_key key; |
3392 | struct btrfs_file_extent_item *fi; | 3727 | struct btrfs_file_extent_item *fi; |
3393 | int i; | 3728 | int i; |
3394 | int nritems; | 3729 | int nritems; |
3395 | int ret; | 3730 | int ret; |
3731 | int refi = 0; | ||
3732 | int slot; | ||
3396 | 3733 | ||
3397 | BUG_ON(!btrfs_is_leaf(leaf)); | 3734 | BUG_ON(!btrfs_is_leaf(leaf)); |
3398 | nritems = btrfs_header_nritems(leaf); | 3735 | nritems = btrfs_header_nritems(leaf); |
3399 | leaf_owner = btrfs_header_owner(leaf); | 3736 | leaf_owner = btrfs_header_owner(leaf); |
3400 | leaf_generation = btrfs_header_generation(leaf); | 3737 | leaf_generation = btrfs_header_generation(leaf); |
3401 | 3738 | ||
3739 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
3740 | /* we do this loop twice. The first time we build a list | ||
3741 | * of the extents we have a reference on, then we sort the list | ||
3742 | * by bytenr. The second time around we actually do the | ||
3743 | * extent freeing. | ||
3744 | */ | ||
3402 | for (i = 0; i < nritems; i++) { | 3745 | for (i = 0; i < nritems; i++) { |
3403 | u64 disk_bytenr; | 3746 | u64 disk_bytenr; |
3404 | cond_resched(); | 3747 | cond_resched(); |
3405 | 3748 | ||
3406 | btrfs_item_key_to_cpu(leaf, &key, i); | 3749 | btrfs_item_key_to_cpu(leaf, &key, i); |
3750 | |||
3751 | /* only extents have references, skip everything else */ | ||
3407 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | 3752 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) |
3408 | continue; | 3753 | continue; |
3754 | |||
3409 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | 3755 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); |
3756 | |||
3757 | /* inline extents live in the btree, they don't have refs */ | ||
3410 | if (btrfs_file_extent_type(leaf, fi) == | 3758 | if (btrfs_file_extent_type(leaf, fi) == |
3411 | BTRFS_FILE_EXTENT_INLINE) | 3759 | BTRFS_FILE_EXTENT_INLINE) |
3412 | continue; | 3760 | continue; |
3413 | /* | 3761 | |
3414 | * FIXME make sure to insert a trans record that | ||
3415 | * repeats the snapshot del on crash | ||
3416 | */ | ||
3417 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 3762 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
3763 | |||
3764 | /* holes don't have refs */ | ||
3418 | if (disk_bytenr == 0) | 3765 | if (disk_bytenr == 0) |
3419 | continue; | 3766 | continue; |
3420 | 3767 | ||
3768 | sorted[refi].bytenr = disk_bytenr; | ||
3769 | sorted[refi].slot = i; | ||
3770 | refi++; | ||
3771 | } | ||
3772 | |||
3773 | if (refi == 0) | ||
3774 | goto out; | ||
3775 | |||
3776 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
3777 | |||
3778 | for (i = 0; i < refi; i++) { | ||
3779 | u64 disk_bytenr; | ||
3780 | |||
3781 | disk_bytenr = sorted[i].bytenr; | ||
3782 | slot = sorted[i].slot; | ||
3783 | |||
3784 | cond_resched(); | ||
3785 | |||
3786 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
3787 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
3788 | continue; | ||
3789 | |||
3790 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
3791 | |||
3421 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 3792 | ret = __btrfs_free_extent(trans, root, disk_bytenr, |
3422 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 3793 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
3423 | leaf->start, leaf_owner, leaf_generation, | 3794 | leaf->start, leaf_owner, leaf_generation, |
@@ -3428,6 +3799,8 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3428 | wake_up(&root->fs_info->transaction_throttle); | 3799 | wake_up(&root->fs_info->transaction_throttle); |
3429 | cond_resched(); | 3800 | cond_resched(); |
3430 | } | 3801 | } |
3802 | out: | ||
3803 | kfree(sorted); | ||
3431 | return 0; | 3804 | return 0; |
3432 | } | 3805 | } |
3433 | 3806 | ||
@@ -3437,9 +3810,25 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3437 | { | 3810 | { |
3438 | int i; | 3811 | int i; |
3439 | int ret; | 3812 | int ret; |
3440 | struct btrfs_extent_info *info = ref->extents; | 3813 | struct btrfs_extent_info *info; |
3814 | struct refsort *sorted; | ||
3815 | |||
3816 | if (ref->nritems == 0) | ||
3817 | return 0; | ||
3441 | 3818 | ||
3819 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
3442 | for (i = 0; i < ref->nritems; i++) { | 3820 | for (i = 0; i < ref->nritems; i++) { |
3821 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
3822 | sorted[i].slot = i; | ||
3823 | } | ||
3824 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
3825 | |||
3826 | /* | ||
3827 | * the refs were copied into sorted[] and put in bytenr order | ||
3828 | * above, so the frees below walk the extent tree in order | ||
3829 | */ | ||
3830 | for (i = 0; i < ref->nritems; i++) { | ||
3831 | info = ref->extents + sorted[i].slot; | ||
3443 | ret = __btrfs_free_extent(trans, root, info->bytenr, | 3832 | ret = __btrfs_free_extent(trans, root, info->bytenr, |
3444 | info->num_bytes, ref->bytenr, | 3833 | info->num_bytes, ref->bytenr, |
3445 | ref->owner, ref->generation, | 3834 | ref->owner, ref->generation, |
@@ -3453,6 +3842,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3453 | info++; | 3842 | info++; |
3454 | } | 3843 | } |
3455 | 3844 | ||
3845 | kfree(sorted); | ||
3456 | return 0; | 3846 | return 0; |
3457 | } | 3847 | } |
3458 | 3848 | ||
@@ -3497,6 +3887,152 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, | |||
3497 | } | 3887 | } |
3498 | 3888 | ||
3499 | /* | 3889 | /* |
3890 | * this is used while deleting old snapshots, and it drops the refs | ||
3891 | * on a whole subtree starting from a level 1 node. | ||
3892 | * | ||
3893 | * The idea is to sort all the leaf pointers, and then drop the | ||
3894 | * ref on all the leaves in order. Most of the time the leaves | ||
3895 | * will have ref cache entries, so no leaf IOs will be required to | ||
3896 | * find the extents they have references on. | ||
3897 | * | ||
3898 | * For each leaf, any references it has are also dropped in order | ||
3899 | * | ||
3900 | * This ends up dropping the references in something close to optimal | ||
3901 | * order for reading and modifying the extent allocation tree. | ||
3902 | */ | ||
3903 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
3904 | struct btrfs_root *root, | ||
3905 | struct btrfs_path *path) | ||
3906 | { | ||
3907 | u64 bytenr; | ||
3908 | u64 root_owner; | ||
3909 | u64 root_gen; | ||
3910 | struct extent_buffer *eb = path->nodes[1]; | ||
3911 | struct extent_buffer *leaf; | ||
3912 | struct btrfs_leaf_ref *ref; | ||
3913 | struct refsort *sorted = NULL; | ||
3914 | int nritems = btrfs_header_nritems(eb); | ||
3915 | int ret; | ||
3916 | int i; | ||
3917 | int refi = 0; | ||
3918 | int slot = path->slots[1]; | ||
3919 | u32 blocksize = btrfs_level_size(root, 0); | ||
3920 | u32 refs; | ||
3921 | |||
3922 | if (nritems == 0) | ||
3923 | goto out; | ||
3924 | |||
3925 | root_owner = btrfs_header_owner(eb); | ||
3926 | root_gen = btrfs_header_generation(eb); | ||
3927 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
3928 | |||
3929 | /* | ||
3930 | * step one, sort all the leaf pointers so we don't scribble | ||
3931 | * randomly into the extent allocation tree | ||
3932 | */ | ||
3933 | for (i = slot; i < nritems; i++) { | ||
3934 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | ||
3935 | sorted[refi].slot = i; | ||
3936 | refi++; | ||
3937 | } | ||
3938 | |||
3939 | /* | ||
3940 | * nritems won't be zero, but if we're picking up drop_snapshot | ||
3941 | * after a crash, slot might be > 0, so double check things | ||
3942 | * just in case. | ||
3943 | */ | ||
3944 | if (refi == 0) | ||
3945 | goto out; | ||
3946 | |||
3947 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | ||
3948 | |||
3949 | /* | ||
3950 | * the first loop frees everything the leaves point to | ||
3951 | */ | ||
3952 | for (i = 0; i < refi; i++) { | ||
3953 | u64 ptr_gen; | ||
3954 | |||
3955 | bytenr = sorted[i].bytenr; | ||
3956 | |||
3957 | /* | ||
3958 | * check the reference count on this leaf. If it is > 1 | ||
3959 | * we just decrement it below and don't update any | ||
3960 | * of the refs the leaf points to. | ||
3961 | */ | ||
3962 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | ||
3963 | BUG_ON(ret); | ||
3964 | if (refs != 1) | ||
3965 | continue; | ||
3966 | |||
3967 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | ||
3968 | |||
3969 | /* | ||
3970 | * the leaf only had one reference, which means the | ||
3971 | * only thing pointing to this leaf is the snapshot | ||
3972 | * we're deleting. It isn't possible for the reference | ||
3973 | * count to increase again later | ||
3974 | * | ||
3975 | * The reference cache is checked for the leaf, | ||
3976 | * and if found we'll be able to drop any refs held by | ||
3977 | * the leaf without needing to read it in. | ||
3978 | */ | ||
3979 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
3980 | if (ref && ref->generation != ptr_gen) { | ||
3981 | btrfs_free_leaf_ref(root, ref); | ||
3982 | ref = NULL; | ||
3983 | } | ||
3984 | if (ref) { | ||
3985 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
3986 | BUG_ON(ret); | ||
3987 | btrfs_remove_leaf_ref(root, ref); | ||
3988 | btrfs_free_leaf_ref(root, ref); | ||
3989 | } else { | ||
3990 | /* | ||
3991 | * the leaf wasn't in the reference cache, so | ||
3992 | * we have to read it. | ||
3993 | */ | ||
3994 | leaf = read_tree_block(root, bytenr, blocksize, | ||
3995 | ptr_gen); | ||
3996 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
3997 | BUG_ON(ret); | ||
3998 | free_extent_buffer(leaf); | ||
3999 | } | ||
4000 | atomic_inc(&root->fs_info->throttle_gen); | ||
4001 | wake_up(&root->fs_info->transaction_throttle); | ||
4002 | cond_resched(); | ||
4003 | } | ||
4004 | |||
4005 | /* | ||
4006 | * run through the loop again to free the refs on the leaves. | ||
4007 | * This is faster than doing it in the loop above because | ||
4008 | * the leaves are likely to be clustered together. We end up | ||
4009 | * working in nice chunks on the extent allocation tree. | ||
4010 | */ | ||
4011 | for (i = 0; i < refi; i++) { | ||
4012 | bytenr = sorted[i].bytenr; | ||
4013 | ret = __btrfs_free_extent(trans, root, bytenr, | ||
4014 | blocksize, eb->start, | ||
4015 | root_owner, root_gen, 0, 1); | ||
4016 | BUG_ON(ret); | ||
4017 | |||
4018 | atomic_inc(&root->fs_info->throttle_gen); | ||
4019 | wake_up(&root->fs_info->transaction_throttle); | ||
4020 | cond_resched(); | ||
4021 | } | ||
4022 | out: | ||
4023 | kfree(sorted); | ||
4024 | |||
4025 | /* | ||
4026 | * update the path to show we've processed the entire level 1 | ||
4027 | * node. This will get saved into the root's drop_snapshot_progress | ||
4028 | * field so these drops are not repeated again if this transaction | ||
4029 | * commits. | ||
4030 | */ | ||
4031 | path->slots[1] = nritems; | ||
4032 | return 0; | ||
4033 | } | ||
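
The ref-cache lookup above only trusts an entry whose generation matches the
pointer generation stored in the parent node; anything stale is freed and the
leaf is read from disk instead. That validate-or-fallback shape as a small
standalone sketch (hypothetical names, not btrfs API):

	#include <stdint.h>
	#include <stdio.h>

	/* a cached entry is usable only if its generation matches the
	 * generation recorded next to the block pointer in the parent;
	 * a mismatch means the cached contents describe an older copy */
	struct leaf_ref { uint64_t bytenr, generation; };

	static struct leaf_ref cache[] = { { 4096, 7 }, { 8192, 9 } };

	static struct leaf_ref *lookup_ref(uint64_t bytenr, uint64_t ptr_gen)
	{
		for (size_t i = 0; i < sizeof(cache) / sizeof(cache[0]); i++) {
			if (cache[i].bytenr != bytenr)
				continue;
			if (cache[i].generation != ptr_gen)
				return NULL;	/* stale: reread the leaf */
			return &cache[i];
		}
		return NULL;
	}

	int main(void)
	{
		printf("hit:  %p\n", (void *)lookup_ref(4096, 7));
		printf("miss: %p\n", (void *)lookup_ref(8192, 8));
		return 0;
	}
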
4034 | |||
4035 | /* | ||
3500 | * helper function for drop_snapshot, this walks down the tree dropping ref | 4036 | * helper function for drop_snapshot, this walks down the tree dropping ref |
3501 | * counts as it goes. | 4037 | * counts as it goes. |
3502 | */ | 4038 | */ |
@@ -3511,7 +4047,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3511 | struct extent_buffer *next; | 4047 | struct extent_buffer *next; |
3512 | struct extent_buffer *cur; | 4048 | struct extent_buffer *cur; |
3513 | struct extent_buffer *parent; | 4049 | struct extent_buffer *parent; |
3514 | struct btrfs_leaf_ref *ref; | ||
3515 | u32 blocksize; | 4050 | u32 blocksize; |
3516 | int ret; | 4051 | int ret; |
3517 | u32 refs; | 4052 | u32 refs; |
@@ -3538,17 +4073,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3538 | if (path->slots[*level] >= | 4073 | if (path->slots[*level] >= |
3539 | btrfs_header_nritems(cur)) | 4074 | btrfs_header_nritems(cur)) |
3540 | break; | 4075 | break; |
4076 | |||
4077 | /* the new code goes down to level 1 and does all the | ||
4078 | * leaves pointed to by that node in bulk. So, this check | ||
4079 | * for level 0 will always be false. | ||
4080 | * | ||
4081 | * But, the disk format allows the drop_snapshot_progress | ||
4082 | * field in the root to leave things in a state where | ||
4083 | * a leaf will need cleaning up here. If someone crashes | ||
4084 | * with the old code and then boots with the new code, | ||
4085 | * we might find a leaf here. | ||
4086 | */ | ||
3541 | if (*level == 0) { | 4087 | if (*level == 0) { |
3542 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 4088 | ret = btrfs_drop_leaf_ref(trans, root, cur); |
3543 | BUG_ON(ret); | 4089 | BUG_ON(ret); |
3544 | break; | 4090 | break; |
3545 | } | 4091 | } |
4092 | |||
4093 | /* | ||
4094 | * once we get to level one, process the whole node | ||
4095 | * at once, including everything below it. | ||
4096 | */ | ||
4097 | if (*level == 1) { | ||
4098 | ret = drop_level_one_refs(trans, root, path); | ||
4099 | BUG_ON(ret); | ||
4100 | break; | ||
4101 | } | ||
4102 | |||
3546 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | 4103 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); |
3547 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 4104 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
3548 | blocksize = btrfs_level_size(root, *level - 1); | 4105 | blocksize = btrfs_level_size(root, *level - 1); |
3549 | 4106 | ||
3550 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 4107 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); |
3551 | BUG_ON(ret); | 4108 | BUG_ON(ret); |
4109 | |||
4110 | /* | ||
4111 | * if there is more than one reference, we don't need | ||
4112 | * to read that node to drop any references it has. We | ||
4113 | * just drop the ref we hold on that node and move on to the | ||
4114 | * next slot in this level. | ||
4115 | */ | ||
3552 | if (refs != 1) { | 4116 | if (refs != 1) { |
3553 | parent = path->nodes[*level]; | 4117 | parent = path->nodes[*level]; |
3554 | root_owner = btrfs_header_owner(parent); | 4118 | root_owner = btrfs_header_owner(parent); |
@@ -3567,46 +4131,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
3567 | 4131 | ||
3568 | continue; | 4132 | continue; |
3569 | } | 4133 | } |
4134 | |||
3570 | /* | 4135 | /* |
3571 | * at this point, we have a single ref, and since the | 4136 | * we need to keep freeing things in the next level down. |
3572 | * only place referencing this extent is a dead root | 4137 | * read the block and loop around to process it |
3573 | * the reference count should never go higher. | ||
3574 | * So, we don't need to check it again | ||
3575 | */ | 4138 | */ |
3576 | if (*level == 1) { | 4139 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); |
3577 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
3578 | if (ref && ref->generation != ptr_gen) { | ||
3579 | btrfs_free_leaf_ref(root, ref); | ||
3580 | ref = NULL; | ||
3581 | } | ||
3582 | if (ref) { | ||
3583 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
3584 | BUG_ON(ret); | ||
3585 | btrfs_remove_leaf_ref(root, ref); | ||
3586 | btrfs_free_leaf_ref(root, ref); | ||
3587 | *level = 0; | ||
3588 | break; | ||
3589 | } | ||
3590 | } | ||
3591 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
3592 | if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { | ||
3593 | free_extent_buffer(next); | ||
3594 | |||
3595 | next = read_tree_block(root, bytenr, blocksize, | ||
3596 | ptr_gen); | ||
3597 | cond_resched(); | ||
3598 | #if 0 | ||
3599 | /* | ||
3600 | * this is a debugging check and can go away | ||
3601 | * the ref should never go all the way down to 1 | ||
3602 | * at this point | ||
3603 | */ | ||
3604 | ret = lookup_extent_ref(NULL, root, bytenr, blocksize, | ||
3605 | &refs); | ||
3606 | BUG_ON(ret); | ||
3607 | WARN_ON(refs != 1); | ||
3608 | #endif | ||
3609 | } | ||
3610 | WARN_ON(*level <= 0); | 4140 | WARN_ON(*level <= 0); |
3611 | if (path->nodes[*level-1]) | 4141 | if (path->nodes[*level-1]) |
3612 | free_extent_buffer(path->nodes[*level-1]); | 4142 | free_extent_buffer(path->nodes[*level-1]); |
@@ -3631,11 +4161,16 @@ out: | |||
3631 | root_owner = btrfs_header_owner(parent); | 4161 | root_owner = btrfs_header_owner(parent); |
3632 | root_gen = btrfs_header_generation(parent); | 4162 | root_gen = btrfs_header_generation(parent); |
3633 | 4163 | ||
4164 | /* | ||
4165 | * cleanup and free the reference on the last node | ||
4166 | * we processed | ||
4167 | */ | ||
3634 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | 4168 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, |
3635 | parent->start, root_owner, root_gen, | 4169 | parent->start, root_owner, root_gen, |
3636 | *level, 1); | 4170 | *level, 1); |
3637 | free_extent_buffer(path->nodes[*level]); | 4171 | free_extent_buffer(path->nodes[*level]); |
3638 | path->nodes[*level] = NULL; | 4172 | path->nodes[*level] = NULL; |
4173 | |||
3639 | *level += 1; | 4174 | *level += 1; |
3640 | BUG_ON(ret); | 4175 | BUG_ON(ret); |
3641 | 4176 | ||
@@ -3687,6 +4222,7 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3687 | 4222 | ||
3688 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 4223 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); |
3689 | btrfs_tree_lock(next); | 4224 | btrfs_tree_lock(next); |
4225 | btrfs_set_lock_blocking(next); | ||
3690 | 4226 | ||
3691 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | 4227 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, |
3692 | &refs); | 4228 | &refs); |
@@ -3754,6 +4290,13 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3754 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 4290 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { |
3755 | struct extent_buffer *node; | 4291 | struct extent_buffer *node; |
3756 | struct btrfs_disk_key disk_key; | 4292 | struct btrfs_disk_key disk_key; |
4293 | |||
4294 | /* | ||
4295 | * there is more work to do in this level. | ||
4296 | * Update the drop_progress marker to reflect | ||
4297 | * the work we've done so far, and then bump | ||
4298 | * the slot number | ||
4299 | */ | ||
3757 | node = path->nodes[i]; | 4300 | node = path->nodes[i]; |
3758 | path->slots[i]++; | 4301 | path->slots[i]++; |
3759 | *level = i; | 4302 | *level = i; |
@@ -3765,6 +4308,11 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
3765 | return 0; | 4308 | return 0; |
3766 | } else { | 4309 | } else { |
3767 | struct extent_buffer *parent; | 4310 | struct extent_buffer *parent; |
4311 | |||
4312 | /* | ||
4313 | * this whole node is done, free our reference | ||
4314 | * on it and go up one level | ||
4315 | */ | ||
3768 | if (path->nodes[*level] == root->node) | 4316 | if (path->nodes[*level] == root->node) |
3769 | parent = path->nodes[*level]; | 4317 | parent = path->nodes[*level]; |
3770 | else | 4318 | else |
@@ -3891,13 +4439,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
3891 | path = btrfs_alloc_path(); | 4439 | path = btrfs_alloc_path(); |
3892 | BUG_ON(!path); | 4440 | BUG_ON(!path); |
3893 | 4441 | ||
3894 | BUG_ON(!btrfs_tree_locked(parent)); | 4442 | btrfs_assert_tree_locked(parent); |
3895 | parent_level = btrfs_header_level(parent); | 4443 | parent_level = btrfs_header_level(parent); |
3896 | extent_buffer_get(parent); | 4444 | extent_buffer_get(parent); |
3897 | path->nodes[parent_level] = parent; | 4445 | path->nodes[parent_level] = parent; |
3898 | path->slots[parent_level] = btrfs_header_nritems(parent); | 4446 | path->slots[parent_level] = btrfs_header_nritems(parent); |
3899 | 4447 | ||
3900 | BUG_ON(!btrfs_tree_locked(node)); | 4448 | btrfs_assert_tree_locked(node); |
3901 | level = btrfs_header_level(node); | 4449 | level = btrfs_header_level(node); |
3902 | extent_buffer_get(node); | 4450 | extent_buffer_get(node); |
3903 | path->nodes[level] = node; | 4451 | path->nodes[level] = node; |
@@ -4444,7 +4992,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, | |||
4444 | u64 lock_end = 0; | 4992 | u64 lock_end = 0; |
4445 | u64 num_bytes; | 4993 | u64 num_bytes; |
4446 | u64 ext_offset; | 4994 | u64 ext_offset; |
4447 | u64 first_pos; | 4995 | u64 search_end = (u64)-1; |
4448 | u32 nritems; | 4996 | u32 nritems; |
4449 | int nr_scaned = 0; | 4997 | int nr_scaned = 0; |
4450 | int extent_locked = 0; | 4998 | int extent_locked = 0; |
@@ -4452,7 +5000,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, | |||
4452 | int ret; | 5000 | int ret; |
4453 | 5001 | ||
4454 | memcpy(&key, leaf_key, sizeof(key)); | 5002 | memcpy(&key, leaf_key, sizeof(key)); |
4455 | first_pos = INT_LIMIT(loff_t) - extent_key->offset; | ||
4456 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { | 5003 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { |
4457 | if (key.objectid < ref_path->owner_objectid || | 5004 | if (key.objectid < ref_path->owner_objectid || |
4458 | (key.objectid == ref_path->owner_objectid && | 5005 | (key.objectid == ref_path->owner_objectid && |
@@ -4501,7 +5048,7 @@ next: | |||
4501 | if ((key.objectid > ref_path->owner_objectid) || | 5048 | if ((key.objectid > ref_path->owner_objectid) || |
4502 | (key.objectid == ref_path->owner_objectid && | 5049 | (key.objectid == ref_path->owner_objectid && |
4503 | key.type > BTRFS_EXTENT_DATA_KEY) || | 5050 | key.type > BTRFS_EXTENT_DATA_KEY) || |
4504 | (key.offset >= first_pos + extent_key->offset)) | 5051 | key.offset >= search_end) |
4505 | break; | 5052 | break; |
4506 | } | 5053 | } |
4507 | 5054 | ||
@@ -4534,8 +5081,10 @@ next: | |||
4534 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); | 5081 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi); |
4535 | ext_offset = btrfs_file_extent_offset(leaf, fi); | 5082 | ext_offset = btrfs_file_extent_offset(leaf, fi); |
4536 | 5083 | ||
4537 | if (first_pos > key.offset - ext_offset) | 5084 | if (search_end == (u64)-1) { |
4538 | first_pos = key.offset - ext_offset; | 5085 | search_end = key.offset - ext_offset + |
5086 | btrfs_file_extent_ram_bytes(leaf, fi); | ||
5087 | } | ||
4539 | 5088 | ||
4540 | if (!extent_locked) { | 5089 | if (!extent_locked) { |
4541 | lock_start = key.offset; | 5090 | lock_start = key.offset; |
@@ -4724,7 +5273,7 @@ next: | |||
4724 | } | 5273 | } |
4725 | skip: | 5274 | skip: |
4726 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && | 5275 | if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && |
4727 | key.offset >= first_pos + extent_key->offset) | 5276 | key.offset >= search_end) |
4728 | break; | 5277 | break; |
4729 | 5278 | ||
4730 | cond_resched(); | 5279 | cond_resched(); |
@@ -4778,6 +5327,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, | |||
4778 | ref->bytenr = buf->start; | 5327 | ref->bytenr = buf->start; |
4779 | ref->owner = btrfs_header_owner(buf); | 5328 | ref->owner = btrfs_header_owner(buf); |
4780 | ref->generation = btrfs_header_generation(buf); | 5329 | ref->generation = btrfs_header_generation(buf); |
5330 | |||
4781 | ret = btrfs_add_leaf_ref(root, ref, 0); | 5331 | ret = btrfs_add_leaf_ref(root, ref, 0); |
4782 | WARN_ON(ret); | 5332 | WARN_ON(ret); |
4783 | btrfs_free_leaf_ref(root, ref); | 5333 | btrfs_free_leaf_ref(root, ref); |
@@ -5351,7 +5901,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
5351 | prev_block = block_start; | 5901 | prev_block = block_start; |
5352 | } | 5902 | } |
5353 | 5903 | ||
5904 | mutex_lock(&extent_root->fs_info->trans_mutex); | ||
5354 | btrfs_record_root_in_trans(found_root); | 5905 | btrfs_record_root_in_trans(found_root); |
5906 | mutex_unlock(&extent_root->fs_info->trans_mutex); | ||
5355 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 5907 | if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
5356 | /* | 5908 | /* |
5357 | * try to update data extent references while | 5909 | * try to update data extent references while |
@@ -5789,6 +6341,7 @@ out: | |||
5789 | int btrfs_free_block_groups(struct btrfs_fs_info *info) | 6341 | int btrfs_free_block_groups(struct btrfs_fs_info *info) |
5790 | { | 6342 | { |
5791 | struct btrfs_block_group_cache *block_group; | 6343 | struct btrfs_block_group_cache *block_group; |
6344 | struct btrfs_space_info *space_info; | ||
5792 | struct rb_node *n; | 6345 | struct rb_node *n; |
5793 | 6346 | ||
5794 | spin_lock(&info->block_group_cache_lock); | 6347 | spin_lock(&info->block_group_cache_lock); |
@@ -5810,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
5810 | spin_lock(&info->block_group_cache_lock); | 6363 | spin_lock(&info->block_group_cache_lock); |
5811 | } | 6364 | } |
5812 | spin_unlock(&info->block_group_cache_lock); | 6365 | spin_unlock(&info->block_group_cache_lock); |
6366 | |||
6367 | /* now that all the block groups are freed, go through and | ||
6368 | * free all the space_info structs. This is only called during | ||
6369 | * the final stages of unmount, and so we know nobody is | ||
6370 | * using them. We call synchronize_rcu() once before we start, | ||
6371 | * just to be on the safe side. | ||
6372 | */ | ||
6373 | synchronize_rcu(); | ||
6374 | |||
6375 | while (!list_empty(&info->space_info)) { | ||
6376 | space_info = list_entry(info->space_info.next, | ||
6377 | struct btrfs_space_info, | ||
6378 | list); | ||
6379 | |||
6380 | list_del(&space_info->list); | ||
6381 | kfree(space_info); | ||
6382 | } | ||
5813 | return 0; | 6383 | return 0; |
5814 | } | 6384 | } |
5815 | 6385 | ||
@@ -5957,9 +6527,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
5957 | path = btrfs_alloc_path(); | 6527 | path = btrfs_alloc_path(); |
5958 | BUG_ON(!path); | 6528 | BUG_ON(!path); |
5959 | 6529 | ||
5960 | btrfs_remove_free_space_cache(block_group); | 6530 | spin_lock(&root->fs_info->block_group_cache_lock); |
5961 | rb_erase(&block_group->cache_node, | 6531 | rb_erase(&block_group->cache_node, |
5962 | &root->fs_info->block_group_cache_tree); | 6532 | &root->fs_info->block_group_cache_tree); |
6533 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
6534 | btrfs_remove_free_space_cache(block_group); | ||
5963 | down_write(&block_group->space_info->groups_sem); | 6535 | down_write(&block_group->space_info->groups_sem); |
5964 | list_del(&block_group->list); | 6536 | list_del(&block_group->list); |
5965 | up_write(&block_group->space_info->groups_sem); | 6537 | up_write(&block_group->space_info->groups_sem); |