aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 14:34:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-07 14:34:19 -0400
commit9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e (patch)
treec25b0eb20dac1a39a6b55c521b2658dcceb7d532 /fs/btrfs
parent78438ce18f26dbcaa8993bb45d20ffb0cec3bc3e (diff)
parentb1c16ac978fd40ae636e629bb69a652df7eebdc2 (diff)
Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This time the majority of changes are cleanups, though there's still a number of changes of user interest. User visible changes: - better read time and write checks to catch errors early and before writing data to disk (to catch potential memory corruption on data that get checksummed) - qgroups + metadata relocation: last speed up patch int the series to address the slowness, there should be no overhead comparing balance with and without qgroups - FIEMAP ioctl does not start a transaction unnecessarily, this can result in a speed up and less blocking due to IO - LOGICAL_INO (v1, v2) does not start transaction unnecessarily, this can speed up the mentioned ioctl and scrub as well - fsync on files with many (but not too many) hardlinks is faster, finer decision if the links should be fsynced individually or completely - send tries harder to find ranges to clone - trim/discard will skip unallocated chunks that haven't been touched since the last mount Fixes: - send flushes delayed allocation before start, otherwise it could miss some changes in case of a very recent rw->ro switch of a subvolume - fix fallocate with qgroups that could lead to space accounting underflow, reported as a warning - trim/discard ioctl honours the requested range - starting send and dedupe on a subvolume at the same time will let only one of them succeed, this is to prevent changes that send could miss due to dedupe; both operations are restartable Core changes: - more tree-checker validations, errors reported by fuzzing tools: - device item - inode item - block group profiles - tracepoints for extent buffer locking - async cow preallocates memory to avoid errors happening too deep in the call chain - metadata reservations for delalloc reworked to better adapt in many-writers/low-space scenarios - improved space flushing logic for intense DIO vs buffered workloads - lots of cleanups - removed unused struct members - redundant argument removal - 
properties and xattrs - extent buffer locking - selftests - use common file type conversions - many-argument functions reduction" * tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits) btrfs: Use kvmalloc for allocating compressed path context btrfs: Factor out common extent locking code in submit_compressed_extents btrfs: Set io_tree only once in submit_compressed_extents btrfs: Replace clear_extent_bit with unlock_extent btrfs: Make compress_file_range take only struct async_chunk btrfs: Remove fs_info from struct async_chunk btrfs: Rename async_cow to async_chunk btrfs: Preallocate chunks in cow_file_range_async btrfs: reserve delalloc metadata differently btrfs: track DIO bytes in flight btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop btrfs: delete unused function btrfs_set_prop_trans btrfs: start transaction in xattr_handler_set_prop btrfs: drop local copy of inode i_mode btrfs: drop old_fsflags in btrfs_ioctl_setflags btrfs: modify local copy of btrfs_inode flags btrfs: drop useless inode i_flags copy and restore btrfs: start transaction in btrfs_ioctl_setflags() btrfs: export btrfs_set_prop btrfs: refactor btrfs_set_props to validate externally ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/backref.c38
-rw-r--r--fs/btrfs/btrfs_inode.h8
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c254
-rw-r--r--fs/btrfs/ctree.h78
-rw-r--r--fs/btrfs/delayed-inode.c5
-rw-r--r--fs/btrfs/delayed-ref.c46
-rw-r--r--fs/btrfs/delayed-ref.h122
-rw-r--r--fs/btrfs/dev-replace.c8
-rw-r--r--fs/btrfs/dev-replace.h3
-rw-r--r--fs/btrfs/dir-item.c5
-rw-r--r--fs/btrfs/disk-io.c225
-rw-r--r--fs/btrfs/disk-io.h7
-rw-r--r--fs/btrfs/extent-tree.c651
-rw-r--r--fs/btrfs/extent_io.c356
-rw-r--r--fs/btrfs/extent_io.h89
-rw-r--r--fs/btrfs/extent_map.c38
-rw-r--r--fs/btrfs/file-item.c32
-rw-r--r--fs/btrfs/file.c47
-rw-r--r--fs/btrfs/free-space-cache.c45
-rw-r--r--fs/btrfs/free-space-cache.h18
-rw-r--r--fs/btrfs/free-space-tree.c24
-rw-r--r--fs/btrfs/free-space-tree.h1
-rw-r--r--fs/btrfs/inode-item.c8
-rw-r--r--fs/btrfs/inode.c329
-rw-r--r--fs/btrfs/ioctl.c181
-rw-r--r--fs/btrfs/locking.c157
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/print-tree.c2
-rw-r--r--fs/btrfs/props.c242
-rw-r--r--fs/btrfs/props.h7
-rw-r--r--fs/btrfs/qgroup.c5
-rw-r--r--fs/btrfs/ref-verify.c53
-rw-r--r--fs/btrfs/ref-verify.h10
-rw-r--r--fs/btrfs/relocation.c123
-rw-r--r--fs/btrfs/root-tree.c13
-rw-r--r--fs/btrfs/scrub.c6
-rw-r--r--fs/btrfs/send.c114
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/tests/btrfs-tests.c17
-rw-r--r--fs/btrfs/tests/btrfs-tests.h17
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c8
-rw-r--r--fs/btrfs/tests/extent-io-tests.c35
-rw-r--r--fs/btrfs/tests/extent-map-tests.c213
-rw-r--r--fs/btrfs/tests/free-space-tests.c11
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c14
-rw-r--r--fs/btrfs/tests/inode-tests.c34
-rw-r--r--fs/btrfs/tests/qgroup-tests.c20
-rw-r--r--fs/btrfs/transaction.c64
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-checker.c513
-rw-r--r--fs/btrfs/tree-checker.h11
-rw-r--r--fs/btrfs/tree-log.c289
-rw-r--r--fs/btrfs/tree-log.h10
-rw-r--r--fs/btrfs/uuid-tree.c6
-rw-r--r--fs/btrfs/volumes.c469
-rw-r--r--fs/btrfs/volumes.h39
-rw-r--r--fs/btrfs/xattr.c65
-rw-r--r--fs/btrfs/xattr.h7
-rw-r--r--fs/btrfs/zstd.c11
62 files changed, 3043 insertions, 2193 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 5810463dc6d2..a0af1b952c4d 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -93,7 +93,11 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
93 goto out; 93 goto out;
94 } 94 }
95 95
96 ret = btrfs_setxattr(trans, inode, name, value, size, 0); 96 if (trans)
97 ret = btrfs_setxattr(trans, inode, name, value, size, 0);
98 else
99 ret = btrfs_setxattr_trans(inode, name, value, size, 0);
100
97out: 101out:
98 kfree(value); 102 kfree(value);
99 103
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 11459fe84a29..982152d3f920 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -791,7 +791,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
791 count = node->ref_mod * -1; 791 count = node->ref_mod * -1;
792 break; 792 break;
793 default: 793 default:
794 BUG_ON(1); 794 BUG();
795 } 795 }
796 *total_refs += count; 796 *total_refs += count;
797 switch (node->type) { 797 switch (node->type) {
@@ -1460,8 +1460,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1460 * callers (such as fiemap) which want to know whether the extent is 1460 * callers (such as fiemap) which want to know whether the extent is
1461 * shared but do not need a ref count. 1461 * shared but do not need a ref count.
1462 * 1462 *
1463 * This attempts to allocate a transaction in order to account for 1463 * This attempts to attach to the running transaction in order to account for
1464 * delayed refs, but continues on even when the alloc fails. 1464 * delayed refs, but continues on even when no running transaction exists.
1465 * 1465 *
1466 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 1466 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1467 */ 1467 */
@@ -1484,13 +1484,16 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
1484 tmp = ulist_alloc(GFP_NOFS); 1484 tmp = ulist_alloc(GFP_NOFS);
1485 roots = ulist_alloc(GFP_NOFS); 1485 roots = ulist_alloc(GFP_NOFS);
1486 if (!tmp || !roots) { 1486 if (!tmp || !roots) {
1487 ulist_free(tmp); 1487 ret = -ENOMEM;
1488 ulist_free(roots); 1488 goto out;
1489 return -ENOMEM;
1490 } 1489 }
1491 1490
1492 trans = btrfs_join_transaction(root); 1491 trans = btrfs_attach_transaction(root);
1493 if (IS_ERR(trans)) { 1492 if (IS_ERR(trans)) {
1493 if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
1494 ret = PTR_ERR(trans);
1495 goto out;
1496 }
1494 trans = NULL; 1497 trans = NULL;
1495 down_read(&fs_info->commit_root_sem); 1498 down_read(&fs_info->commit_root_sem);
1496 } else { 1499 } else {
@@ -1523,6 +1526,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
1523 } else { 1526 } else {
1524 up_read(&fs_info->commit_root_sem); 1527 up_read(&fs_info->commit_root_sem);
1525 } 1528 }
1529out:
1526 ulist_free(tmp); 1530 ulist_free(tmp);
1527 ulist_free(roots); 1531 ulist_free(roots);
1528 return ret; 1532 return ret;
@@ -1747,7 +1751,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1747 else if (flags & BTRFS_EXTENT_FLAG_DATA) 1751 else if (flags & BTRFS_EXTENT_FLAG_DATA)
1748 *flags_ret = BTRFS_EXTENT_FLAG_DATA; 1752 *flags_ret = BTRFS_EXTENT_FLAG_DATA;
1749 else 1753 else
1750 BUG_ON(1); 1754 BUG();
1751 return 0; 1755 return 0;
1752 } 1756 }
1753 1757
@@ -1912,13 +1916,19 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1912 extent_item_objectid); 1916 extent_item_objectid);
1913 1917
1914 if (!search_commit_root) { 1918 if (!search_commit_root) {
1915 trans = btrfs_join_transaction(fs_info->extent_root); 1919 trans = btrfs_attach_transaction(fs_info->extent_root);
1916 if (IS_ERR(trans)) 1920 if (IS_ERR(trans)) {
1917 return PTR_ERR(trans); 1921 if (PTR_ERR(trans) != -ENOENT &&
1922 PTR_ERR(trans) != -EROFS)
1923 return PTR_ERR(trans);
1924 trans = NULL;
1925 }
1926 }
1927
1928 if (trans)
1918 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1929 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1919 } else { 1930 else
1920 down_read(&fs_info->commit_root_sem); 1931 down_read(&fs_info->commit_root_sem);
1921 }
1922 1932
1923 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, 1933 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
1924 tree_mod_seq_elem.seq, &refs, 1934 tree_mod_seq_elem.seq, &refs,
@@ -1951,7 +1961,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1951 1961
1952 free_leaf_list(refs); 1962 free_leaf_list(refs);
1953out: 1963out:
1954 if (!search_commit_root) { 1964 if (trans) {
1955 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1965 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1956 btrfs_end_transaction(trans); 1966 btrfs_end_transaction(trans);
1957 } else { 1967 } else {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6f5d07415dab..d5b438706b77 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -148,12 +148,6 @@ struct btrfs_inode {
148 u64 last_unlink_trans; 148 u64 last_unlink_trans;
149 149
150 /* 150 /*
151 * Track the transaction id of the last transaction used to create a
152 * hard link for the inode. This is used by the log tree (fsync).
153 */
154 u64 last_link_trans;
155
156 /*
157 * Number of bytes outstanding that are going to need csums. This is 151 * Number of bytes outstanding that are going to need csums. This is
158 * used in ENOSPC accounting. 152 * used in ENOSPC accounting.
159 */ 153 */
@@ -203,8 +197,6 @@ struct btrfs_inode {
203 struct inode vfs_inode; 197 struct inode vfs_inode;
204}; 198};
205 199
206extern unsigned char btrfs_filetype_table[];
207
208static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) 200static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
209{ 201{
210 return container_of(inode, struct btrfs_inode, vfs_inode); 202 return container_of(inode, struct btrfs_inode, vfs_inode);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4f2a8ae0aa42..1463e14af2fb 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -251,7 +251,7 @@ static void end_compressed_bio_write(struct bio *bio)
251 cb->compressed_pages[0]->mapping = cb->inode->i_mapping; 251 cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
252 btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], 252 btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
253 cb->start, cb->start + cb->len - 1, 253 cb->start, cb->start + cb->len - 1,
254 bio->bi_status ? BLK_STS_OK : BLK_STS_NOTSUPP); 254 bio->bi_status == BLK_STS_OK);
255 cb->compressed_pages[0]->mapping = NULL; 255 cb->compressed_pages[0]->mapping = NULL;
256 256
257 end_compressed_writeback(inode, cb); 257 end_compressed_writeback(inode, cb);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 324df36d28bf..5df76c17775a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -21,11 +21,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
21 const struct btrfs_key *ins_key, struct btrfs_path *path, 21 const struct btrfs_key *ins_key, struct btrfs_path *path,
22 int data_size, int extend); 22 int data_size, int extend);
23static int push_node_left(struct btrfs_trans_handle *trans, 23static int push_node_left(struct btrfs_trans_handle *trans,
24 struct btrfs_fs_info *fs_info,
25 struct extent_buffer *dst, 24 struct extent_buffer *dst,
26 struct extent_buffer *src, int empty); 25 struct extent_buffer *src, int empty);
27static int balance_node_right(struct btrfs_trans_handle *trans, 26static int balance_node_right(struct btrfs_trans_handle *trans,
28 struct btrfs_fs_info *fs_info,
29 struct extent_buffer *dst_buf, 27 struct extent_buffer *dst_buf,
30 struct extent_buffer *src_buf); 28 struct extent_buffer *src_buf);
31static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 29static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
@@ -726,11 +724,11 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
726 return __tree_mod_log_search(fs_info, start, min_seq, 0); 724 return __tree_mod_log_search(fs_info, start, min_seq, 0);
727} 725}
728 726
729static noinline int 727static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
730tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
731 struct extent_buffer *src, unsigned long dst_offset, 728 struct extent_buffer *src, unsigned long dst_offset,
732 unsigned long src_offset, int nr_items) 729 unsigned long src_offset, int nr_items)
733{ 730{
731 struct btrfs_fs_info *fs_info = dst->fs_info;
734 int ret = 0; 732 int ret = 0;
735 struct tree_mod_elem **tm_list = NULL; 733 struct tree_mod_elem **tm_list = NULL;
736 struct tree_mod_elem **tm_list_add, **tm_list_rem; 734 struct tree_mod_elem **tm_list_add, **tm_list_rem;
@@ -950,7 +948,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
950 if (new_flags != 0) { 948 if (new_flags != 0) {
951 int level = btrfs_header_level(buf); 949 int level = btrfs_header_level(buf);
952 950
953 ret = btrfs_set_disk_extent_flags(trans, fs_info, 951 ret = btrfs_set_disk_extent_flags(trans,
954 buf->start, 952 buf->start,
955 buf->len, 953 buf->len,
956 new_flags, level, 0); 954 new_flags, level, 0);
@@ -970,7 +968,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
970 if (ret) 968 if (ret)
971 return ret; 969 return ret;
972 } 970 }
973 clean_tree_block(fs_info, buf); 971 btrfs_clean_tree_block(buf);
974 *last_ref = 1; 972 *last_ref = 1;
975 } 973 }
976 return 0; 974 return 0;
@@ -1792,9 +1790,8 @@ static void root_sub_used(struct btrfs_root *root, u32 size)
1792/* given a node and slot number, this reads the blocks it points to. The 1790/* given a node and slot number, this reads the blocks it points to. The
1793 * extent buffer is returned with a reference taken (but unlocked). 1791 * extent buffer is returned with a reference taken (but unlocked).
1794 */ 1792 */
1795static noinline struct extent_buffer * 1793static noinline struct extent_buffer *read_node_slot(
1796read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, 1794 struct extent_buffer *parent, int slot)
1797 int slot)
1798{ 1795{
1799 int level = btrfs_header_level(parent); 1796 int level = btrfs_header_level(parent);
1800 struct extent_buffer *eb; 1797 struct extent_buffer *eb;
@@ -1806,7 +1803,7 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
1806 BUG_ON(level == 0); 1803 BUG_ON(level == 0);
1807 1804
1808 btrfs_node_key_to_cpu(parent, &first_key, slot); 1805 btrfs_node_key_to_cpu(parent, &first_key, slot);
1809 eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), 1806 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
1810 btrfs_node_ptr_generation(parent, slot), 1807 btrfs_node_ptr_generation(parent, slot),
1811 level - 1, &first_key); 1808 level - 1, &first_key);
1812 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { 1809 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
@@ -1863,7 +1860,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1863 return 0; 1860 return 0;
1864 1861
1865 /* promote the child to a root */ 1862 /* promote the child to a root */
1866 child = read_node_slot(fs_info, mid, 0); 1863 child = read_node_slot(mid, 0);
1867 if (IS_ERR(child)) { 1864 if (IS_ERR(child)) {
1868 ret = PTR_ERR(child); 1865 ret = PTR_ERR(child);
1869 btrfs_handle_fs_error(fs_info, ret, NULL); 1866 btrfs_handle_fs_error(fs_info, ret, NULL);
@@ -1888,7 +1885,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1888 1885
1889 path->locks[level] = 0; 1886 path->locks[level] = 0;
1890 path->nodes[level] = NULL; 1887 path->nodes[level] = NULL;
1891 clean_tree_block(fs_info, mid); 1888 btrfs_clean_tree_block(mid);
1892 btrfs_tree_unlock(mid); 1889 btrfs_tree_unlock(mid);
1893 /* once for the path */ 1890 /* once for the path */
1894 free_extent_buffer(mid); 1891 free_extent_buffer(mid);
@@ -1903,7 +1900,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1903 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) 1900 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
1904 return 0; 1901 return 0;
1905 1902
1906 left = read_node_slot(fs_info, parent, pslot - 1); 1903 left = read_node_slot(parent, pslot - 1);
1907 if (IS_ERR(left)) 1904 if (IS_ERR(left))
1908 left = NULL; 1905 left = NULL;
1909 1906
@@ -1918,7 +1915,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1918 } 1915 }
1919 } 1916 }
1920 1917
1921 right = read_node_slot(fs_info, parent, pslot + 1); 1918 right = read_node_slot(parent, pslot + 1);
1922 if (IS_ERR(right)) 1919 if (IS_ERR(right))
1923 right = NULL; 1920 right = NULL;
1924 1921
@@ -1936,7 +1933,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1936 /* first, try to make some room in the middle buffer */ 1933 /* first, try to make some room in the middle buffer */
1937 if (left) { 1934 if (left) {
1938 orig_slot += btrfs_header_nritems(left); 1935 orig_slot += btrfs_header_nritems(left);
1939 wret = push_node_left(trans, fs_info, left, mid, 1); 1936 wret = push_node_left(trans, left, mid, 1);
1940 if (wret < 0) 1937 if (wret < 0)
1941 ret = wret; 1938 ret = wret;
1942 } 1939 }
@@ -1945,11 +1942,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1945 * then try to empty the right most buffer into the middle 1942 * then try to empty the right most buffer into the middle
1946 */ 1943 */
1947 if (right) { 1944 if (right) {
1948 wret = push_node_left(trans, fs_info, mid, right, 1); 1945 wret = push_node_left(trans, mid, right, 1);
1949 if (wret < 0 && wret != -ENOSPC) 1946 if (wret < 0 && wret != -ENOSPC)
1950 ret = wret; 1947 ret = wret;
1951 if (btrfs_header_nritems(right) == 0) { 1948 if (btrfs_header_nritems(right) == 0) {
1952 clean_tree_block(fs_info, right); 1949 btrfs_clean_tree_block(right);
1953 btrfs_tree_unlock(right); 1950 btrfs_tree_unlock(right);
1954 del_ptr(root, path, level + 1, pslot + 1); 1951 del_ptr(root, path, level + 1, pslot + 1);
1955 root_sub_used(root, right->len); 1952 root_sub_used(root, right->len);
@@ -1981,20 +1978,20 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1981 btrfs_handle_fs_error(fs_info, ret, NULL); 1978 btrfs_handle_fs_error(fs_info, ret, NULL);
1982 goto enospc; 1979 goto enospc;
1983 } 1980 }
1984 wret = balance_node_right(trans, fs_info, mid, left); 1981 wret = balance_node_right(trans, mid, left);
1985 if (wret < 0) { 1982 if (wret < 0) {
1986 ret = wret; 1983 ret = wret;
1987 goto enospc; 1984 goto enospc;
1988 } 1985 }
1989 if (wret == 1) { 1986 if (wret == 1) {
1990 wret = push_node_left(trans, fs_info, left, mid, 1); 1987 wret = push_node_left(trans, left, mid, 1);
1991 if (wret < 0) 1988 if (wret < 0)
1992 ret = wret; 1989 ret = wret;
1993 } 1990 }
1994 BUG_ON(wret == 1); 1991 BUG_ON(wret == 1);
1995 } 1992 }
1996 if (btrfs_header_nritems(mid) == 0) { 1993 if (btrfs_header_nritems(mid) == 0) {
1997 clean_tree_block(fs_info, mid); 1994 btrfs_clean_tree_block(mid);
1998 btrfs_tree_unlock(mid); 1995 btrfs_tree_unlock(mid);
1999 del_ptr(root, path, level + 1, pslot); 1996 del_ptr(root, path, level + 1, pslot);
2000 root_sub_used(root, mid->len); 1997 root_sub_used(root, mid->len);
@@ -2078,7 +2075,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
2078 if (!parent) 2075 if (!parent)
2079 return 1; 2076 return 1;
2080 2077
2081 left = read_node_slot(fs_info, parent, pslot - 1); 2078 left = read_node_slot(parent, pslot - 1);
2082 if (IS_ERR(left)) 2079 if (IS_ERR(left))
2083 left = NULL; 2080 left = NULL;
2084 2081
@@ -2098,8 +2095,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
2098 if (ret) 2095 if (ret)
2099 wret = 1; 2096 wret = 1;
2100 else { 2097 else {
2101 wret = push_node_left(trans, fs_info, 2098 wret = push_node_left(trans, left, mid, 0);
2102 left, mid, 0);
2103 } 2099 }
2104 } 2100 }
2105 if (wret < 0) 2101 if (wret < 0)
@@ -2131,7 +2127,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
2131 btrfs_tree_unlock(left); 2127 btrfs_tree_unlock(left);
2132 free_extent_buffer(left); 2128 free_extent_buffer(left);
2133 } 2129 }
2134 right = read_node_slot(fs_info, parent, pslot + 1); 2130 right = read_node_slot(parent, pslot + 1);
2135 if (IS_ERR(right)) 2131 if (IS_ERR(right))
2136 right = NULL; 2132 right = NULL;
2137 2133
@@ -2154,8 +2150,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
2154 if (ret) 2150 if (ret)
2155 wret = 1; 2151 wret = 1;
2156 else { 2152 else {
2157 wret = balance_node_right(trans, fs_info, 2153 wret = balance_node_right(trans, right, mid);
2158 right, mid);
2159 } 2154 }
2160 } 2155 }
2161 if (wret < 0) 2156 if (wret < 0)
@@ -2416,6 +2411,16 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2416 if (tmp) { 2411 if (tmp) {
2417 /* first we do an atomic uptodate check */ 2412 /* first we do an atomic uptodate check */
2418 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 2413 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
2414 /*
2415 * Do extra check for first_key, eb can be stale due to
2416 * being cached, read from scrub, or have multiple
2417 * parents (shared tree blocks).
2418 */
2419 if (btrfs_verify_level_key(tmp,
2420 parent_level - 1, &first_key, gen)) {
2421 free_extent_buffer(tmp);
2422 return -EUCLEAN;
2423 }
2419 *eb_ret = tmp; 2424 *eb_ret = tmp;
2420 return 0; 2425 return 0;
2421 } 2426 }
@@ -2706,7 +2711,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2706 const struct btrfs_key *key, struct btrfs_path *p, 2711 const struct btrfs_key *key, struct btrfs_path *p,
2707 int ins_len, int cow) 2712 int ins_len, int cow)
2708{ 2713{
2709 struct btrfs_fs_info *fs_info = root->fs_info;
2710 struct extent_buffer *b; 2714 struct extent_buffer *b;
2711 int slot; 2715 int slot;
2712 int ret; 2716 int ret;
@@ -2904,7 +2908,7 @@ cow_done:
2904 } else { 2908 } else {
2905 p->slots[level] = slot; 2909 p->slots[level] = slot;
2906 if (ins_len > 0 && 2910 if (ins_len > 0 &&
2907 btrfs_leaf_free_space(fs_info, b) < ins_len) { 2911 btrfs_leaf_free_space(b) < ins_len) {
2908 if (write_lock_level < 1) { 2912 if (write_lock_level < 1) {
2909 write_lock_level = 1; 2913 write_lock_level = 1;
2910 btrfs_release_path(p); 2914 btrfs_release_path(p);
@@ -3181,11 +3185,31 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3181 slot = path->slots[0]; 3185 slot = path->slots[0];
3182 if (slot > 0) { 3186 if (slot > 0) {
3183 btrfs_item_key(eb, &disk_key, slot - 1); 3187 btrfs_item_key(eb, &disk_key, slot - 1);
3184 BUG_ON(comp_keys(&disk_key, new_key) >= 0); 3188 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
3189 btrfs_crit(fs_info,
3190 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3191 slot, btrfs_disk_key_objectid(&disk_key),
3192 btrfs_disk_key_type(&disk_key),
3193 btrfs_disk_key_offset(&disk_key),
3194 new_key->objectid, new_key->type,
3195 new_key->offset);
3196 btrfs_print_leaf(eb);
3197 BUG();
3198 }
3185 } 3199 }
3186 if (slot < btrfs_header_nritems(eb) - 1) { 3200 if (slot < btrfs_header_nritems(eb) - 1) {
3187 btrfs_item_key(eb, &disk_key, slot + 1); 3201 btrfs_item_key(eb, &disk_key, slot + 1);
3188 BUG_ON(comp_keys(&disk_key, new_key) <= 0); 3202 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
3203 btrfs_crit(fs_info,
3204 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3205 slot, btrfs_disk_key_objectid(&disk_key),
3206 btrfs_disk_key_type(&disk_key),
3207 btrfs_disk_key_offset(&disk_key),
3208 new_key->objectid, new_key->type,
3209 new_key->offset);
3210 btrfs_print_leaf(eb);
3211 BUG();
3212 }
3189 } 3213 }
3190 3214
3191 btrfs_cpu_key_to_disk(&disk_key, new_key); 3215 btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3203,10 +3227,10 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3203 * error, and > 0 if there was no room in the left hand block. 3227 * error, and > 0 if there was no room in the left hand block.
3204 */ 3228 */
3205static int push_node_left(struct btrfs_trans_handle *trans, 3229static int push_node_left(struct btrfs_trans_handle *trans,
3206 struct btrfs_fs_info *fs_info,
3207 struct extent_buffer *dst, 3230 struct extent_buffer *dst,
3208 struct extent_buffer *src, int empty) 3231 struct extent_buffer *src, int empty)
3209{ 3232{
3233 struct btrfs_fs_info *fs_info = trans->fs_info;
3210 int push_items = 0; 3234 int push_items = 0;
3211 int src_nritems; 3235 int src_nritems;
3212 int dst_nritems; 3236 int dst_nritems;
@@ -3239,8 +3263,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
3239 } else 3263 } else
3240 push_items = min(src_nritems - 8, push_items); 3264 push_items = min(src_nritems - 8, push_items);
3241 3265
3242 ret = tree_mod_log_eb_copy(fs_info, dst, src, dst_nritems, 0, 3266 ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
3243 push_items);
3244 if (ret) { 3267 if (ret) {
3245 btrfs_abort_transaction(trans, ret); 3268 btrfs_abort_transaction(trans, ret);
3246 return ret; 3269 return ret;
@@ -3278,10 +3301,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
3278 * this will only push up to 1/2 the contents of the left node over 3301 * this will only push up to 1/2 the contents of the left node over
3279 */ 3302 */
3280static int balance_node_right(struct btrfs_trans_handle *trans, 3303static int balance_node_right(struct btrfs_trans_handle *trans,
3281 struct btrfs_fs_info *fs_info,
3282 struct extent_buffer *dst, 3304 struct extent_buffer *dst,
3283 struct extent_buffer *src) 3305 struct extent_buffer *src)
3284{ 3306{
3307 struct btrfs_fs_info *fs_info = trans->fs_info;
3285 int push_items = 0; 3308 int push_items = 0;
3286 int max_push; 3309 int max_push;
3287 int src_nritems; 3310 int src_nritems;
@@ -3315,8 +3338,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3315 (dst_nritems) * 3338 (dst_nritems) *
3316 sizeof(struct btrfs_key_ptr)); 3339 sizeof(struct btrfs_key_ptr));
3317 3340
3318 ret = tree_mod_log_eb_copy(fs_info, dst, src, 0, 3341 ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
3319 src_nritems - push_items, push_items); 3342 push_items);
3320 if (ret) { 3343 if (ret) {
3321 btrfs_abort_transaction(trans, ret); 3344 btrfs_abort_transaction(trans, ret);
3322 return ret; 3345 return ret;
@@ -3404,7 +3427,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3404 * blocknr is the block the key points to. 3427 * blocknr is the block the key points to.
3405 */ 3428 */
3406static void insert_ptr(struct btrfs_trans_handle *trans, 3429static void insert_ptr(struct btrfs_trans_handle *trans,
3407 struct btrfs_fs_info *fs_info, struct btrfs_path *path, 3430 struct btrfs_path *path,
3408 struct btrfs_disk_key *key, u64 bytenr, 3431 struct btrfs_disk_key *key, u64 bytenr,
3409 int slot, int level) 3432 int slot, int level)
3410{ 3433{
@@ -3417,7 +3440,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3417 lower = path->nodes[level]; 3440 lower = path->nodes[level];
3418 nritems = btrfs_header_nritems(lower); 3441 nritems = btrfs_header_nritems(lower);
3419 BUG_ON(slot > nritems); 3442 BUG_ON(slot > nritems);
3420 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 3443 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
3421 if (slot != nritems) { 3444 if (slot != nritems) {
3422 if (level) { 3445 if (level) {
3423 ret = tree_mod_log_insert_move(lower, slot + 1, slot, 3446 ret = tree_mod_log_insert_move(lower, slot + 1, slot,
@@ -3501,7 +3524,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3501 root_add_used(root, fs_info->nodesize); 3524 root_add_used(root, fs_info->nodesize);
3502 ASSERT(btrfs_header_level(c) == level); 3525 ASSERT(btrfs_header_level(c) == level);
3503 3526
3504 ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); 3527 ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
3505 if (ret) { 3528 if (ret) {
3506 btrfs_abort_transaction(trans, ret); 3529 btrfs_abort_transaction(trans, ret);
3507 return ret; 3530 return ret;
@@ -3517,7 +3540,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3517 btrfs_mark_buffer_dirty(c); 3540 btrfs_mark_buffer_dirty(c);
3518 btrfs_mark_buffer_dirty(split); 3541 btrfs_mark_buffer_dirty(split);
3519 3542
3520 insert_ptr(trans, fs_info, path, &disk_key, split->start, 3543 insert_ptr(trans, path, &disk_key, split->start,
3521 path->slots[level + 1] + 1, level + 1); 3544 path->slots[level + 1] + 1, level + 1);
3522 3545
3523 if (path->slots[level] >= mid) { 3546 if (path->slots[level] >= mid) {
@@ -3565,9 +3588,9 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
3565 * the start of the leaf data. IOW, how much room 3588 * the start of the leaf data. IOW, how much room
3566 * the leaf has left for both items and data 3589 * the leaf has left for both items and data
3567 */ 3590 */
3568noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, 3591noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
3569 struct extent_buffer *leaf)
3570{ 3592{
3593 struct btrfs_fs_info *fs_info = leaf->fs_info;
3571 int nritems = btrfs_header_nritems(leaf); 3594 int nritems = btrfs_header_nritems(leaf);
3572 int ret; 3595 int ret;
3573 3596
@@ -3586,13 +3609,13 @@ noinline int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info,
3586 * min slot controls the lowest index we're willing to push to the 3609 * min slot controls the lowest index we're willing to push to the
3587 * right. We'll push up to and including min_slot, but no lower 3610 * right. We'll push up to and including min_slot, but no lower
3588 */ 3611 */
3589static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info, 3612static noinline int __push_leaf_right(struct btrfs_path *path,
3590 struct btrfs_path *path,
3591 int data_size, int empty, 3613 int data_size, int empty,
3592 struct extent_buffer *right, 3614 struct extent_buffer *right,
3593 int free_space, u32 left_nritems, 3615 int free_space, u32 left_nritems,
3594 u32 min_slot) 3616 u32 min_slot)
3595{ 3617{
3618 struct btrfs_fs_info *fs_info = right->fs_info;
3596 struct extent_buffer *left = path->nodes[0]; 3619 struct extent_buffer *left = path->nodes[0];
3597 struct extent_buffer *upper = path->nodes[1]; 3620 struct extent_buffer *upper = path->nodes[1];
3598 struct btrfs_map_token token; 3621 struct btrfs_map_token token;
@@ -3626,7 +3649,8 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
3626 if (path->slots[0] > i) 3649 if (path->slots[0] > i)
3627 break; 3650 break;
3628 if (path->slots[0] == i) { 3651 if (path->slots[0] == i) {
3629 int space = btrfs_leaf_free_space(fs_info, left); 3652 int space = btrfs_leaf_free_space(left);
3653
3630 if (space + push_space * 2 > free_space) 3654 if (space + push_space * 2 > free_space)
3631 break; 3655 break;
3632 } 3656 }
@@ -3655,10 +3679,10 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
3655 right_nritems = btrfs_header_nritems(right); 3679 right_nritems = btrfs_header_nritems(right);
3656 3680
3657 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 3681 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
3658 push_space -= leaf_data_end(fs_info, left); 3682 push_space -= leaf_data_end(left);
3659 3683
3660 /* make room in the right data area */ 3684 /* make room in the right data area */
3661 data_end = leaf_data_end(fs_info, right); 3685 data_end = leaf_data_end(right);
3662 memmove_extent_buffer(right, 3686 memmove_extent_buffer(right,
3663 BTRFS_LEAF_DATA_OFFSET + data_end - push_space, 3687 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
3664 BTRFS_LEAF_DATA_OFFSET + data_end, 3688 BTRFS_LEAF_DATA_OFFSET + data_end,
@@ -3667,7 +3691,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
3667 /* copy from the left data area */ 3691 /* copy from the left data area */
3668 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + 3692 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
3669 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 3693 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3670 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, left), 3694 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
3671 push_space); 3695 push_space);
3672 3696
3673 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 3697 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
@@ -3695,7 +3719,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
3695 if (left_nritems) 3719 if (left_nritems)
3696 btrfs_mark_buffer_dirty(left); 3720 btrfs_mark_buffer_dirty(left);
3697 else 3721 else
3698 clean_tree_block(fs_info, left); 3722 btrfs_clean_tree_block(left);
3699 3723
3700 btrfs_mark_buffer_dirty(right); 3724 btrfs_mark_buffer_dirty(right);
3701 3725
@@ -3707,7 +3731,7 @@ static noinline int __push_leaf_right(struct btrfs_fs_info *fs_info,
3707 if (path->slots[0] >= left_nritems) { 3731 if (path->slots[0] >= left_nritems) {
3708 path->slots[0] -= left_nritems; 3732 path->slots[0] -= left_nritems;
3709 if (btrfs_header_nritems(path->nodes[0]) == 0) 3733 if (btrfs_header_nritems(path->nodes[0]) == 0)
3710 clean_tree_block(fs_info, path->nodes[0]); 3734 btrfs_clean_tree_block(path->nodes[0]);
3711 btrfs_tree_unlock(path->nodes[0]); 3735 btrfs_tree_unlock(path->nodes[0]);
3712 free_extent_buffer(path->nodes[0]); 3736 free_extent_buffer(path->nodes[0]);
3713 path->nodes[0] = right; 3737 path->nodes[0] = right;
@@ -3739,7 +3763,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3739 int min_data_size, int data_size, 3763 int min_data_size, int data_size,
3740 int empty, u32 min_slot) 3764 int empty, u32 min_slot)
3741{ 3765{
3742 struct btrfs_fs_info *fs_info = root->fs_info;
3743 struct extent_buffer *left = path->nodes[0]; 3766 struct extent_buffer *left = path->nodes[0];
3744 struct extent_buffer *right; 3767 struct extent_buffer *right;
3745 struct extent_buffer *upper; 3768 struct extent_buffer *upper;
@@ -3758,7 +3781,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3758 3781
3759 btrfs_assert_tree_locked(path->nodes[1]); 3782 btrfs_assert_tree_locked(path->nodes[1]);
3760 3783
3761 right = read_node_slot(fs_info, upper, slot + 1); 3784 right = read_node_slot(upper, slot + 1);
3762 /* 3785 /*
3763 * slot + 1 is not valid or we fail to read the right node, 3786 * slot + 1 is not valid or we fail to read the right node,
3764 * no big deal, just return. 3787 * no big deal, just return.
@@ -3769,7 +3792,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3769 btrfs_tree_lock(right); 3792 btrfs_tree_lock(right);
3770 btrfs_set_lock_blocking_write(right); 3793 btrfs_set_lock_blocking_write(right);
3771 3794
3772 free_space = btrfs_leaf_free_space(fs_info, right); 3795 free_space = btrfs_leaf_free_space(right);
3773 if (free_space < data_size) 3796 if (free_space < data_size)
3774 goto out_unlock; 3797 goto out_unlock;
3775 3798
@@ -3779,7 +3802,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3779 if (ret) 3802 if (ret)
3780 goto out_unlock; 3803 goto out_unlock;
3781 3804
3782 free_space = btrfs_leaf_free_space(fs_info, right); 3805 free_space = btrfs_leaf_free_space(right);
3783 if (free_space < data_size) 3806 if (free_space < data_size)
3784 goto out_unlock; 3807 goto out_unlock;
3785 3808
@@ -3800,7 +3823,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3800 return 0; 3823 return 0;
3801 } 3824 }
3802 3825
3803 return __push_leaf_right(fs_info, path, min_data_size, empty, 3826 return __push_leaf_right(path, min_data_size, empty,
3804 right, free_space, left_nritems, min_slot); 3827 right, free_space, left_nritems, min_slot);
3805out_unlock: 3828out_unlock:
3806 btrfs_tree_unlock(right); 3829 btrfs_tree_unlock(right);
@@ -3816,12 +3839,12 @@ out_unlock:
3816 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the 3839 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
3817 * items 3840 * items
3818 */ 3841 */
3819static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info, 3842static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
3820 struct btrfs_path *path, int data_size,
3821 int empty, struct extent_buffer *left, 3843 int empty, struct extent_buffer *left,
3822 int free_space, u32 right_nritems, 3844 int free_space, u32 right_nritems,
3823 u32 max_slot) 3845 u32 max_slot)
3824{ 3846{
3847 struct btrfs_fs_info *fs_info = left->fs_info;
3825 struct btrfs_disk_key disk_key; 3848 struct btrfs_disk_key disk_key;
3826 struct extent_buffer *right = path->nodes[0]; 3849 struct extent_buffer *right = path->nodes[0];
3827 int i; 3850 int i;
@@ -3849,7 +3872,8 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
3849 if (path->slots[0] < i) 3872 if (path->slots[0] < i)
3850 break; 3873 break;
3851 if (path->slots[0] == i) { 3874 if (path->slots[0] == i) {
3852 int space = btrfs_leaf_free_space(fs_info, right); 3875 int space = btrfs_leaf_free_space(right);
3876
3853 if (space + push_space * 2 > free_space) 3877 if (space + push_space * 2 > free_space)
3854 break; 3878 break;
3855 } 3879 }
@@ -3882,7 +3906,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
3882 btrfs_item_offset_nr(right, push_items - 1); 3906 btrfs_item_offset_nr(right, push_items - 1);
3883 3907
3884 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + 3908 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
3885 leaf_data_end(fs_info, left) - push_space, 3909 leaf_data_end(left) - push_space,
3886 BTRFS_LEAF_DATA_OFFSET + 3910 BTRFS_LEAF_DATA_OFFSET +
3887 btrfs_item_offset_nr(right, push_items - 1), 3911 btrfs_item_offset_nr(right, push_items - 1),
3888 push_space); 3912 push_space);
@@ -3909,11 +3933,11 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
3909 3933
3910 if (push_items < right_nritems) { 3934 if (push_items < right_nritems) {
3911 push_space = btrfs_item_offset_nr(right, push_items - 1) - 3935 push_space = btrfs_item_offset_nr(right, push_items - 1) -
3912 leaf_data_end(fs_info, right); 3936 leaf_data_end(right);
3913 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + 3937 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
3914 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 3938 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3915 BTRFS_LEAF_DATA_OFFSET + 3939 BTRFS_LEAF_DATA_OFFSET +
3916 leaf_data_end(fs_info, right), push_space); 3940 leaf_data_end(right), push_space);
3917 3941
3918 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 3942 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
3919 btrfs_item_nr_offset(push_items), 3943 btrfs_item_nr_offset(push_items),
@@ -3935,7 +3959,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
3935 if (right_nritems) 3959 if (right_nritems)
3936 btrfs_mark_buffer_dirty(right); 3960 btrfs_mark_buffer_dirty(right);
3937 else 3961 else
3938 clean_tree_block(fs_info, right); 3962 btrfs_clean_tree_block(right);
3939 3963
3940 btrfs_item_key(right, &disk_key, 0); 3964 btrfs_item_key(right, &disk_key, 0);
3941 fixup_low_keys(path, &disk_key, 1); 3965 fixup_low_keys(path, &disk_key, 1);
@@ -3972,7 +3996,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3972 *root, struct btrfs_path *path, int min_data_size, 3996 *root, struct btrfs_path *path, int min_data_size,
3973 int data_size, int empty, u32 max_slot) 3997 int data_size, int empty, u32 max_slot)
3974{ 3998{
3975 struct btrfs_fs_info *fs_info = root->fs_info;
3976 struct extent_buffer *right = path->nodes[0]; 3999 struct extent_buffer *right = path->nodes[0];
3977 struct extent_buffer *left; 4000 struct extent_buffer *left;
3978 int slot; 4001 int slot;
@@ -3992,7 +4015,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3992 4015
3993 btrfs_assert_tree_locked(path->nodes[1]); 4016 btrfs_assert_tree_locked(path->nodes[1]);
3994 4017
3995 left = read_node_slot(fs_info, path->nodes[1], slot - 1); 4018 left = read_node_slot(path->nodes[1], slot - 1);
3996 /* 4019 /*
3997 * slot - 1 is not valid or we fail to read the left node, 4020 * slot - 1 is not valid or we fail to read the left node,
3998 * no big deal, just return. 4021 * no big deal, just return.
@@ -4003,7 +4026,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
4003 btrfs_tree_lock(left); 4026 btrfs_tree_lock(left);
4004 btrfs_set_lock_blocking_write(left); 4027 btrfs_set_lock_blocking_write(left);
4005 4028
4006 free_space = btrfs_leaf_free_space(fs_info, left); 4029 free_space = btrfs_leaf_free_space(left);
4007 if (free_space < data_size) { 4030 if (free_space < data_size) {
4008 ret = 1; 4031 ret = 1;
4009 goto out; 4032 goto out;
@@ -4019,13 +4042,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
4019 goto out; 4042 goto out;
4020 } 4043 }
4021 4044
4022 free_space = btrfs_leaf_free_space(fs_info, left); 4045 free_space = btrfs_leaf_free_space(left);
4023 if (free_space < data_size) { 4046 if (free_space < data_size) {
4024 ret = 1; 4047 ret = 1;
4025 goto out; 4048 goto out;
4026 } 4049 }
4027 4050
4028 return __push_leaf_left(fs_info, path, min_data_size, 4051 return __push_leaf_left(path, min_data_size,
4029 empty, left, free_space, right_nritems, 4052 empty, left, free_space, right_nritems,
4030 max_slot); 4053 max_slot);
4031out: 4054out:
@@ -4039,12 +4062,12 @@ out:
4039 * available for the resulting leaf level of the path. 4062 * available for the resulting leaf level of the path.
4040 */ 4063 */
4041static noinline void copy_for_split(struct btrfs_trans_handle *trans, 4064static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4042 struct btrfs_fs_info *fs_info,
4043 struct btrfs_path *path, 4065 struct btrfs_path *path,
4044 struct extent_buffer *l, 4066 struct extent_buffer *l,
4045 struct extent_buffer *right, 4067 struct extent_buffer *right,
4046 int slot, int mid, int nritems) 4068 int slot, int mid, int nritems)
4047{ 4069{
4070 struct btrfs_fs_info *fs_info = trans->fs_info;
4048 int data_copy_size; 4071 int data_copy_size;
4049 int rt_data_off; 4072 int rt_data_off;
4050 int i; 4073 int i;
@@ -4055,7 +4078,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4055 4078
4056 nritems = nritems - mid; 4079 nritems = nritems - mid;
4057 btrfs_set_header_nritems(right, nritems); 4080 btrfs_set_header_nritems(right, nritems);
4058 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(fs_info, l); 4081 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
4059 4082
4060 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 4083 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
4061 btrfs_item_nr_offset(mid), 4084 btrfs_item_nr_offset(mid),
@@ -4064,7 +4087,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4064 copy_extent_buffer(right, l, 4087 copy_extent_buffer(right, l,
4065 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - 4088 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
4066 data_copy_size, BTRFS_LEAF_DATA_OFFSET + 4089 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
4067 leaf_data_end(fs_info, l), data_copy_size); 4090 leaf_data_end(l), data_copy_size);
4068 4091
4069 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid); 4092 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
4070 4093
@@ -4079,8 +4102,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
4079 4102
4080 btrfs_set_header_nritems(l, mid); 4103 btrfs_set_header_nritems(l, mid);
4081 btrfs_item_key(right, &disk_key, 0); 4104 btrfs_item_key(right, &disk_key, 0);
4082 insert_ptr(trans, fs_info, path, &disk_key, right->start, 4105 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
4083 path->slots[1] + 1, 1);
4084 4106
4085 btrfs_mark_buffer_dirty(right); 4107 btrfs_mark_buffer_dirty(right);
4086 btrfs_mark_buffer_dirty(l); 4108 btrfs_mark_buffer_dirty(l);
@@ -4115,7 +4137,6 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4115 struct btrfs_path *path, 4137 struct btrfs_path *path,
4116 int data_size) 4138 int data_size)
4117{ 4139{
4118 struct btrfs_fs_info *fs_info = root->fs_info;
4119 int ret; 4140 int ret;
4120 int progress = 0; 4141 int progress = 0;
4121 int slot; 4142 int slot;
@@ -4124,7 +4145,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4124 4145
4125 slot = path->slots[0]; 4146 slot = path->slots[0];
4126 if (slot < btrfs_header_nritems(path->nodes[0])) 4147 if (slot < btrfs_header_nritems(path->nodes[0]))
4127 space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); 4148 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
4128 4149
4129 /* 4150 /*
4130 * try to push all the items after our slot into the 4151 * try to push all the items after our slot into the
@@ -4145,14 +4166,14 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4145 if (path->slots[0] == 0 || path->slots[0] == nritems) 4166 if (path->slots[0] == 0 || path->slots[0] == nritems)
4146 return 0; 4167 return 0;
4147 4168
4148 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size) 4169 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
4149 return 0; 4170 return 0;
4150 4171
4151 /* try to push all the items before our slot into the next leaf */ 4172 /* try to push all the items before our slot into the next leaf */
4152 slot = path->slots[0]; 4173 slot = path->slots[0];
4153 space_needed = data_size; 4174 space_needed = data_size;
4154 if (slot > 0) 4175 if (slot > 0)
4155 space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); 4176 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
4156 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); 4177 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
4157 if (ret < 0) 4178 if (ret < 0)
4158 return ret; 4179 return ret;
@@ -4201,7 +4222,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
4201 int space_needed = data_size; 4222 int space_needed = data_size;
4202 4223
4203 if (slot < btrfs_header_nritems(l)) 4224 if (slot < btrfs_header_nritems(l))
4204 space_needed -= btrfs_leaf_free_space(fs_info, l); 4225 space_needed -= btrfs_leaf_free_space(l);
4205 4226
4206 wret = push_leaf_right(trans, root, path, space_needed, 4227 wret = push_leaf_right(trans, root, path, space_needed,
4207 space_needed, 0, 0); 4228 space_needed, 0, 0);
@@ -4210,8 +4231,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
4210 if (wret) { 4231 if (wret) {
4211 space_needed = data_size; 4232 space_needed = data_size;
4212 if (slot > 0) 4233 if (slot > 0)
4213 space_needed -= btrfs_leaf_free_space(fs_info, 4234 space_needed -= btrfs_leaf_free_space(l);
4214 l);
4215 wret = push_leaf_left(trans, root, path, space_needed, 4235 wret = push_leaf_left(trans, root, path, space_needed,
4216 space_needed, 0, (u32)-1); 4236 space_needed, 0, (u32)-1);
4217 if (wret < 0) 4237 if (wret < 0)
@@ -4220,7 +4240,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
4220 l = path->nodes[0]; 4240 l = path->nodes[0];
4221 4241
4222 /* did the pushes work? */ 4242 /* did the pushes work? */
4223 if (btrfs_leaf_free_space(fs_info, l) >= data_size) 4243 if (btrfs_leaf_free_space(l) >= data_size)
4224 return 0; 4244 return 0;
4225 } 4245 }
4226 4246
@@ -4288,7 +4308,7 @@ again:
4288 if (split == 0) { 4308 if (split == 0) {
4289 if (mid <= slot) { 4309 if (mid <= slot) {
4290 btrfs_set_header_nritems(right, 0); 4310 btrfs_set_header_nritems(right, 0);
4291 insert_ptr(trans, fs_info, path, &disk_key, 4311 insert_ptr(trans, path, &disk_key,
4292 right->start, path->slots[1] + 1, 1); 4312 right->start, path->slots[1] + 1, 1);
4293 btrfs_tree_unlock(path->nodes[0]); 4313 btrfs_tree_unlock(path->nodes[0]);
4294 free_extent_buffer(path->nodes[0]); 4314 free_extent_buffer(path->nodes[0]);
@@ -4297,7 +4317,7 @@ again:
4297 path->slots[1] += 1; 4317 path->slots[1] += 1;
4298 } else { 4318 } else {
4299 btrfs_set_header_nritems(right, 0); 4319 btrfs_set_header_nritems(right, 0);
4300 insert_ptr(trans, fs_info, path, &disk_key, 4320 insert_ptr(trans, path, &disk_key,
4301 right->start, path->slots[1], 1); 4321 right->start, path->slots[1], 1);
4302 btrfs_tree_unlock(path->nodes[0]); 4322 btrfs_tree_unlock(path->nodes[0]);
4303 free_extent_buffer(path->nodes[0]); 4323 free_extent_buffer(path->nodes[0]);
@@ -4314,7 +4334,7 @@ again:
4314 return ret; 4334 return ret;
4315 } 4335 }
4316 4336
4317 copy_for_split(trans, fs_info, path, l, right, slot, mid, nritems); 4337 copy_for_split(trans, path, l, right, slot, mid, nritems);
4318 4338
4319 if (split == 2) { 4339 if (split == 2) {
4320 BUG_ON(num_doubles != 0); 4340 BUG_ON(num_doubles != 0);
@@ -4327,7 +4347,7 @@ again:
4327push_for_double: 4347push_for_double:
4328 push_for_double_split(trans, root, path, data_size); 4348 push_for_double_split(trans, root, path, data_size);
4329 tried_avoid_double = 1; 4349 tried_avoid_double = 1;
4330 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= data_size) 4350 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
4331 return 0; 4351 return 0;
4332 goto again; 4352 goto again;
4333} 4353}
@@ -4336,7 +4356,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4336 struct btrfs_root *root, 4356 struct btrfs_root *root,
4337 struct btrfs_path *path, int ins_len) 4357 struct btrfs_path *path, int ins_len)
4338{ 4358{
4339 struct btrfs_fs_info *fs_info = root->fs_info;
4340 struct btrfs_key key; 4359 struct btrfs_key key;
4341 struct extent_buffer *leaf; 4360 struct extent_buffer *leaf;
4342 struct btrfs_file_extent_item *fi; 4361 struct btrfs_file_extent_item *fi;
@@ -4350,7 +4369,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4350 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 4369 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4351 key.type != BTRFS_EXTENT_CSUM_KEY); 4370 key.type != BTRFS_EXTENT_CSUM_KEY);
4352 4371
4353 if (btrfs_leaf_free_space(fs_info, leaf) >= ins_len) 4372 if (btrfs_leaf_free_space(leaf) >= ins_len)
4354 return 0; 4373 return 0;
4355 4374
4356 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 4375 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -4377,7 +4396,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4377 goto err; 4396 goto err;
4378 4397
4379 /* the leaf has changed, it now has room. return now */ 4398 /* the leaf has changed, it now has room. return now */
4380 if (btrfs_leaf_free_space(fs_info, path->nodes[0]) >= ins_len) 4399 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
4381 goto err; 4400 goto err;
4382 4401
4383 if (key.type == BTRFS_EXTENT_DATA_KEY) { 4402 if (key.type == BTRFS_EXTENT_DATA_KEY) {
@@ -4400,8 +4419,7 @@ err:
4400 return ret; 4419 return ret;
4401} 4420}
4402 4421
4403static noinline int split_item(struct btrfs_fs_info *fs_info, 4422static noinline int split_item(struct btrfs_path *path,
4404 struct btrfs_path *path,
4405 const struct btrfs_key *new_key, 4423 const struct btrfs_key *new_key,
4406 unsigned long split_offset) 4424 unsigned long split_offset)
4407{ 4425{
@@ -4416,7 +4434,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
4416 struct btrfs_disk_key disk_key; 4434 struct btrfs_disk_key disk_key;
4417 4435
4418 leaf = path->nodes[0]; 4436 leaf = path->nodes[0];
4419 BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < sizeof(struct btrfs_item)); 4437 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
4420 4438
4421 btrfs_set_path_blocking(path); 4439 btrfs_set_path_blocking(path);
4422 4440
@@ -4465,7 +4483,7 @@ static noinline int split_item(struct btrfs_fs_info *fs_info,
4465 item_size - split_offset); 4483 item_size - split_offset);
4466 btrfs_mark_buffer_dirty(leaf); 4484 btrfs_mark_buffer_dirty(leaf);
4467 4485
4468 BUG_ON(btrfs_leaf_free_space(fs_info, leaf) < 0); 4486 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
4469 kfree(buf); 4487 kfree(buf);
4470 return 0; 4488 return 0;
4471} 4489}
@@ -4497,7 +4515,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
4497 if (ret) 4515 if (ret)
4498 return ret; 4516 return ret;
4499 4517
4500 ret = split_item(root->fs_info, path, new_key, split_offset); 4518 ret = split_item(path, new_key, split_offset);
4501 return ret; 4519 return ret;
4502} 4520}
4503 4521
@@ -4543,8 +4561,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4543 * off the end of the item or if we shift the item to chop bytes off 4561 * off the end of the item or if we shift the item to chop bytes off
4544 * the front. 4562 * the front.
4545 */ 4563 */
4546void btrfs_truncate_item(struct btrfs_fs_info *fs_info, 4564void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
4547 struct btrfs_path *path, u32 new_size, int from_end)
4548{ 4565{
4549 int slot; 4566 int slot;
4550 struct extent_buffer *leaf; 4567 struct extent_buffer *leaf;
@@ -4567,7 +4584,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
4567 return; 4584 return;
4568 4585
4569 nritems = btrfs_header_nritems(leaf); 4586 nritems = btrfs_header_nritems(leaf);
4570 data_end = leaf_data_end(fs_info, leaf); 4587 data_end = leaf_data_end(leaf);
4571 4588
4572 old_data_start = btrfs_item_offset_nr(leaf, slot); 4589 old_data_start = btrfs_item_offset_nr(leaf, slot);
4573 4590
@@ -4633,7 +4650,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
4633 btrfs_set_item_size(leaf, item, new_size); 4650 btrfs_set_item_size(leaf, item, new_size);
4634 btrfs_mark_buffer_dirty(leaf); 4651 btrfs_mark_buffer_dirty(leaf);
4635 4652
4636 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4653 if (btrfs_leaf_free_space(leaf) < 0) {
4637 btrfs_print_leaf(leaf); 4654 btrfs_print_leaf(leaf);
4638 BUG(); 4655 BUG();
4639 } 4656 }
@@ -4642,8 +4659,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
4642/* 4659/*
4643 * make the item pointed to by the path bigger, data_size is the added size. 4660 * make the item pointed to by the path bigger, data_size is the added size.
4644 */ 4661 */
4645void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 4662void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
4646 u32 data_size)
4647{ 4663{
4648 int slot; 4664 int slot;
4649 struct extent_buffer *leaf; 4665 struct extent_buffer *leaf;
@@ -4660,9 +4676,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
4660 leaf = path->nodes[0]; 4676 leaf = path->nodes[0];
4661 4677
4662 nritems = btrfs_header_nritems(leaf); 4678 nritems = btrfs_header_nritems(leaf);
4663 data_end = leaf_data_end(fs_info, leaf); 4679 data_end = leaf_data_end(leaf);
4664 4680
4665 if (btrfs_leaf_free_space(fs_info, leaf) < data_size) { 4681 if (btrfs_leaf_free_space(leaf) < data_size) {
4666 btrfs_print_leaf(leaf); 4682 btrfs_print_leaf(leaf);
4667 BUG(); 4683 BUG();
4668 } 4684 }
@@ -4672,9 +4688,9 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
4672 BUG_ON(slot < 0); 4688 BUG_ON(slot < 0);
4673 if (slot >= nritems) { 4689 if (slot >= nritems) {
4674 btrfs_print_leaf(leaf); 4690 btrfs_print_leaf(leaf);
4675 btrfs_crit(fs_info, "slot %d too large, nritems %d", 4691 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
4676 slot, nritems); 4692 slot, nritems);
4677 BUG_ON(1); 4693 BUG();
4678 } 4694 }
4679 4695
4680 /* 4696 /*
@@ -4701,7 +4717,7 @@ void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
4701 btrfs_set_item_size(leaf, item, old_size + data_size); 4717 btrfs_set_item_size(leaf, item, old_size + data_size);
4702 btrfs_mark_buffer_dirty(leaf); 4718 btrfs_mark_buffer_dirty(leaf);
4703 4719
4704 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4720 if (btrfs_leaf_free_space(leaf) < 0) {
4705 btrfs_print_leaf(leaf); 4721 btrfs_print_leaf(leaf);
4706 BUG(); 4722 BUG();
4707 } 4723 }
@@ -4738,12 +4754,12 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4738 slot = path->slots[0]; 4754 slot = path->slots[0];
4739 4755
4740 nritems = btrfs_header_nritems(leaf); 4756 nritems = btrfs_header_nritems(leaf);
4741 data_end = leaf_data_end(fs_info, leaf); 4757 data_end = leaf_data_end(leaf);
4742 4758
4743 if (btrfs_leaf_free_space(fs_info, leaf) < total_size) { 4759 if (btrfs_leaf_free_space(leaf) < total_size) {
4744 btrfs_print_leaf(leaf); 4760 btrfs_print_leaf(leaf);
4745 btrfs_crit(fs_info, "not enough freespace need %u have %d", 4761 btrfs_crit(fs_info, "not enough freespace need %u have %d",
4746 total_size, btrfs_leaf_free_space(fs_info, leaf)); 4762 total_size, btrfs_leaf_free_space(leaf));
4747 BUG(); 4763 BUG();
4748 } 4764 }
4749 4765
@@ -4754,7 +4770,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4754 btrfs_print_leaf(leaf); 4770 btrfs_print_leaf(leaf);
4755 btrfs_crit(fs_info, "slot %d old_data %d data_end %d", 4771 btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
4756 slot, old_data, data_end); 4772 slot, old_data, data_end);
4757 BUG_ON(1); 4773 BUG();
4758 } 4774 }
4759 /* 4775 /*
4760 * item0..itemN ... dataN.offset..dataN.size .. data0.size 4776 * item0..itemN ... dataN.offset..dataN.size .. data0.size
@@ -4794,7 +4810,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4794 btrfs_set_header_nritems(leaf, nritems + nr); 4810 btrfs_set_header_nritems(leaf, nritems + nr);
4795 btrfs_mark_buffer_dirty(leaf); 4811 btrfs_mark_buffer_dirty(leaf);
4796 4812
4797 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4813 if (btrfs_leaf_free_space(leaf) < 0) {
4798 btrfs_print_leaf(leaf); 4814 btrfs_print_leaf(leaf);
4799 BUG(); 4815 BUG();
4800 } 4816 }
@@ -4966,7 +4982,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4966 nritems = btrfs_header_nritems(leaf); 4982 nritems = btrfs_header_nritems(leaf);
4967 4983
4968 if (slot + nr != nritems) { 4984 if (slot + nr != nritems) {
4969 int data_end = leaf_data_end(fs_info, leaf); 4985 int data_end = leaf_data_end(leaf);
4970 4986
4971 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 4987 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4972 data_end + dsize, 4988 data_end + dsize,
@@ -4996,7 +5012,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4996 btrfs_set_header_level(leaf, 0); 5012 btrfs_set_header_level(leaf, 0);
4997 } else { 5013 } else {
4998 btrfs_set_path_blocking(path); 5014 btrfs_set_path_blocking(path);
4999 clean_tree_block(fs_info, leaf); 5015 btrfs_clean_tree_block(leaf);
5000 btrfs_del_leaf(trans, root, path, leaf); 5016 btrfs_del_leaf(trans, root, path, leaf);
5001 } 5017 }
5002 } else { 5018 } else {
@@ -5126,7 +5142,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
5126 struct btrfs_path *path, 5142 struct btrfs_path *path,
5127 u64 min_trans) 5143 u64 min_trans)
5128{ 5144{
5129 struct btrfs_fs_info *fs_info = root->fs_info;
5130 struct extent_buffer *cur; 5145 struct extent_buffer *cur;
5131 struct btrfs_key found_key; 5146 struct btrfs_key found_key;
5132 int slot; 5147 int slot;
@@ -5207,7 +5222,7 @@ find_next_key:
5207 goto out; 5222 goto out;
5208 } 5223 }
5209 btrfs_set_path_blocking(path); 5224 btrfs_set_path_blocking(path);
5210 cur = read_node_slot(fs_info, cur, slot); 5225 cur = read_node_slot(cur, slot);
5211 if (IS_ERR(cur)) { 5226 if (IS_ERR(cur)) {
5212 ret = PTR_ERR(cur); 5227 ret = PTR_ERR(cur);
5213 goto out; 5228 goto out;
@@ -5229,14 +5244,12 @@ out:
5229 return ret; 5244 return ret;
5230} 5245}
5231 5246
5232static int tree_move_down(struct btrfs_fs_info *fs_info, 5247static int tree_move_down(struct btrfs_path *path, int *level)
5233 struct btrfs_path *path,
5234 int *level)
5235{ 5248{
5236 struct extent_buffer *eb; 5249 struct extent_buffer *eb;
5237 5250
5238 BUG_ON(*level == 0); 5251 BUG_ON(*level == 0);
5239 eb = read_node_slot(fs_info, path->nodes[*level], path->slots[*level]); 5252 eb = read_node_slot(path->nodes[*level], path->slots[*level]);
5240 if (IS_ERR(eb)) 5253 if (IS_ERR(eb))
5241 return PTR_ERR(eb); 5254 return PTR_ERR(eb);
5242 5255
@@ -5276,8 +5289,7 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,
5276 * Returns 1 if it had to move up and next. 0 is returned if it moved only next 5289 * Returns 1 if it had to move up and next. 0 is returned if it moved only next
5277 * or down. 5290 * or down.
5278 */ 5291 */
5279static int tree_advance(struct btrfs_fs_info *fs_info, 5292static int tree_advance(struct btrfs_path *path,
5280 struct btrfs_path *path,
5281 int *level, int root_level, 5293 int *level, int root_level,
5282 int allow_down, 5294 int allow_down,
5283 struct btrfs_key *key) 5295 struct btrfs_key *key)
@@ -5287,7 +5299,7 @@ static int tree_advance(struct btrfs_fs_info *fs_info,
5287 if (*level == 0 || !allow_down) { 5299 if (*level == 0 || !allow_down) {
5288 ret = tree_move_next_or_upnext(path, level, root_level); 5300 ret = tree_move_next_or_upnext(path, level, root_level);
5289 } else { 5301 } else {
5290 ret = tree_move_down(fs_info, path, level); 5302 ret = tree_move_down(path, level);
5291 } 5303 }
5292 if (ret >= 0) { 5304 if (ret >= 0) {
5293 if (*level == 0) 5305 if (*level == 0)
@@ -5464,7 +5476,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5464 5476
5465 while (1) { 5477 while (1) {
5466 if (advance_left && !left_end_reached) { 5478 if (advance_left && !left_end_reached) {
5467 ret = tree_advance(fs_info, left_path, &left_level, 5479 ret = tree_advance(left_path, &left_level,
5468 left_root_level, 5480 left_root_level,
5469 advance_left != ADVANCE_ONLY_NEXT, 5481 advance_left != ADVANCE_ONLY_NEXT,
5470 &left_key); 5482 &left_key);
@@ -5475,7 +5487,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5475 advance_left = 0; 5487 advance_left = 0;
5476 } 5488 }
5477 if (advance_right && !right_end_reached) { 5489 if (advance_right && !right_end_reached) {
5478 ret = tree_advance(fs_info, right_path, &right_level, 5490 ret = tree_advance(right_path, &right_level,
5479 right_root_level, 5491 right_root_level,
5480 advance_right != ADVANCE_ONLY_NEXT, 5492 advance_right != ADVANCE_ONLY_NEXT,
5481 &right_key); 5493 &right_key);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5260a9263d73..0a61dff27f57 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
41extern struct kmem_cache *btrfs_path_cachep; 41extern struct kmem_cache *btrfs_path_cachep;
42extern struct kmem_cache *btrfs_free_space_cachep; 42extern struct kmem_cache *btrfs_free_space_cachep;
43struct btrfs_ordered_sum; 43struct btrfs_ordered_sum;
44struct btrfs_ref;
44 45
45#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ 46#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
46 47
@@ -1015,6 +1016,7 @@ struct btrfs_fs_info {
1015 /* used to keep from writing metadata until there is a nice batch */ 1016 /* used to keep from writing metadata until there is a nice batch */
1016 struct percpu_counter dirty_metadata_bytes; 1017 struct percpu_counter dirty_metadata_bytes;
1017 struct percpu_counter delalloc_bytes; 1018 struct percpu_counter delalloc_bytes;
1019 struct percpu_counter dio_bytes;
1018 s32 dirty_metadata_batch; 1020 s32 dirty_metadata_batch;
1019 s32 delalloc_batch; 1021 s32 delalloc_batch;
1020 1022
@@ -1092,10 +1094,7 @@ struct btrfs_fs_info {
1092 1094
1093 /* holds configuration and tracking. Protected by qgroup_lock */ 1095 /* holds configuration and tracking. Protected by qgroup_lock */
1094 struct rb_root qgroup_tree; 1096 struct rb_root qgroup_tree;
1095 struct rb_root qgroup_op_tree;
1096 spinlock_t qgroup_lock; 1097 spinlock_t qgroup_lock;
1097 spinlock_t qgroup_op_lock;
1098 atomic_t qgroup_op_seq;
1099 1098
1100 /* 1099 /*
1101 * used to avoid frequently calling ulist_alloc()/ulist_free() 1100 * used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1152,12 +1151,6 @@ struct btrfs_fs_info {
1152 struct mutex unused_bg_unpin_mutex; 1151 struct mutex unused_bg_unpin_mutex;
1153 struct mutex delete_unused_bgs_mutex; 1152 struct mutex delete_unused_bgs_mutex;
1154 1153
1155 /*
1156 * Chunks that can't be freed yet (under a trim/discard operation)
1157 * and will be latter freed. Protected by fs_info->chunk_mutex.
1158 */
1159 struct list_head pinned_chunks;
1160
1161 /* Cached block sizes */ 1154 /* Cached block sizes */
1162 u32 nodesize; 1155 u32 nodesize;
1163 u32 sectorsize; 1156 u32 sectorsize;
@@ -1348,6 +1341,12 @@ struct btrfs_root {
1348 * manipulation with the read-only status via SUBVOL_SETFLAGS 1341 * manipulation with the read-only status via SUBVOL_SETFLAGS
1349 */ 1342 */
1350 int send_in_progress; 1343 int send_in_progress;
1344 /*
1345 * Number of currently running deduplication operations that have a
1346 * destination inode belonging to this root. Protected by the lock
1347 * root_item_lock.
1348 */
1349 int dedupe_in_progress;
1351 struct btrfs_subvolume_writers *subv_writers; 1350 struct btrfs_subvolume_writers *subv_writers;
1352 atomic_t will_be_snapshotted; 1351 atomic_t will_be_snapshotted;
1353 atomic_t snapshot_force_cow; 1352 atomic_t snapshot_force_cow;
@@ -1540,6 +1539,21 @@ do { \
1540 1539
1541#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) 1540#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
1542 1541
1542#define BTRFS_INODE_FLAG_MASK \
1543 (BTRFS_INODE_NODATASUM | \
1544 BTRFS_INODE_NODATACOW | \
1545 BTRFS_INODE_READONLY | \
1546 BTRFS_INODE_NOCOMPRESS | \
1547 BTRFS_INODE_PREALLOC | \
1548 BTRFS_INODE_SYNC | \
1549 BTRFS_INODE_IMMUTABLE | \
1550 BTRFS_INODE_APPEND | \
1551 BTRFS_INODE_NODUMP | \
1552 BTRFS_INODE_NOATIME | \
1553 BTRFS_INODE_DIRSYNC | \
1554 BTRFS_INODE_COMPRESS | \
1555 BTRFS_INODE_ROOT_ITEM_INIT)
1556
1543struct btrfs_map_token { 1557struct btrfs_map_token {
1544 const struct extent_buffer *eb; 1558 const struct extent_buffer *eb;
1545 char *kaddr; 1559 char *kaddr;
@@ -2163,18 +2177,16 @@ static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
2163 return (btrfs_header_flags(eb) & flag) == flag; 2177 return (btrfs_header_flags(eb) & flag) == flag;
2164} 2178}
2165 2179
2166static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) 2180static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
2167{ 2181{
2168 u64 flags = btrfs_header_flags(eb); 2182 u64 flags = btrfs_header_flags(eb);
2169 btrfs_set_header_flags(eb, flags | flag); 2183 btrfs_set_header_flags(eb, flags | flag);
2170 return (flags & flag) == flag;
2171} 2184}
2172 2185
2173static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) 2186static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
2174{ 2187{
2175 u64 flags = btrfs_header_flags(eb); 2188 u64 flags = btrfs_header_flags(eb);
2176 btrfs_set_header_flags(eb, flags & ~flag); 2189 btrfs_set_header_flags(eb, flags & ~flag);
2177 return (flags & flag) == flag;
2178} 2190}
2179 2191
2180static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) 2192static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
@@ -2445,13 +2457,12 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
2445 * this returns the address of the start of the last item, 2457 * this returns the address of the start of the last item,
2446 * which is the stop of the leaf data stack 2458 * which is the stop of the leaf data stack
2447 */ 2459 */
2448static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info, 2460static inline unsigned int leaf_data_end(const struct extent_buffer *leaf)
2449 const struct extent_buffer *leaf)
2450{ 2461{
2451 u32 nr = btrfs_header_nritems(leaf); 2462 u32 nr = btrfs_header_nritems(leaf);
2452 2463
2453 if (nr == 0) 2464 if (nr == 0)
2454 return BTRFS_LEAF_DATA_SIZE(fs_info); 2465 return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
2455 return btrfs_item_offset_nr(leaf, nr - 1); 2466 return btrfs_item_offset_nr(leaf, nr - 1);
2456} 2467}
2457 2468
@@ -2698,8 +2709,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
2698void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2709void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
2699int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2710int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2700 unsigned long count); 2711 unsigned long count);
2701int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
2702 unsigned long count, u64 transid, int wait);
2703void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, 2712void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2704 struct btrfs_delayed_ref_root *delayed_refs, 2713 struct btrfs_delayed_ref_root *delayed_refs,
2705 struct btrfs_delayed_ref_head *head); 2714 struct btrfs_delayed_ref_head *head);
@@ -2711,8 +2720,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2711 u64 bytenr, u64 num, int reserved); 2720 u64 bytenr, u64 num, int reserved);
2712int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, 2721int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
2713 u64 bytenr, u64 num_bytes); 2722 u64 bytenr, u64 num_bytes);
2714int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, 2723int btrfs_exclude_logged_extents(struct extent_buffer *eb);
2715 struct extent_buffer *eb);
2716int btrfs_cross_ref_exist(struct btrfs_root *root, 2724int btrfs_cross_ref_exist(struct btrfs_root *root,
2717 u64 objectid, u64 offset, u64 bytenr); 2725 u64 objectid, u64 offset, u64 bytenr);
2718struct btrfs_block_group_cache *btrfs_lookup_block_group( 2726struct btrfs_block_group_cache *btrfs_lookup_block_group(
@@ -2745,13 +2753,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2745int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2753int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2746 struct extent_buffer *buf, int full_backref); 2754 struct extent_buffer *buf, int full_backref);
2747int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2755int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2748 struct btrfs_fs_info *fs_info,
2749 u64 bytenr, u64 num_bytes, u64 flags, 2756 u64 bytenr, u64 num_bytes, u64 flags,
2750 int level, int is_data); 2757 int level, int is_data);
2751int btrfs_free_extent(struct btrfs_trans_handle *trans, 2758int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
2752 struct btrfs_root *root,
2753 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
2754 u64 owner, u64 offset);
2755 2759
2756int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, 2760int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
2757 u64 start, u64 len, int delalloc); 2761 u64 start, u64 len, int delalloc);
@@ -2760,15 +2764,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
2760void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); 2764void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
2761int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); 2765int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
2762int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2766int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2763 struct btrfs_root *root, 2767 struct btrfs_ref *generic_ref);
2764 u64 bytenr, u64 num_bytes, u64 parent,
2765 u64 root_objectid, u64 owner, u64 offset);
2766 2768
2767int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); 2769int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
2768int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2770int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
2769 struct btrfs_fs_info *fs_info); 2771int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
2770int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
2771 struct btrfs_fs_info *fs_info);
2772int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr); 2772int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
2773int btrfs_free_block_groups(struct btrfs_fs_info *info); 2773int btrfs_free_block_groups(struct btrfs_fs_info *info);
2774int btrfs_read_block_groups(struct btrfs_fs_info *info); 2774int btrfs_read_block_groups(struct btrfs_fs_info *info);
@@ -2936,10 +2936,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
2936 struct extent_buffer **cow_ret, u64 new_root_objectid); 2936 struct extent_buffer **cow_ret, u64 new_root_objectid);
2937int btrfs_block_can_be_shared(struct btrfs_root *root, 2937int btrfs_block_can_be_shared(struct btrfs_root *root,
2938 struct extent_buffer *buf); 2938 struct extent_buffer *buf);
2939void btrfs_extend_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2939void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
2940 u32 data_size); 2940void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
2941void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
2942 struct btrfs_path *path, u32 new_size, int from_end);
2943int btrfs_split_item(struct btrfs_trans_handle *trans, 2941int btrfs_split_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root, 2942 struct btrfs_root *root,
2945 struct btrfs_path *path, 2943 struct btrfs_path *path,
@@ -3015,8 +3013,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
3015{ 3013{
3016 return btrfs_next_old_item(root, p, 0); 3014 return btrfs_next_old_item(root, p, 0);
3017} 3015}
3018int btrfs_leaf_free_space(struct btrfs_fs_info *fs_info, 3016int btrfs_leaf_free_space(struct extent_buffer *leaf);
3019 struct extent_buffer *leaf);
3020int __must_check btrfs_drop_snapshot(struct btrfs_root *root, 3017int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
3021 struct btrfs_block_rsv *block_rsv, 3018 struct btrfs_block_rsv *block_rsv,
3022 int update_ref, int for_reloc); 3019 int update_ref, int for_reloc);
@@ -3756,8 +3753,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3756void btrfs_scrub_pause(struct btrfs_fs_info *fs_info); 3753void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
3757void btrfs_scrub_continue(struct btrfs_fs_info *fs_info); 3754void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
3758int btrfs_scrub_cancel(struct btrfs_fs_info *info); 3755int btrfs_scrub_cancel(struct btrfs_fs_info *info);
3759int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, 3756int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
3760 struct btrfs_device *dev);
3761int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, 3757int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
3762 struct btrfs_scrub_progress *progress); 3758 struct btrfs_scrub_progress *progress);
3763static inline void btrfs_init_full_stripe_locks_tree( 3759static inline void btrfs_init_full_stripe_locks_tree(
@@ -3806,6 +3802,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
3806 return signal_pending(current); 3802 return signal_pending(current);
3807} 3803}
3808 3804
3805#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
3806
3809/* Sanity test specific functions */ 3807/* Sanity test specific functions */
3810#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3808#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3811void btrfs_test_inode_set_ops(struct inode *inode); 3809void btrfs_test_inode_set_ops(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c669f250d4a0..43fdb2992956 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -691,7 +691,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
691 struct btrfs_path *path, 691 struct btrfs_path *path,
692 struct btrfs_delayed_item *item) 692 struct btrfs_delayed_item *item)
693{ 693{
694 struct btrfs_fs_info *fs_info = root->fs_info;
695 struct btrfs_delayed_item *curr, *next; 694 struct btrfs_delayed_item *curr, *next;
696 int free_space; 695 int free_space;
697 int total_data_size = 0, total_size = 0; 696 int total_data_size = 0, total_size = 0;
@@ -708,7 +707,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
708 BUG_ON(!path->nodes[0]); 707 BUG_ON(!path->nodes[0]);
709 708
710 leaf = path->nodes[0]; 709 leaf = path->nodes[0];
711 free_space = btrfs_leaf_free_space(fs_info, leaf); 710 free_space = btrfs_leaf_free_space(leaf);
712 INIT_LIST_HEAD(&head); 711 INIT_LIST_HEAD(&head);
713 712
714 next = item; 713 next = item;
@@ -1692,7 +1691,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1692 name = (char *)(di + 1); 1691 name = (char *)(di + 1);
1693 name_len = btrfs_stack_dir_name_len(di); 1692 name_len = btrfs_stack_dir_name_len(di);
1694 1693
1695 d_type = btrfs_filetype_table[di->type]; 1694 d_type = fs_ftype_to_dtype(di->type);
1696 btrfs_disk_key_to_cpu(&location, &di->location); 1695 btrfs_disk_key_to_cpu(&location, &di->location);
1697 1696
1698 over = !dir_emit(ctx, name, name_len, 1697 over = !dir_emit(ctx, name, name_len,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 7d2a413df90d..a73fc23e2961 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -735,8 +735,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
735 * transaction commits. 735 * transaction commits.
736 */ 736 */
737int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, 737int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
738 u64 bytenr, u64 num_bytes, u64 parent, 738 struct btrfs_ref *generic_ref,
739 u64 ref_root, int level, int action,
740 struct btrfs_delayed_extent_op *extent_op, 739 struct btrfs_delayed_extent_op *extent_op,
741 int *old_ref_mod, int *new_ref_mod) 740 int *old_ref_mod, int *new_ref_mod)
742{ 741{
@@ -746,10 +745,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
746 struct btrfs_delayed_ref_root *delayed_refs; 745 struct btrfs_delayed_ref_root *delayed_refs;
747 struct btrfs_qgroup_extent_record *record = NULL; 746 struct btrfs_qgroup_extent_record *record = NULL;
748 int qrecord_inserted; 747 int qrecord_inserted;
749 bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); 748 bool is_system;
749 int action = generic_ref->action;
750 int level = generic_ref->tree_ref.level;
750 int ret; 751 int ret;
752 u64 bytenr = generic_ref->bytenr;
753 u64 num_bytes = generic_ref->len;
754 u64 parent = generic_ref->parent;
751 u8 ref_type; 755 u8 ref_type;
752 756
757 is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
758
759 ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
753 BUG_ON(extent_op && extent_op->is_data); 760 BUG_ON(extent_op && extent_op->is_data);
754 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); 761 ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
755 if (!ref) 762 if (!ref)
@@ -762,7 +769,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
762 } 769 }
763 770
764 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 771 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
765 is_fstree(ref_root)) { 772 is_fstree(generic_ref->real_root) &&
773 is_fstree(generic_ref->tree_ref.root) &&
774 !generic_ref->skip_qgroup) {
766 record = kzalloc(sizeof(*record), GFP_NOFS); 775 record = kzalloc(sizeof(*record), GFP_NOFS);
767 if (!record) { 776 if (!record) {
768 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); 777 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -777,13 +786,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
777 ref_type = BTRFS_TREE_BLOCK_REF_KEY; 786 ref_type = BTRFS_TREE_BLOCK_REF_KEY;
778 787
779 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, 788 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
780 ref_root, action, ref_type); 789 generic_ref->tree_ref.root, action, ref_type);
781 ref->root = ref_root; 790 ref->root = generic_ref->tree_ref.root;
782 ref->parent = parent; 791 ref->parent = parent;
783 ref->level = level; 792 ref->level = level;
784 793
785 init_delayed_ref_head(head_ref, record, bytenr, num_bytes, 794 init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
786 ref_root, 0, action, false, is_system); 795 generic_ref->tree_ref.root, 0, action, false,
796 is_system);
787 head_ref->extent_op = extent_op; 797 head_ref->extent_op = extent_op;
788 798
789 delayed_refs = &trans->transaction->delayed_refs; 799 delayed_refs = &trans->transaction->delayed_refs;
@@ -822,10 +832,9 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
822 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. 832 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
823 */ 833 */
824int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, 834int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
825 u64 bytenr, u64 num_bytes, 835 struct btrfs_ref *generic_ref,
826 u64 parent, u64 ref_root, 836 u64 reserved, int *old_ref_mod,
827 u64 owner, u64 offset, u64 reserved, int action, 837 int *new_ref_mod)
828 int *old_ref_mod, int *new_ref_mod)
829{ 838{
830 struct btrfs_fs_info *fs_info = trans->fs_info; 839 struct btrfs_fs_info *fs_info = trans->fs_info;
831 struct btrfs_delayed_data_ref *ref; 840 struct btrfs_delayed_data_ref *ref;
@@ -833,9 +842,17 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
833 struct btrfs_delayed_ref_root *delayed_refs; 842 struct btrfs_delayed_ref_root *delayed_refs;
834 struct btrfs_qgroup_extent_record *record = NULL; 843 struct btrfs_qgroup_extent_record *record = NULL;
835 int qrecord_inserted; 844 int qrecord_inserted;
845 int action = generic_ref->action;
836 int ret; 846 int ret;
847 u64 bytenr = generic_ref->bytenr;
848 u64 num_bytes = generic_ref->len;
849 u64 parent = generic_ref->parent;
850 u64 ref_root = generic_ref->data_ref.ref_root;
851 u64 owner = generic_ref->data_ref.ino;
852 u64 offset = generic_ref->data_ref.offset;
837 u8 ref_type; 853 u8 ref_type;
838 854
855 ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
839 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); 856 ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
840 if (!ref) 857 if (!ref)
841 return -ENOMEM; 858 return -ENOMEM;
@@ -859,7 +876,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
859 } 876 }
860 877
861 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 878 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
862 is_fstree(ref_root)) { 879 is_fstree(ref_root) &&
880 is_fstree(generic_ref->real_root) &&
881 !generic_ref->skip_qgroup) {
863 record = kzalloc(sizeof(*record), GFP_NOFS); 882 record = kzalloc(sizeof(*record), GFP_NOFS);
864 if (!record) { 883 if (!record) {
865 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); 884 kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -905,8 +924,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
905 return 0; 924 return 0;
906} 925}
907 926
908int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 927int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
909 struct btrfs_trans_handle *trans,
910 u64 bytenr, u64 num_bytes, 928 u64 bytenr, u64 num_bytes,
911 struct btrfs_delayed_extent_op *extent_op) 929 struct btrfs_delayed_extent_op *extent_op)
912{ 930{
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70606da440aa..c18f93ea88ed 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -176,6 +176,83 @@ struct btrfs_delayed_ref_root {
176 u64 qgroup_to_skip; 176 u64 qgroup_to_skip;
177}; 177};
178 178
179enum btrfs_ref_type {
180 BTRFS_REF_NOT_SET,
181 BTRFS_REF_DATA,
182 BTRFS_REF_METADATA,
183 BTRFS_REF_LAST,
184};
185
186struct btrfs_data_ref {
187 /* For EXTENT_DATA_REF */
188
189 /* Root which refers to this data extent */
190 u64 ref_root;
191
192 /* Inode which refers to this data extent */
193 u64 ino;
194
195 /*
196 * file_offset - extent_offset
197 *
198 * file_offset is the key.offset of the EXTENT_DATA key.
199 * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
200 */
201 u64 offset;
202};
203
204struct btrfs_tree_ref {
205 /*
206 * Level of this tree block
207 *
208 * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
209 */
210 int level;
211
212 /*
213 * Root which refers to this tree block.
214 *
215 * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
216 */
217 u64 root;
218
219 /* For non-skinny metadata, no special member needed */
220};
221
222struct btrfs_ref {
223 enum btrfs_ref_type type;
224 int action;
225
226 /*
227 * Whether this extent should go through qgroup record.
228 *
229 * Normally false, but for certain cases like delayed subtree scan,
230 * setting this flag can hugely reduce qgroup overhead.
231 */
232 bool skip_qgroup;
233
234 /*
235 * Optional. For which root is this modification.
236 * Mostly used for qgroup optimization.
237 *
238 * When unset, data/tree ref init code will populate it.
239 * In certain cases, we're modifying reference for a different root.
240 * E.g. COW fs tree blocks for balance.
241 * In that case, tree_ref::root will be fs tree, but we're doing this
242 * for reloc tree, then we should set @real_root to reloc tree.
243 */
244 u64 real_root;
245 u64 bytenr;
246 u64 len;
247
248 /* Bytenr of the parent tree block */
249 u64 parent;
250 union {
251 struct btrfs_data_ref data_ref;
252 struct btrfs_tree_ref tree_ref;
253 };
254};
255
179extern struct kmem_cache *btrfs_delayed_ref_head_cachep; 256extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
180extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; 257extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
181extern struct kmem_cache *btrfs_delayed_data_ref_cachep; 258extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
@@ -184,6 +261,38 @@ extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
184int __init btrfs_delayed_ref_init(void); 261int __init btrfs_delayed_ref_init(void);
185void __cold btrfs_delayed_ref_exit(void); 262void __cold btrfs_delayed_ref_exit(void);
186 263
264static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
265 int action, u64 bytenr, u64 len, u64 parent)
266{
267 generic_ref->action = action;
268 generic_ref->bytenr = bytenr;
269 generic_ref->len = len;
270 generic_ref->parent = parent;
271}
272
273static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
274 int level, u64 root)
275{
276 /* If @real_root not set, use @root as fallback */
277 if (!generic_ref->real_root)
278 generic_ref->real_root = root;
279 generic_ref->tree_ref.level = level;
280 generic_ref->tree_ref.root = root;
281 generic_ref->type = BTRFS_REF_METADATA;
282}
283
284static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
285 u64 ref_root, u64 ino, u64 offset)
286{
287 /* If @real_root not set, use @root as fallback */
288 if (!generic_ref->real_root)
289 generic_ref->real_root = ref_root;
290 generic_ref->data_ref.ref_root = ref_root;
291 generic_ref->data_ref.ino = ino;
292 generic_ref->data_ref.offset = offset;
293 generic_ref->type = BTRFS_REF_DATA;
294}
295
187static inline struct btrfs_delayed_extent_op * 296static inline struct btrfs_delayed_extent_op *
188btrfs_alloc_delayed_extent_op(void) 297btrfs_alloc_delayed_extent_op(void)
189{ 298{
@@ -224,17 +333,14 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
224} 333}
225 334
226int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, 335int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
227 u64 bytenr, u64 num_bytes, u64 parent, 336 struct btrfs_ref *generic_ref,
228 u64 ref_root, int level, int action,
229 struct btrfs_delayed_extent_op *extent_op, 337 struct btrfs_delayed_extent_op *extent_op,
230 int *old_ref_mod, int *new_ref_mod); 338 int *old_ref_mod, int *new_ref_mod);
231int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, 339int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
232 u64 bytenr, u64 num_bytes, 340 struct btrfs_ref *generic_ref,
233 u64 parent, u64 ref_root, 341 u64 reserved, int *old_ref_mod,
234 u64 owner, u64 offset, u64 reserved, int action, 342 int *new_ref_mod);
235 int *old_ref_mod, int *new_ref_mod); 343int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
236int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
237 struct btrfs_trans_handle *trans,
238 u64 bytenr, u64 num_bytes, 344 u64 bytenr, u64 num_bytes,
239 struct btrfs_delayed_extent_op *extent_op); 345 struct btrfs_delayed_extent_op *extent_op);
240void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, 346void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee193c5222b2..55c15f31d00d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -273,9 +273,9 @@ error:
273 * called from commit_transaction. Writes changed device replace state to 273 * called from commit_transaction. Writes changed device replace state to
274 * disk. 274 * disk.
275 */ 275 */
276int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, 276int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
277 struct btrfs_fs_info *fs_info)
278{ 277{
278 struct btrfs_fs_info *fs_info = trans->fs_info;
279 int ret; 279 int ret;
280 struct btrfs_root *dev_root = fs_info->dev_root; 280 struct btrfs_root *dev_root = fs_info->dev_root;
281 struct btrfs_path *path; 281 struct btrfs_path *path;
@@ -662,7 +662,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
662 btrfs_device_set_disk_total_bytes(tgt_device, 662 btrfs_device_set_disk_total_bytes(tgt_device,
663 src_device->disk_total_bytes); 663 src_device->disk_total_bytes);
664 btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); 664 btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
665 ASSERT(list_empty(&src_device->resized_list)); 665 ASSERT(list_empty(&src_device->post_commit_list));
666 tgt_device->commit_total_bytes = src_device->commit_total_bytes; 666 tgt_device->commit_total_bytes = src_device->commit_total_bytes;
667 tgt_device->commit_bytes_used = src_device->bytes_used; 667 tgt_device->commit_bytes_used = src_device->bytes_used;
668 668
@@ -696,7 +696,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
696 696
697 /* replace the sysfs entry */ 697 /* replace the sysfs entry */
698 btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device); 698 btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
699 btrfs_rm_dev_replace_free_srcdev(fs_info, src_device); 699 btrfs_rm_dev_replace_free_srcdev(src_device);
700 700
701 /* write back the superblocks */ 701 /* write back the superblocks */
702 trans = btrfs_start_transaction(root, 0); 702 trans = btrfs_start_transaction(root, 0);
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 4aa40bacc6cc..78c5d8f1adda 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -9,8 +9,7 @@
9struct btrfs_ioctl_dev_replace_args; 9struct btrfs_ioctl_dev_replace_args;
10 10
11int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); 11int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
12int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, 12int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
13 struct btrfs_fs_info *fs_info);
14int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, 13int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
15 struct btrfs_ioctl_dev_replace_args *args); 14 struct btrfs_ioctl_dev_replace_args *args);
16void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, 15void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 8de74d835dba..863367c2c620 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -36,7 +36,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
36 di = btrfs_match_dir_item_name(fs_info, path, name, name_len); 36 di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
37 if (di) 37 if (di)
38 return ERR_PTR(-EEXIST); 38 return ERR_PTR(-EEXIST);
39 btrfs_extend_item(fs_info, path, data_size); 39 btrfs_extend_item(path, data_size);
40 } else if (ret < 0) 40 } else if (ret < 0)
41 return ERR_PTR(ret); 41 return ERR_PTR(ret);
42 WARN_ON(ret > 0); 42 WARN_ON(ret > 0);
@@ -429,8 +429,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
429 start = btrfs_item_ptr_offset(leaf, path->slots[0]); 429 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
430 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 430 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
431 item_len - (ptr + sub_item_len - start)); 431 item_len - (ptr + sub_item_len - start));
432 btrfs_truncate_item(root->fs_info, path, 432 btrfs_truncate_item(path, item_len - sub_item_len, 1);
433 item_len - sub_item_len, 1);
434 } 433 }
435 return ret; 434 return ret;
436} 435}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6fe9197f6ee4..663efce22d98 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -260,15 +260,12 @@ void btrfs_csum_final(u32 crc, u8 *result)
260} 260}
261 261
262/* 262/*
263 * compute the csum for a btree block, and either verify it or write it 263 * Compute the csum of a btree block and store the result to provided buffer.
264 * into the csum field of the block. 264 *
265 * Returns error if the extent buffer cannot be mapped.
265 */ 266 */
266static int csum_tree_block(struct btrfs_fs_info *fs_info, 267static int csum_tree_block(struct extent_buffer *buf, u8 *result)
267 struct extent_buffer *buf,
268 int verify)
269{ 268{
270 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
271 char result[BTRFS_CSUM_SIZE];
272 unsigned long len; 269 unsigned long len;
273 unsigned long cur_len; 270 unsigned long cur_len;
274 unsigned long offset = BTRFS_CSUM_SIZE; 271 unsigned long offset = BTRFS_CSUM_SIZE;
@@ -288,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
288 */ 285 */
289 err = map_private_extent_buffer(buf, offset, 32, 286 err = map_private_extent_buffer(buf, offset, 32,
290 &kaddr, &map_start, &map_len); 287 &kaddr, &map_start, &map_len);
291 if (err) 288 if (WARN_ON(err))
292 return err; 289 return err;
293 cur_len = min(len, map_len - (offset - map_start)); 290 cur_len = min(len, map_len - (offset - map_start));
294 crc = btrfs_csum_data(kaddr + offset - map_start, 291 crc = btrfs_csum_data(kaddr + offset - map_start,
@@ -300,23 +297,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
300 297
301 btrfs_csum_final(crc, result); 298 btrfs_csum_final(crc, result);
302 299
303 if (verify) {
304 if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
305 u32 val;
306 u32 found = 0;
307 memcpy(&found, result, csum_size);
308
309 read_extent_buffer(buf, &val, 0, csum_size);
310 btrfs_warn_rl(fs_info,
311 "%s checksum verify failed on %llu wanted %X found %X level %d",
312 fs_info->sb->s_id, buf->start,
313 val, found, btrfs_header_level(buf));
314 return -EUCLEAN;
315 }
316 } else {
317 write_extent_buffer(buf, result, 0, csum_size);
318 }
319
320 return 0; 300 return 0;
321} 301}
322 302
@@ -414,22 +394,21 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
414 return ret; 394 return ret;
415} 395}
416 396
417static int verify_level_key(struct btrfs_fs_info *fs_info, 397int btrfs_verify_level_key(struct extent_buffer *eb, int level,
418 struct extent_buffer *eb, int level, 398 struct btrfs_key *first_key, u64 parent_transid)
419 struct btrfs_key *first_key, u64 parent_transid)
420{ 399{
400 struct btrfs_fs_info *fs_info = eb->fs_info;
421 int found_level; 401 int found_level;
422 struct btrfs_key found_key; 402 struct btrfs_key found_key;
423 int ret; 403 int ret;
424 404
425 found_level = btrfs_header_level(eb); 405 found_level = btrfs_header_level(eb);
426 if (found_level != level) { 406 if (found_level != level) {
427#ifdef CONFIG_BTRFS_DEBUG 407 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
428 WARN_ON(1); 408 KERN_ERR "BTRFS: tree level check failed\n");
429 btrfs_err(fs_info, 409 btrfs_err(fs_info,
430"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", 410"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
431 eb->start, level, found_level); 411 eb->start, level, found_level);
432#endif
433 return -EIO; 412 return -EIO;
434 } 413 }
435 414
@@ -450,9 +429,9 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
450 btrfs_item_key_to_cpu(eb, &found_key, 0); 429 btrfs_item_key_to_cpu(eb, &found_key, 0);
451 ret = btrfs_comp_cpu_keys(first_key, &found_key); 430 ret = btrfs_comp_cpu_keys(first_key, &found_key);
452 431
453#ifdef CONFIG_BTRFS_DEBUG
454 if (ret) { 432 if (ret) {
455 WARN_ON(1); 433 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
434 KERN_ERR "BTRFS: tree first key check failed\n");
456 btrfs_err(fs_info, 435 btrfs_err(fs_info,
457"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", 436"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
458 eb->start, parent_transid, first_key->objectid, 437 eb->start, parent_transid, first_key->objectid,
@@ -460,7 +439,6 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
460 found_key.objectid, found_key.type, 439 found_key.objectid, found_key.type,
461 found_key.offset); 440 found_key.offset);
462 } 441 }
463#endif
464 return ret; 442 return ret;
465} 443}
466 444
@@ -472,11 +450,11 @@ static int verify_level_key(struct btrfs_fs_info *fs_info,
472 * @level: expected level, mandatory check 450 * @level: expected level, mandatory check
473 * @first_key: expected key of first slot, skip check if NULL 451 * @first_key: expected key of first slot, skip check if NULL
474 */ 452 */
475static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, 453static int btree_read_extent_buffer_pages(struct extent_buffer *eb,
476 struct extent_buffer *eb,
477 u64 parent_transid, int level, 454 u64 parent_transid, int level,
478 struct btrfs_key *first_key) 455 struct btrfs_key *first_key)
479{ 456{
457 struct btrfs_fs_info *fs_info = eb->fs_info;
480 struct extent_io_tree *io_tree; 458 struct extent_io_tree *io_tree;
481 int failed = 0; 459 int failed = 0;
482 int ret; 460 int ret;
@@ -487,14 +465,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
487 io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; 465 io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
488 while (1) { 466 while (1) {
489 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 467 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
490 ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, 468 ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
491 mirror_num);
492 if (!ret) { 469 if (!ret) {
493 if (verify_parent_transid(io_tree, eb, 470 if (verify_parent_transid(io_tree, eb,
494 parent_transid, 0)) 471 parent_transid, 0))
495 ret = -EIO; 472 ret = -EIO;
496 else if (verify_level_key(fs_info, eb, level, 473 else if (btrfs_verify_level_key(eb, level,
497 first_key, parent_transid)) 474 first_key, parent_transid))
498 ret = -EUCLEAN; 475 ret = -EUCLEAN;
499 else 476 else
500 break; 477 break;
@@ -519,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
519 } 496 }
520 497
521 if (failed && !ret && failed_mirror) 498 if (failed && !ret && failed_mirror)
522 repair_eb_io_failure(fs_info, eb, failed_mirror); 499 btrfs_repair_eb_io_failure(eb, failed_mirror);
523 500
524 return ret; 501 return ret;
525} 502}
@@ -533,7 +510,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
533{ 510{
534 u64 start = page_offset(page); 511 u64 start = page_offset(page);
535 u64 found_start; 512 u64 found_start;
513 u8 result[BTRFS_CSUM_SIZE];
514 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
536 struct extent_buffer *eb; 515 struct extent_buffer *eb;
516 int ret;
537 517
538 eb = (struct extent_buffer *)page->private; 518 eb = (struct extent_buffer *)page->private;
539 if (page != eb->pages[0]) 519 if (page != eb->pages[0])
@@ -552,12 +532,28 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
552 ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, 532 ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
553 btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); 533 btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
554 534
555 return csum_tree_block(fs_info, eb, 0); 535 if (csum_tree_block(eb, result))
536 return -EINVAL;
537
538 if (btrfs_header_level(eb))
539 ret = btrfs_check_node(eb);
540 else
541 ret = btrfs_check_leaf_full(eb);
542
543 if (ret < 0) {
544 btrfs_err(fs_info,
545 "block=%llu write time tree block corruption detected",
546 eb->start);
547 return ret;
548 }
549 write_extent_buffer(eb, result, 0, csum_size);
550
551 return 0;
556} 552}
557 553
558static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, 554static int check_tree_block_fsid(struct extent_buffer *eb)
559 struct extent_buffer *eb)
560{ 555{
556 struct btrfs_fs_info *fs_info = eb->fs_info;
561 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 557 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
562 u8 fsid[BTRFS_FSID_SIZE]; 558 u8 fsid[BTRFS_FSID_SIZE];
563 int ret = 1; 559 int ret = 1;
@@ -595,7 +591,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
595 struct extent_buffer *eb; 591 struct extent_buffer *eb;
596 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 592 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
597 struct btrfs_fs_info *fs_info = root->fs_info; 593 struct btrfs_fs_info *fs_info = root->fs_info;
594 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
598 int ret = 0; 595 int ret = 0;
596 u8 result[BTRFS_CSUM_SIZE];
599 int reads_done; 597 int reads_done;
600 598
601 if (!page->private) 599 if (!page->private)
@@ -625,7 +623,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
625 ret = -EIO; 623 ret = -EIO;
626 goto err; 624 goto err;
627 } 625 }
628 if (check_tree_block_fsid(fs_info, eb)) { 626 if (check_tree_block_fsid(eb)) {
629 btrfs_err_rl(fs_info, "bad fsid on block %llu", 627 btrfs_err_rl(fs_info, "bad fsid on block %llu",
630 eb->start); 628 eb->start);
631 ret = -EIO; 629 ret = -EIO;
@@ -642,25 +640,44 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
642 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 640 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
643 eb, found_level); 641 eb, found_level);
644 642
645 ret = csum_tree_block(fs_info, eb, 1); 643 ret = csum_tree_block(eb, result);
646 if (ret) 644 if (ret)
647 goto err; 645 goto err;
648 646
647 if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
648 u32 val;
649 u32 found = 0;
650
651 memcpy(&found, result, csum_size);
652
653 read_extent_buffer(eb, &val, 0, csum_size);
654 btrfs_warn_rl(fs_info,
655 "%s checksum verify failed on %llu wanted %x found %x level %d",
656 fs_info->sb->s_id, eb->start,
657 val, found, btrfs_header_level(eb));
658 ret = -EUCLEAN;
659 goto err;
660 }
661
649 /* 662 /*
650 * If this is a leaf block and it is corrupt, set the corrupt bit so 663 * If this is a leaf block and it is corrupt, set the corrupt bit so
651 * that we don't try and read the other copies of this block, just 664 * that we don't try and read the other copies of this block, just
652 * return -EIO. 665 * return -EIO.
653 */ 666 */
654 if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) { 667 if (found_level == 0 && btrfs_check_leaf_full(eb)) {
655 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 668 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
656 ret = -EIO; 669 ret = -EIO;
657 } 670 }
658 671
659 if (found_level > 0 && btrfs_check_node(fs_info, eb)) 672 if (found_level > 0 && btrfs_check_node(eb))
660 ret = -EIO; 673 ret = -EIO;
661 674
662 if (!ret) 675 if (!ret)
663 set_extent_buffer_uptodate(eb); 676 set_extent_buffer_uptodate(eb);
677 else
678 btrfs_err(fs_info,
679 "block=%llu read time tree block corruption detected",
680 eb->start);
664err: 681err:
665 if (reads_done && 682 if (reads_done &&
666 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 683 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
@@ -867,11 +884,10 @@ static int check_async_write(struct btrfs_inode *bi)
867 return 1; 884 return 1;
868} 885}
869 886
870static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, 887static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
871 int mirror_num, unsigned long bio_flags, 888 int mirror_num,
872 u64 bio_offset) 889 unsigned long bio_flags)
873{ 890{
874 struct inode *inode = private_data;
875 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 891 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
876 int async = check_async_write(BTRFS_I(inode)); 892 int async = check_async_write(BTRFS_I(inode));
877 blk_status_t ret; 893 blk_status_t ret;
@@ -897,8 +913,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
897 * checksumming can happen in parallel across all CPUs 913 * checksumming can happen in parallel across all CPUs
898 */ 914 */
899 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, 915 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
900 bio_offset, private_data, 916 0, inode, btree_submit_bio_start);
901 btree_submit_bio_start);
902 } 917 }
903 918
904 if (ret) 919 if (ret)
@@ -1017,22 +1032,23 @@ static const struct address_space_operations btree_aops = {
1017void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) 1032void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
1018{ 1033{
1019 struct extent_buffer *buf = NULL; 1034 struct extent_buffer *buf = NULL;
1020 struct inode *btree_inode = fs_info->btree_inode; 1035 int ret;
1021 1036
1022 buf = btrfs_find_create_tree_block(fs_info, bytenr); 1037 buf = btrfs_find_create_tree_block(fs_info, bytenr);
1023 if (IS_ERR(buf)) 1038 if (IS_ERR(buf))
1024 return; 1039 return;
1025 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 1040
1026 buf, WAIT_NONE, 0); 1041 ret = read_extent_buffer_pages(buf, WAIT_NONE, 0);
1027 free_extent_buffer(buf); 1042 if (ret < 0)
1043 free_extent_buffer_stale(buf);
1044 else
1045 free_extent_buffer(buf);
1028} 1046}
1029 1047
1030int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, 1048int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
1031 int mirror_num, struct extent_buffer **eb) 1049 int mirror_num, struct extent_buffer **eb)
1032{ 1050{
1033 struct extent_buffer *buf = NULL; 1051 struct extent_buffer *buf = NULL;
1034 struct inode *btree_inode = fs_info->btree_inode;
1035 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
1036 int ret; 1052 int ret;
1037 1053
1038 buf = btrfs_find_create_tree_block(fs_info, bytenr); 1054 buf = btrfs_find_create_tree_block(fs_info, bytenr);
@@ -1041,15 +1057,14 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
1041 1057
1042 set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); 1058 set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
1043 1059
1044 ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, 1060 ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
1045 mirror_num);
1046 if (ret) { 1061 if (ret) {
1047 free_extent_buffer(buf); 1062 free_extent_buffer_stale(buf);
1048 return ret; 1063 return ret;
1049 } 1064 }
1050 1065
1051 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { 1066 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
1052 free_extent_buffer(buf); 1067 free_extent_buffer_stale(buf);
1053 return -EIO; 1068 return -EIO;
1054 } else if (extent_buffer_uptodate(buf)) { 1069 } else if (extent_buffer_uptodate(buf)) {
1055 *eb = buf; 1070 *eb = buf;
@@ -1068,19 +1083,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
1068 return alloc_extent_buffer(fs_info, bytenr); 1083 return alloc_extent_buffer(fs_info, bytenr);
1069} 1084}
1070 1085
1071
1072int btrfs_write_tree_block(struct extent_buffer *buf)
1073{
1074 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
1075 buf->start + buf->len - 1);
1076}
1077
1078void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1079{
1080 filemap_fdatawait_range(buf->pages[0]->mapping,
1081 buf->start, buf->start + buf->len - 1);
1082}
1083
1084/* 1086/*
1085 * Read tree block at logical address @bytenr and do variant basic but critical 1087 * Read tree block at logical address @bytenr and do variant basic but critical
1086 * verification. 1088 * verification.
@@ -1100,19 +1102,19 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
1100 if (IS_ERR(buf)) 1102 if (IS_ERR(buf))
1101 return buf; 1103 return buf;
1102 1104
1103 ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid, 1105 ret = btree_read_extent_buffer_pages(buf, parent_transid,
1104 level, first_key); 1106 level, first_key);
1105 if (ret) { 1107 if (ret) {
1106 free_extent_buffer(buf); 1108 free_extent_buffer_stale(buf);
1107 return ERR_PTR(ret); 1109 return ERR_PTR(ret);
1108 } 1110 }
1109 return buf; 1111 return buf;
1110 1112
1111} 1113}
1112 1114
1113void clean_tree_block(struct btrfs_fs_info *fs_info, 1115void btrfs_clean_tree_block(struct extent_buffer *buf)
1114 struct extent_buffer *buf)
1115{ 1116{
1117 struct btrfs_fs_info *fs_info = buf->fs_info;
1116 if (btrfs_header_generation(buf) == 1118 if (btrfs_header_generation(buf) ==
1117 fs_info->running_transaction->transid) { 1119 fs_info->running_transaction->transid) {
1118 btrfs_assert_tree_locked(buf); 1120 btrfs_assert_tree_locked(buf);
@@ -1208,7 +1210,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
1208 root->log_transid_committed = -1; 1210 root->log_transid_committed = -1;
1209 root->last_log_commit = 0; 1211 root->last_log_commit = 0;
1210 if (!dummy) 1212 if (!dummy)
1211 extent_io_tree_init(&root->dirty_log_pages, NULL); 1213 extent_io_tree_init(fs_info, &root->dirty_log_pages,
1214 IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL);
1212 1215
1213 memset(&root->root_key, 0, sizeof(root->root_key)); 1216 memset(&root->root_key, 0, sizeof(root->root_key));
1214 memset(&root->root_item, 0, sizeof(root->root_item)); 1217 memset(&root->root_item, 0, sizeof(root->root_item));
@@ -1255,9 +1258,9 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
1255#endif 1258#endif
1256 1259
1257struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 1260struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1258 struct btrfs_fs_info *fs_info,
1259 u64 objectid) 1261 u64 objectid)
1260{ 1262{
1263 struct btrfs_fs_info *fs_info = trans->fs_info;
1261 struct extent_buffer *leaf; 1264 struct extent_buffer *leaf;
1262 struct btrfs_root *tree_root = fs_info->tree_root; 1265 struct btrfs_root *tree_root = fs_info->tree_root;
1263 struct btrfs_root *root; 1266 struct btrfs_root *root;
@@ -2138,8 +2141,9 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
2138 inode->i_mapping->a_ops = &btree_aops; 2141 inode->i_mapping->a_ops = &btree_aops;
2139 2142
2140 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 2143 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
2141 extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode); 2144 extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
2142 BTRFS_I(inode)->io_tree.track_uptodate = 0; 2145 IO_TREE_INODE_IO, inode);
2146 BTRFS_I(inode)->io_tree.track_uptodate = false;
2143 extent_map_tree_init(&BTRFS_I(inode)->extent_tree); 2147 extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
2144 2148
2145 BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops; 2149 BTRFS_I(inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2162,7 +2166,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2162 spin_lock_init(&fs_info->qgroup_lock); 2166 spin_lock_init(&fs_info->qgroup_lock);
2163 mutex_init(&fs_info->qgroup_ioctl_lock); 2167 mutex_init(&fs_info->qgroup_ioctl_lock);
2164 fs_info->qgroup_tree = RB_ROOT; 2168 fs_info->qgroup_tree = RB_ROOT;
2165 fs_info->qgroup_op_tree = RB_ROOT;
2166 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2169 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2167 fs_info->qgroup_seq = 1; 2170 fs_info->qgroup_seq = 1;
2168 fs_info->qgroup_ulist = NULL; 2171 fs_info->qgroup_ulist = NULL;
@@ -2630,11 +2633,17 @@ int open_ctree(struct super_block *sb,
2630 goto fail; 2633 goto fail;
2631 } 2634 }
2632 2635
2633 ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); 2636 ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
2634 if (ret) { 2637 if (ret) {
2635 err = ret; 2638 err = ret;
2636 goto fail_srcu; 2639 goto fail_srcu;
2637 } 2640 }
2641
2642 ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
2643 if (ret) {
2644 err = ret;
2645 goto fail_dio_bytes;
2646 }
2638 fs_info->dirty_metadata_batch = PAGE_SIZE * 2647 fs_info->dirty_metadata_batch = PAGE_SIZE *
2639 (1 + ilog2(nr_cpu_ids)); 2648 (1 + ilog2(nr_cpu_ids));
2640 2649
@@ -2667,7 +2676,6 @@ int open_ctree(struct super_block *sb,
2667 spin_lock_init(&fs_info->defrag_inodes_lock); 2676 spin_lock_init(&fs_info->defrag_inodes_lock);
2668 spin_lock_init(&fs_info->tree_mod_seq_lock); 2677 spin_lock_init(&fs_info->tree_mod_seq_lock);
2669 spin_lock_init(&fs_info->super_lock); 2678 spin_lock_init(&fs_info->super_lock);
2670 spin_lock_init(&fs_info->qgroup_op_lock);
2671 spin_lock_init(&fs_info->buffer_lock); 2679 spin_lock_init(&fs_info->buffer_lock);
2672 spin_lock_init(&fs_info->unused_bgs_lock); 2680 spin_lock_init(&fs_info->unused_bgs_lock);
2673 rwlock_init(&fs_info->tree_mod_log_lock); 2681 rwlock_init(&fs_info->tree_mod_log_lock);
@@ -2694,7 +2702,6 @@ int open_ctree(struct super_block *sb,
2694 2702
2695 atomic_set(&fs_info->async_delalloc_pages, 0); 2703 atomic_set(&fs_info->async_delalloc_pages, 0);
2696 atomic_set(&fs_info->defrag_running, 0); 2704 atomic_set(&fs_info->defrag_running, 0);
2697 atomic_set(&fs_info->qgroup_op_seq, 0);
2698 atomic_set(&fs_info->reada_works_cnt, 0); 2705 atomic_set(&fs_info->reada_works_cnt, 0);
2699 atomic_set(&fs_info->nr_delayed_iputs, 0); 2706 atomic_set(&fs_info->nr_delayed_iputs, 0);
2700 atomic64_set(&fs_info->tree_mod_seq, 0); 2707 atomic64_set(&fs_info->tree_mod_seq, 0);
@@ -2748,8 +2755,10 @@ int open_ctree(struct super_block *sb,
2748 fs_info->block_group_cache_tree = RB_ROOT; 2755 fs_info->block_group_cache_tree = RB_ROOT;
2749 fs_info->first_logical_byte = (u64)-1; 2756 fs_info->first_logical_byte = (u64)-1;
2750 2757
2751 extent_io_tree_init(&fs_info->freed_extents[0], NULL); 2758 extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
2752 extent_io_tree_init(&fs_info->freed_extents[1], NULL); 2759 IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
2760 extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
2761 IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
2753 fs_info->pinned_extents = &fs_info->freed_extents[0]; 2762 fs_info->pinned_extents = &fs_info->freed_extents[0];
2754 set_bit(BTRFS_FS_BARRIER, &fs_info->flags); 2763 set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
2755 2764
@@ -2776,8 +2785,6 @@ int open_ctree(struct super_block *sb,
2776 init_waitqueue_head(&fs_info->async_submit_wait); 2785 init_waitqueue_head(&fs_info->async_submit_wait);
2777 init_waitqueue_head(&fs_info->delayed_iputs_wait); 2786 init_waitqueue_head(&fs_info->delayed_iputs_wait);
2778 2787
2779 INIT_LIST_HEAD(&fs_info->pinned_chunks);
2780
2781 /* Usable values until the real ones are cached from the superblock */ 2788 /* Usable values until the real ones are cached from the superblock */
2782 fs_info->nodesize = 4096; 2789 fs_info->nodesize = 4096;
2783 fs_info->sectorsize = 4096; 2790 fs_info->sectorsize = 4096;
@@ -3335,6 +3342,8 @@ fail_delalloc_bytes:
3335 percpu_counter_destroy(&fs_info->delalloc_bytes); 3342 percpu_counter_destroy(&fs_info->delalloc_bytes);
3336fail_dirty_metadata_bytes: 3343fail_dirty_metadata_bytes:
3337 percpu_counter_destroy(&fs_info->dirty_metadata_bytes); 3344 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
3345fail_dio_bytes:
3346 percpu_counter_destroy(&fs_info->dio_bytes);
3338fail_srcu: 3347fail_srcu:
3339 cleanup_srcu_struct(&fs_info->subvol_srcu); 3348 cleanup_srcu_struct(&fs_info->subvol_srcu);
3340fail: 3349fail:
@@ -4016,6 +4025,10 @@ void close_ctree(struct btrfs_fs_info *fs_info)
4016 percpu_counter_sum(&fs_info->delalloc_bytes)); 4025 percpu_counter_sum(&fs_info->delalloc_bytes));
4017 } 4026 }
4018 4027
4028 if (percpu_counter_sum(&fs_info->dio_bytes))
4029 btrfs_info(fs_info, "at unmount dio bytes count %lld",
4030 percpu_counter_sum(&fs_info->dio_bytes));
4031
4019 btrfs_sysfs_remove_mounted(fs_info); 4032 btrfs_sysfs_remove_mounted(fs_info);
4020 btrfs_sysfs_remove_fsid(fs_info->fs_devices); 4033 btrfs_sysfs_remove_fsid(fs_info->fs_devices);
4021 4034
@@ -4042,25 +4055,17 @@ void close_ctree(struct btrfs_fs_info *fs_info)
4042 btrfsic_unmount(fs_info->fs_devices); 4055 btrfsic_unmount(fs_info->fs_devices);
4043#endif 4056#endif
4044 4057
4045 btrfs_close_devices(fs_info->fs_devices);
4046 btrfs_mapping_tree_free(&fs_info->mapping_tree); 4058 btrfs_mapping_tree_free(&fs_info->mapping_tree);
4059 btrfs_close_devices(fs_info->fs_devices);
4047 4060
4048 percpu_counter_destroy(&fs_info->dirty_metadata_bytes); 4061 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
4049 percpu_counter_destroy(&fs_info->delalloc_bytes); 4062 percpu_counter_destroy(&fs_info->delalloc_bytes);
4063 percpu_counter_destroy(&fs_info->dio_bytes);
4050 percpu_counter_destroy(&fs_info->dev_replace.bio_counter); 4064 percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
4051 cleanup_srcu_struct(&fs_info->subvol_srcu); 4065 cleanup_srcu_struct(&fs_info->subvol_srcu);
4052 4066
4053 btrfs_free_stripe_hash_table(fs_info); 4067 btrfs_free_stripe_hash_table(fs_info);
4054 btrfs_free_ref_cache(fs_info); 4068 btrfs_free_ref_cache(fs_info);
4055
4056 while (!list_empty(&fs_info->pinned_chunks)) {
4057 struct extent_map *em;
4058
4059 em = list_first_entry(&fs_info->pinned_chunks,
4060 struct extent_map, list);
4061 list_del_init(&em->list);
4062 free_extent_map(em);
4063 }
4064} 4069}
4065 4070
4066int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, 4071int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
@@ -4114,7 +4119,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
4114 * So here we should only check item pointers, not item data. 4119 * So here we should only check item pointers, not item data.
4115 */ 4120 */
4116 if (btrfs_header_level(buf) == 0 && 4121 if (btrfs_header_level(buf) == 0 &&
4117 btrfs_check_leaf_relaxed(fs_info, buf)) { 4122 btrfs_check_leaf_relaxed(buf)) {
4118 btrfs_print_leaf(buf); 4123 btrfs_print_leaf(buf);
4119 ASSERT(0); 4124 ASSERT(0);
4120 } 4125 }
@@ -4157,10 +4162,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
4157int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, 4162int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
4158 struct btrfs_key *first_key) 4163 struct btrfs_key *first_key)
4159{ 4164{
4160 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; 4165 return btree_read_extent_buffer_pages(buf, parent_transid,
4161 struct btrfs_fs_info *fs_info = root->fs_info;
4162
4163 return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
4164 level, first_key); 4166 level, first_key);
4165} 4167}
4166 4168
@@ -4484,10 +4486,17 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
4484void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4486void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4485 struct btrfs_fs_info *fs_info) 4487 struct btrfs_fs_info *fs_info)
4486{ 4488{
4489 struct btrfs_device *dev, *tmp;
4490
4487 btrfs_cleanup_dirty_bgs(cur_trans, fs_info); 4491 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
4488 ASSERT(list_empty(&cur_trans->dirty_bgs)); 4492 ASSERT(list_empty(&cur_trans->dirty_bgs));
4489 ASSERT(list_empty(&cur_trans->io_bgs)); 4493 ASSERT(list_empty(&cur_trans->io_bgs));
4490 4494
4495 list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
4496 post_commit_list) {
4497 list_del_init(&dev->post_commit_list);
4498 }
4499
4491 btrfs_destroy_delayed_refs(cur_trans, fs_info); 4500 btrfs_destroy_delayed_refs(cur_trans, fs_info);
4492 4501
4493 cur_trans->state = TRANS_STATE_COMMIT_START; 4502 cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 987a64bc0c66..a0161aa1ea0b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
39struct btrfs_device; 39struct btrfs_device;
40struct btrfs_fs_devices; 40struct btrfs_fs_devices;
41 41
42int btrfs_verify_level_key(struct extent_buffer *eb, int level,
43 struct btrfs_key *first_key, u64 parent_transid);
42struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, 44struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
43 u64 parent_transid, int level, 45 u64 parent_transid, int level,
44 struct btrfs_key *first_key); 46 struct btrfs_key *first_key);
@@ -48,7 +50,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
48struct extent_buffer *btrfs_find_create_tree_block( 50struct extent_buffer *btrfs_find_create_tree_block(
49 struct btrfs_fs_info *fs_info, 51 struct btrfs_fs_info *fs_info,
50 u64 bytenr); 52 u64 bytenr);
51void clean_tree_block(struct btrfs_fs_info *fs_info, struct extent_buffer *buf); 53void btrfs_clean_tree_block(struct extent_buffer *buf);
52int open_ctree(struct super_block *sb, 54int open_ctree(struct super_block *sb,
53 struct btrfs_fs_devices *fs_devices, 55 struct btrfs_fs_devices *fs_devices,
54 char *options); 56 char *options);
@@ -123,8 +125,6 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
123 extent_submit_bio_start_t *submit_bio_start); 125 extent_submit_bio_start_t *submit_bio_start);
124blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, 126blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
125 int mirror_num); 127 int mirror_num);
126int btrfs_write_tree_block(struct extent_buffer *buf);
127void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
128int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 128int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
129 struct btrfs_fs_info *fs_info); 129 struct btrfs_fs_info *fs_info);
130int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 130int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
@@ -134,7 +134,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
134void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 134void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
135 struct btrfs_fs_info *fs_info); 135 struct btrfs_fs_info *fs_info);
136struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 136struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
137 struct btrfs_fs_info *fs_info,
138 u64 objectid); 137 u64 objectid);
139int btree_lock_page_hook(struct page *page, void *data, 138int btree_lock_page_hook(struct page *page, void *data,
140 void (*flush_fn)(void *)); 139 void (*flush_fn)(void *));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c5880329ae37..f79e477a378e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -643,7 +643,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
643 643
644 if (btrfs_test_opt(fs_info, SPACE_CACHE)) { 644 if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
645 mutex_lock(&caching_ctl->mutex); 645 mutex_lock(&caching_ctl->mutex);
646 ret = load_free_space_cache(fs_info, cache); 646 ret = load_free_space_cache(cache);
647 647
648 spin_lock(&cache->lock); 648 spin_lock(&cache->lock);
649 if (ret == 1) { 649 if (ret == 1) {
@@ -756,14 +756,15 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
756 return NULL; 756 return NULL;
757} 757}
758 758
759static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes, 759static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
760 bool metadata, u64 root_objectid) 760 struct btrfs_ref *ref)
761{ 761{
762 struct btrfs_space_info *space_info; 762 struct btrfs_space_info *space_info;
763 s64 num_bytes = -ref->len;
763 u64 flags; 764 u64 flags;
764 765
765 if (metadata) { 766 if (ref->type == BTRFS_REF_METADATA) {
766 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) 767 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
767 flags = BTRFS_BLOCK_GROUP_SYSTEM; 768 flags = BTRFS_BLOCK_GROUP_SYSTEM;
768 else 769 else
769 flags = BTRFS_BLOCK_GROUP_METADATA; 770 flags = BTRFS_BLOCK_GROUP_METADATA;
@@ -1704,7 +1705,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1704 type = extent_ref_type(parent, owner); 1705 type = extent_ref_type(parent, owner);
1705 size = btrfs_extent_inline_ref_size(type); 1706 size = btrfs_extent_inline_ref_size(type);
1706 1707
1707 btrfs_extend_item(fs_info, path, size); 1708 btrfs_extend_item(path, size);
1708 1709
1709 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1710 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1710 refs = btrfs_extent_refs(leaf, ei); 1711 refs = btrfs_extent_refs(leaf, ei);
@@ -1779,7 +1780,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
1779 int *last_ref) 1780 int *last_ref)
1780{ 1781{
1781 struct extent_buffer *leaf = path->nodes[0]; 1782 struct extent_buffer *leaf = path->nodes[0];
1782 struct btrfs_fs_info *fs_info = leaf->fs_info;
1783 struct btrfs_extent_item *ei; 1783 struct btrfs_extent_item *ei;
1784 struct btrfs_extent_data_ref *dref = NULL; 1784 struct btrfs_extent_data_ref *dref = NULL;
1785 struct btrfs_shared_data_ref *sref = NULL; 1785 struct btrfs_shared_data_ref *sref = NULL;
@@ -1834,7 +1834,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
1834 memmove_extent_buffer(leaf, ptr, ptr + size, 1834 memmove_extent_buffer(leaf, ptr, ptr + size,
1835 end - ptr - size); 1835 end - ptr - size);
1836 item_size -= size; 1836 item_size -= size;
1837 btrfs_truncate_item(fs_info, path, item_size, 1); 1837 btrfs_truncate_item(path, item_size, 1);
1838 } 1838 }
1839 btrfs_mark_buffer_dirty(leaf); 1839 btrfs_mark_buffer_dirty(leaf);
1840} 1840}
@@ -1905,7 +1905,6 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1905 return ret; 1905 return ret;
1906} 1906}
1907 1907
1908#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
1909static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, 1908static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1910 u64 *discarded_bytes) 1909 u64 *discarded_bytes)
1911{ 1910{
@@ -2043,39 +2042,28 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
2043 2042
2044/* Can return -ENOMEM */ 2043/* Can return -ENOMEM */
2045int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2044int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2046 struct btrfs_root *root, 2045 struct btrfs_ref *generic_ref)
2047 u64 bytenr, u64 num_bytes, u64 parent,
2048 u64 root_objectid, u64 owner, u64 offset)
2049{ 2046{
2050 struct btrfs_fs_info *fs_info = root->fs_info; 2047 struct btrfs_fs_info *fs_info = trans->fs_info;
2051 int old_ref_mod, new_ref_mod; 2048 int old_ref_mod, new_ref_mod;
2052 int ret; 2049 int ret;
2053 2050
2054 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 2051 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
2055 root_objectid == BTRFS_TREE_LOG_OBJECTID); 2052 generic_ref->action);
2053 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
2054 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
2056 2055
2057 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, 2056 if (generic_ref->type == BTRFS_REF_METADATA)
2058 owner, offset, BTRFS_ADD_DELAYED_REF); 2057 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
2059 2058 NULL, &old_ref_mod, &new_ref_mod);
2060 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 2059 else
2061 ret = btrfs_add_delayed_tree_ref(trans, bytenr, 2060 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
2062 num_bytes, parent,
2063 root_objectid, (int)owner,
2064 BTRFS_ADD_DELAYED_REF, NULL,
2065 &old_ref_mod, &new_ref_mod);
2066 } else {
2067 ret = btrfs_add_delayed_data_ref(trans, bytenr,
2068 num_bytes, parent,
2069 root_objectid, owner, offset,
2070 0, BTRFS_ADD_DELAYED_REF,
2071 &old_ref_mod, &new_ref_mod); 2061 &old_ref_mod, &new_ref_mod);
2072 }
2073 2062
2074 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) { 2063 btrfs_ref_tree_mod(fs_info, generic_ref);
2075 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
2076 2064
2077 add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid); 2065 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2078 } 2066 add_pinned_bytes(fs_info, generic_ref);
2079 2067
2080 return ret; 2068 return ret;
2081} 2069}
@@ -2877,97 +2865,6 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
2877 return btrfs_check_space_for_delayed_refs(trans->fs_info); 2865 return btrfs_check_space_for_delayed_refs(trans->fs_info);
2878} 2866}
2879 2867
2880struct async_delayed_refs {
2881 struct btrfs_root *root;
2882 u64 transid;
2883 int count;
2884 int error;
2885 int sync;
2886 struct completion wait;
2887 struct btrfs_work work;
2888};
2889
2890static inline struct async_delayed_refs *
2891to_async_delayed_refs(struct btrfs_work *work)
2892{
2893 return container_of(work, struct async_delayed_refs, work);
2894}
2895
2896static void delayed_ref_async_start(struct btrfs_work *work)
2897{
2898 struct async_delayed_refs *async = to_async_delayed_refs(work);
2899 struct btrfs_trans_handle *trans;
2900 struct btrfs_fs_info *fs_info = async->root->fs_info;
2901 int ret;
2902
2903 /* if the commit is already started, we don't need to wait here */
2904 if (btrfs_transaction_blocked(fs_info))
2905 goto done;
2906
2907 trans = btrfs_join_transaction(async->root);
2908 if (IS_ERR(trans)) {
2909 async->error = PTR_ERR(trans);
2910 goto done;
2911 }
2912
2913 /*
2914 * trans->sync means that when we call end_transaction, we won't
2915 * wait on delayed refs
2916 */
2917 trans->sync = true;
2918
2919 /* Don't bother flushing if we got into a different transaction */
2920 if (trans->transid > async->transid)
2921 goto end;
2922
2923 ret = btrfs_run_delayed_refs(trans, async->count);
2924 if (ret)
2925 async->error = ret;
2926end:
2927 ret = btrfs_end_transaction(trans);
2928 if (ret && !async->error)
2929 async->error = ret;
2930done:
2931 if (async->sync)
2932 complete(&async->wait);
2933 else
2934 kfree(async);
2935}
2936
2937int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
2938 unsigned long count, u64 transid, int wait)
2939{
2940 struct async_delayed_refs *async;
2941 int ret;
2942
2943 async = kmalloc(sizeof(*async), GFP_NOFS);
2944 if (!async)
2945 return -ENOMEM;
2946
2947 async->root = fs_info->tree_root;
2948 async->count = count;
2949 async->error = 0;
2950 async->transid = transid;
2951 if (wait)
2952 async->sync = 1;
2953 else
2954 async->sync = 0;
2955 init_completion(&async->wait);
2956
2957 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2958 delayed_ref_async_start, NULL, NULL);
2959
2960 btrfs_queue_work(fs_info->extent_workers, &async->work);
2961
2962 if (wait) {
2963 wait_for_completion(&async->wait);
2964 ret = async->error;
2965 kfree(async);
2966 return ret;
2967 }
2968 return 0;
2969}
2970
2971/* 2868/*
2972 * this starts processing the delayed reference count updates and 2869 * this starts processing the delayed reference count updates and
2973 * extent insertions we have queued up so far. count can be 2870 * extent insertions we have queued up so far. count can be
@@ -3036,7 +2933,6 @@ out:
3036} 2933}
3037 2934
3038int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2935int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3039 struct btrfs_fs_info *fs_info,
3040 u64 bytenr, u64 num_bytes, u64 flags, 2936 u64 bytenr, u64 num_bytes, u64 flags,
3041 int level, int is_data) 2937 int level, int is_data)
3042{ 2938{
@@ -3053,8 +2949,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3053 extent_op->is_data = is_data ? true : false; 2949 extent_op->is_data = is_data ? true : false;
3054 extent_op->level = level; 2950 extent_op->level = level;
3055 2951
3056 ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr, 2952 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
3057 num_bytes, extent_op);
3058 if (ret) 2953 if (ret)
3059 btrfs_free_delayed_extent_op(extent_op); 2954 btrfs_free_delayed_extent_op(extent_op);
3060 return ret; 2955 return ret;
@@ -3246,13 +3141,12 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3246 u32 nritems; 3141 u32 nritems;
3247 struct btrfs_key key; 3142 struct btrfs_key key;
3248 struct btrfs_file_extent_item *fi; 3143 struct btrfs_file_extent_item *fi;
3144 struct btrfs_ref generic_ref = { 0 };
3145 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
3249 int i; 3146 int i;
3147 int action;
3250 int level; 3148 int level;
3251 int ret = 0; 3149 int ret = 0;
3252 int (*process_func)(struct btrfs_trans_handle *,
3253 struct btrfs_root *,
3254 u64, u64, u64, u64, u64, u64);
3255
3256 3150
3257 if (btrfs_is_testing(fs_info)) 3151 if (btrfs_is_testing(fs_info))
3258 return 0; 3152 return 0;
@@ -3264,15 +3158,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3264 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0) 3158 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3265 return 0; 3159 return 0;
3266 3160
3267 if (inc)
3268 process_func = btrfs_inc_extent_ref;
3269 else
3270 process_func = btrfs_free_extent;
3271
3272 if (full_backref) 3161 if (full_backref)
3273 parent = buf->start; 3162 parent = buf->start;
3274 else 3163 else
3275 parent = 0; 3164 parent = 0;
3165 if (inc)
3166 action = BTRFS_ADD_DELAYED_REF;
3167 else
3168 action = BTRFS_DROP_DELAYED_REF;
3276 3169
3277 for (i = 0; i < nritems; i++) { 3170 for (i = 0; i < nritems; i++) {
3278 if (level == 0) { 3171 if (level == 0) {
@@ -3290,16 +3183,30 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3290 3183
3291 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 3184 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3292 key.offset -= btrfs_file_extent_offset(buf, fi); 3185 key.offset -= btrfs_file_extent_offset(buf, fi);
3293 ret = process_func(trans, root, bytenr, num_bytes, 3186 btrfs_init_generic_ref(&generic_ref, action, bytenr,
3294 parent, ref_root, key.objectid, 3187 num_bytes, parent);
3295 key.offset); 3188 generic_ref.real_root = root->root_key.objectid;
3189 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
3190 key.offset);
3191 generic_ref.skip_qgroup = for_reloc;
3192 if (inc)
3193 ret = btrfs_inc_extent_ref(trans, &generic_ref);
3194 else
3195 ret = btrfs_free_extent(trans, &generic_ref);
3296 if (ret) 3196 if (ret)
3297 goto fail; 3197 goto fail;
3298 } else { 3198 } else {
3299 bytenr = btrfs_node_blockptr(buf, i); 3199 bytenr = btrfs_node_blockptr(buf, i);
3300 num_bytes = fs_info->nodesize; 3200 num_bytes = fs_info->nodesize;
3301 ret = process_func(trans, root, bytenr, num_bytes, 3201 btrfs_init_generic_ref(&generic_ref, action, bytenr,
3302 parent, ref_root, level - 1, 0); 3202 num_bytes, parent);
3203 generic_ref.real_root = root->root_key.objectid;
3204 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
3205 generic_ref.skip_qgroup = for_reloc;
3206 if (inc)
3207 ret = btrfs_inc_extent_ref(trans, &generic_ref);
3208 else
3209 ret = btrfs_free_extent(trans, &generic_ref);
3303 if (ret) 3210 if (ret)
3304 goto fail; 3211 goto fail;
3305 } 3212 }
@@ -3322,10 +3229,10 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3322} 3229}
3323 3230
3324static int write_one_cache_group(struct btrfs_trans_handle *trans, 3231static int write_one_cache_group(struct btrfs_trans_handle *trans,
3325 struct btrfs_fs_info *fs_info,
3326 struct btrfs_path *path, 3232 struct btrfs_path *path,
3327 struct btrfs_block_group_cache *cache) 3233 struct btrfs_block_group_cache *cache)
3328{ 3234{
3235 struct btrfs_fs_info *fs_info = trans->fs_info;
3329 int ret; 3236 int ret;
3330 struct btrfs_root *extent_root = fs_info->extent_root; 3237 struct btrfs_root *extent_root = fs_info->extent_root;
3331 unsigned long bi; 3238 unsigned long bi;
@@ -3348,10 +3255,10 @@ fail:
3348 3255
3349} 3256}
3350 3257
3351static struct btrfs_block_group_cache * 3258static struct btrfs_block_group_cache *next_block_group(
3352next_block_group(struct btrfs_fs_info *fs_info, 3259 struct btrfs_block_group_cache *cache)
3353 struct btrfs_block_group_cache *cache)
3354{ 3260{
3261 struct btrfs_fs_info *fs_info = cache->fs_info;
3355 struct rb_node *node; 3262 struct rb_node *node;
3356 3263
3357 spin_lock(&fs_info->block_group_cache_lock); 3264 spin_lock(&fs_info->block_group_cache_lock);
@@ -3404,7 +3311,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3404 if (trans->aborted) 3311 if (trans->aborted)
3405 return 0; 3312 return 0;
3406again: 3313again:
3407 inode = lookup_free_space_inode(fs_info, block_group, path); 3314 inode = lookup_free_space_inode(block_group, path);
3408 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 3315 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3409 ret = PTR_ERR(inode); 3316 ret = PTR_ERR(inode);
3410 btrfs_release_path(path); 3317 btrfs_release_path(path);
@@ -3418,8 +3325,7 @@ again:
3418 if (block_group->ro) 3325 if (block_group->ro)
3419 goto out_free; 3326 goto out_free;
3420 3327
3421 ret = create_free_space_inode(fs_info, trans, block_group, 3328 ret = create_free_space_inode(trans, block_group, path);
3422 path);
3423 if (ret) 3329 if (ret)
3424 goto out_free; 3330 goto out_free;
3425 goto again; 3331 goto again;
@@ -3538,9 +3444,9 @@ out:
3538 return ret; 3444 return ret;
3539} 3445}
3540 3446
3541int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, 3447int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
3542 struct btrfs_fs_info *fs_info)
3543{ 3448{
3449 struct btrfs_fs_info *fs_info = trans->fs_info;
3544 struct btrfs_block_group_cache *cache, *tmp; 3450 struct btrfs_block_group_cache *cache, *tmp;
3545 struct btrfs_transaction *cur_trans = trans->transaction; 3451 struct btrfs_transaction *cur_trans = trans->transaction;
3546 struct btrfs_path *path; 3452 struct btrfs_path *path;
@@ -3652,8 +3558,7 @@ again:
3652 3558
3653 if (cache->disk_cache_state == BTRFS_DC_SETUP) { 3559 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3654 cache->io_ctl.inode = NULL; 3560 cache->io_ctl.inode = NULL;
3655 ret = btrfs_write_out_cache(fs_info, trans, 3561 ret = btrfs_write_out_cache(trans, cache, path);
3656 cache, path);
3657 if (ret == 0 && cache->io_ctl.inode) { 3562 if (ret == 0 && cache->io_ctl.inode) {
3658 num_started++; 3563 num_started++;
3659 should_put = 0; 3564 should_put = 0;
@@ -3673,8 +3578,7 @@ again:
3673 } 3578 }
3674 } 3579 }
3675 if (!ret) { 3580 if (!ret) {
3676 ret = write_one_cache_group(trans, fs_info, 3581 ret = write_one_cache_group(trans, path, cache);
3677 path, cache);
3678 /* 3582 /*
3679 * Our block group might still be attached to the list 3583 * Our block group might still be attached to the list
3680 * of new block groups in the transaction handle of some 3584 * of new block groups in the transaction handle of some
@@ -3744,9 +3648,9 @@ again:
3744 return ret; 3648 return ret;
3745} 3649}
3746 3650
3747int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3651int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
3748 struct btrfs_fs_info *fs_info)
3749{ 3652{
3653 struct btrfs_fs_info *fs_info = trans->fs_info;
3750 struct btrfs_block_group_cache *cache; 3654 struct btrfs_block_group_cache *cache;
3751 struct btrfs_transaction *cur_trans = trans->transaction; 3655 struct btrfs_transaction *cur_trans = trans->transaction;
3752 int ret = 0; 3656 int ret = 0;
@@ -3809,8 +3713,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3809 3713
3810 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { 3714 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3811 cache->io_ctl.inode = NULL; 3715 cache->io_ctl.inode = NULL;
3812 ret = btrfs_write_out_cache(fs_info, trans, 3716 ret = btrfs_write_out_cache(trans, cache, path);
3813 cache, path);
3814 if (ret == 0 && cache->io_ctl.inode) { 3717 if (ret == 0 && cache->io_ctl.inode) {
3815 num_started++; 3718 num_started++;
3816 should_put = 0; 3719 should_put = 0;
@@ -3824,8 +3727,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3824 } 3727 }
3825 } 3728 }
3826 if (!ret) { 3729 if (!ret) {
3827 ret = write_one_cache_group(trans, fs_info, 3730 ret = write_one_cache_group(trans, path, cache);
3828 path, cache);
3829 /* 3731 /*
3830 * One of the free space endio workers might have 3732 * One of the free space endio workers might have
3831 * created a new block group while updating a free space 3733 * created a new block group while updating a free space
@@ -3842,8 +3744,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3842 if (ret == -ENOENT) { 3744 if (ret == -ENOENT) {
3843 wait_event(cur_trans->writer_wait, 3745 wait_event(cur_trans->writer_wait,
3844 atomic_read(&cur_trans->num_writers) == 1); 3746 atomic_read(&cur_trans->num_writers) == 1);
3845 ret = write_one_cache_group(trans, fs_info, 3747 ret = write_one_cache_group(trans, path, cache);
3846 path, cache);
3847 } 3748 }
3848 if (ret) 3749 if (ret)
3849 btrfs_abort_transaction(trans, ret); 3750 btrfs_abort_transaction(trans, ret);
@@ -4732,6 +4633,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4732 struct btrfs_space_info *space_info; 4633 struct btrfs_space_info *space_info;
4733 struct btrfs_trans_handle *trans; 4634 struct btrfs_trans_handle *trans;
4734 u64 delalloc_bytes; 4635 u64 delalloc_bytes;
4636 u64 dio_bytes;
4735 u64 async_pages; 4637 u64 async_pages;
4736 u64 items; 4638 u64 items;
4737 long time_left; 4639 long time_left;
@@ -4747,7 +4649,8 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4747 4649
4748 delalloc_bytes = percpu_counter_sum_positive( 4650 delalloc_bytes = percpu_counter_sum_positive(
4749 &fs_info->delalloc_bytes); 4651 &fs_info->delalloc_bytes);
4750 if (delalloc_bytes == 0) { 4652 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4653 if (delalloc_bytes == 0 && dio_bytes == 0) {
4751 if (trans) 4654 if (trans)
4752 return; 4655 return;
4753 if (wait_ordered) 4656 if (wait_ordered)
@@ -4755,8 +4658,16 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4755 return; 4658 return;
4756 } 4659 }
4757 4660
4661 /*
4662 * If we are doing more ordered than delalloc we need to just wait on
4663 * ordered extents, otherwise we'll waste time trying to flush delalloc
4664 * that likely won't give us the space back we need.
4665 */
4666 if (dio_bytes > delalloc_bytes)
4667 wait_ordered = true;
4668
4758 loops = 0; 4669 loops = 0;
4759 while (delalloc_bytes && loops < 3) { 4670 while ((delalloc_bytes || dio_bytes) && loops < 3) {
4760 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; 4671 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
4761 4672
4762 /* 4673 /*
@@ -4806,6 +4717,7 @@ skip_async:
4806 } 4717 }
4807 delalloc_bytes = percpu_counter_sum_positive( 4718 delalloc_bytes = percpu_counter_sum_positive(
4808 &fs_info->delalloc_bytes); 4719 &fs_info->delalloc_bytes);
4720 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4809 } 4721 }
4810} 4722}
4811 4723
@@ -5803,85 +5715,6 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
5803 return ret; 5715 return ret;
5804} 5716}
5805 5717
5806static void calc_refill_bytes(struct btrfs_block_rsv *block_rsv,
5807 u64 *metadata_bytes, u64 *qgroup_bytes)
5808{
5809 *metadata_bytes = 0;
5810 *qgroup_bytes = 0;
5811
5812 spin_lock(&block_rsv->lock);
5813 if (block_rsv->reserved < block_rsv->size)
5814 *metadata_bytes = block_rsv->size - block_rsv->reserved;
5815 if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
5816 *qgroup_bytes = block_rsv->qgroup_rsv_size -
5817 block_rsv->qgroup_rsv_reserved;
5818 spin_unlock(&block_rsv->lock);
5819}
5820
5821/**
5822 * btrfs_inode_rsv_refill - refill the inode block rsv.
5823 * @inode - the inode we are refilling.
5824 * @flush - the flushing restriction.
5825 *
5826 * Essentially the same as btrfs_block_rsv_refill, except it uses the
5827 * block_rsv->size as the minimum size. We'll either refill the missing amount
5828 * or return if we already have enough space. This will also handle the reserve
5829 * tracepoint for the reserved amount.
5830 */
5831static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5832 enum btrfs_reserve_flush_enum flush)
5833{
5834 struct btrfs_root *root = inode->root;
5835 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5836 u64 num_bytes, last = 0;
5837 u64 qgroup_num_bytes;
5838 int ret = -ENOSPC;
5839
5840 calc_refill_bytes(block_rsv, &num_bytes, &qgroup_num_bytes);
5841 if (num_bytes == 0)
5842 return 0;
5843
5844 do {
5845 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes,
5846 true);
5847 if (ret)
5848 return ret;
5849 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5850 if (ret) {
5851 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5852 last = num_bytes;
5853 /*
5854 * If we are fragmented we can end up with a lot of
5855 * outstanding extents which will make our size be much
5856 * larger than our reserved amount.
5857 *
5858 * If the reservation happens here, it might be very
5859 * big though not needed in the end, if the delalloc
5860 * flushing happens.
5861 *
5862 * If this is the case try and do the reserve again.
5863 */
5864 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5865 calc_refill_bytes(block_rsv, &num_bytes,
5866 &qgroup_num_bytes);
5867 if (num_bytes == 0)
5868 return 0;
5869 }
5870 } while (ret && last != num_bytes);
5871
5872 if (!ret) {
5873 block_rsv_add_bytes(block_rsv, num_bytes, false);
5874 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5875 btrfs_ino(inode), num_bytes, 1);
5876
5877 /* Don't forget to increase qgroup_rsv_reserved */
5878 spin_lock(&block_rsv->lock);
5879 block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
5880 spin_unlock(&block_rsv->lock);
5881 }
5882 return ret;
5883}
5884
5885static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, 5718static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5886 struct btrfs_block_rsv *block_rsv, 5719 struct btrfs_block_rsv *block_rsv,
5887 u64 num_bytes, u64 *qgroup_to_release) 5720 u64 num_bytes, u64 *qgroup_to_release)
@@ -6182,9 +6015,25 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6182 spin_unlock(&block_rsv->lock); 6015 spin_unlock(&block_rsv->lock);
6183} 6016}
6184 6017
6018static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
6019 u64 num_bytes, u64 *meta_reserve,
6020 u64 *qgroup_reserve)
6021{
6022 u64 nr_extents = count_max_extents(num_bytes);
6023 u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
6024
6025 /* We add one for the inode update at finish ordered time */
6026 *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
6027 nr_extents + csum_leaves + 1);
6028 *qgroup_reserve = nr_extents * fs_info->nodesize;
6029}
6030
6185int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) 6031int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6186{ 6032{
6187 struct btrfs_fs_info *fs_info = inode->root->fs_info; 6033 struct btrfs_root *root = inode->root;
6034 struct btrfs_fs_info *fs_info = root->fs_info;
6035 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6036 u64 meta_reserve, qgroup_reserve;
6188 unsigned nr_extents; 6037 unsigned nr_extents;
6189 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 6038 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6190 int ret = 0; 6039 int ret = 0;
@@ -6214,7 +6063,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6214 6063
6215 num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 6064 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6216 6065
6217 /* Add our new extents and calculate the new rsv size. */ 6066 /*
6067 * We always want to do it this way, every other way is wrong and ends
6068 * in tears. Pre-reserving the amount we are going to add will always
6069 * be the right way, because otherwise if we have enough parallelism we
6070 * could end up with thousands of inodes all holding little bits of
6071 * reservations they were able to make previously and the only way to
6072 * reclaim that space is to ENOSPC out the operations and clear
6073 * everything out and try again, which is bad. This way we just
6074 * over-reserve slightly, and clean up the mess when we are done.
6075 */
6076 calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
6077 &qgroup_reserve);
6078 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
6079 if (ret)
6080 goto out_fail;
6081 ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
6082 if (ret)
6083 goto out_qgroup;
6084
6085 /*
6086 * Now we need to update our outstanding extents and csum bytes _first_
6087 * and then add the reservation to the block_rsv. This keeps us from
6088 * racing with an ordered completion or some such that would think it
6089 * needs to free the reservation we just made.
6090 */
6218 spin_lock(&inode->lock); 6091 spin_lock(&inode->lock);
6219 nr_extents = count_max_extents(num_bytes); 6092 nr_extents = count_max_extents(num_bytes);
6220 btrfs_mod_outstanding_extents(inode, nr_extents); 6093 btrfs_mod_outstanding_extents(inode, nr_extents);
@@ -6222,22 +6095,21 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6222 btrfs_calculate_inode_block_rsv_size(fs_info, inode); 6095 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6223 spin_unlock(&inode->lock); 6096 spin_unlock(&inode->lock);
6224 6097
6225 ret = btrfs_inode_rsv_refill(inode, flush); 6098 /* Now we can safely add our space to our block rsv */
6226 if (unlikely(ret)) 6099 block_rsv_add_bytes(block_rsv, meta_reserve, false);
6227 goto out_fail; 6100 trace_btrfs_space_reservation(root->fs_info, "delalloc",
6101 btrfs_ino(inode), meta_reserve, 1);
6102
6103 spin_lock(&block_rsv->lock);
6104 block_rsv->qgroup_rsv_reserved += qgroup_reserve;
6105 spin_unlock(&block_rsv->lock);
6228 6106
6229 if (delalloc_lock) 6107 if (delalloc_lock)
6230 mutex_unlock(&inode->delalloc_mutex); 6108 mutex_unlock(&inode->delalloc_mutex);
6231 return 0; 6109 return 0;
6232 6110out_qgroup:
6111 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
6233out_fail: 6112out_fail:
6234 spin_lock(&inode->lock);
6235 nr_extents = count_max_extents(num_bytes);
6236 btrfs_mod_outstanding_extents(inode, -nr_extents);
6237 inode->csum_bytes -= num_bytes;
6238 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6239 spin_unlock(&inode->lock);
6240
6241 btrfs_inode_rsv_release(inode, true); 6113 btrfs_inode_rsv_release(inode, true);
6242 if (delalloc_lock) 6114 if (delalloc_lock)
6243 mutex_unlock(&inode->delalloc_mutex); 6115 mutex_unlock(&inode->delalloc_mutex);
@@ -6361,9 +6233,9 @@ void btrfs_delalloc_release_space(struct inode *inode,
6361} 6233}
6362 6234
6363static int update_block_group(struct btrfs_trans_handle *trans, 6235static int update_block_group(struct btrfs_trans_handle *trans,
6364 struct btrfs_fs_info *info, u64 bytenr, 6236 u64 bytenr, u64 num_bytes, int alloc)
6365 u64 num_bytes, int alloc)
6366{ 6237{
6238 struct btrfs_fs_info *info = trans->fs_info;
6367 struct btrfs_block_group_cache *cache = NULL; 6239 struct btrfs_block_group_cache *cache = NULL;
6368 u64 total = num_bytes; 6240 u64 total = num_bytes;
6369 u64 old_val; 6241 u64 old_val;
@@ -6444,7 +6316,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6444 if (list_empty(&cache->dirty_list)) { 6316 if (list_empty(&cache->dirty_list)) {
6445 list_add_tail(&cache->dirty_list, 6317 list_add_tail(&cache->dirty_list,
6446 &trans->transaction->dirty_bgs); 6318 &trans->transaction->dirty_bgs);
6447 trans->transaction->num_dirty_bgs++;
6448 trans->delayed_ref_updates++; 6319 trans->delayed_ref_updates++;
6449 btrfs_get_block_group(cache); 6320 btrfs_get_block_group(cache);
6450 } 6321 }
@@ -6491,10 +6362,11 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
6491 return bytenr; 6362 return bytenr;
6492} 6363}
6493 6364
6494static int pin_down_extent(struct btrfs_fs_info *fs_info, 6365static int pin_down_extent(struct btrfs_block_group_cache *cache,
6495 struct btrfs_block_group_cache *cache,
6496 u64 bytenr, u64 num_bytes, int reserved) 6366 u64 bytenr, u64 num_bytes, int reserved)
6497{ 6367{
6368 struct btrfs_fs_info *fs_info = cache->fs_info;
6369
6498 spin_lock(&cache->space_info->lock); 6370 spin_lock(&cache->space_info->lock);
6499 spin_lock(&cache->lock); 6371 spin_lock(&cache->lock);
6500 cache->pinned += num_bytes; 6372 cache->pinned += num_bytes;
@@ -6526,7 +6398,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
6526 cache = btrfs_lookup_block_group(fs_info, bytenr); 6398 cache = btrfs_lookup_block_group(fs_info, bytenr);
6527 BUG_ON(!cache); /* Logic error */ 6399 BUG_ON(!cache); /* Logic error */
6528 6400
6529 pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved); 6401 pin_down_extent(cache, bytenr, num_bytes, reserved);
6530 6402
6531 btrfs_put_block_group(cache); 6403 btrfs_put_block_group(cache);
6532 return 0; 6404 return 0;
@@ -6553,7 +6425,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
6553 */ 6425 */
6554 cache_block_group(cache, 1); 6426 cache_block_group(cache, 1);
6555 6427
6556 pin_down_extent(fs_info, cache, bytenr, num_bytes, 0); 6428 pin_down_extent(cache, bytenr, num_bytes, 0);
6557 6429
6558 /* remove us from the free space cache (if we're there at all) */ 6430 /* remove us from the free space cache (if we're there at all) */
6559 ret = btrfs_remove_free_space(cache, bytenr, num_bytes); 6431 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
@@ -6607,9 +6479,9 @@ out_lock:
6607 return ret; 6479 return ret;
6608} 6480}
6609 6481
6610int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info, 6482int btrfs_exclude_logged_extents(struct extent_buffer *eb)
6611 struct extent_buffer *eb)
6612{ 6483{
6484 struct btrfs_fs_info *fs_info = eb->fs_info;
6613 struct btrfs_file_extent_item *item; 6485 struct btrfs_file_extent_item *item;
6614 struct btrfs_key key; 6486 struct btrfs_key key;
6615 int found_type; 6487 int found_type;
@@ -7198,7 +7070,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
7198 goto out; 7070 goto out;
7199 } 7071 }
7200 7072
7201 ret = update_block_group(trans, info, bytenr, num_bytes, 0); 7073 ret = update_block_group(trans, bytenr, num_bytes, 0);
7202 if (ret) { 7074 if (ret) {
7203 btrfs_abort_transaction(trans, ret); 7075 btrfs_abort_transaction(trans, ret);
7204 goto out; 7076 goto out;
@@ -7272,21 +7144,20 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7272 u64 parent, int last_ref) 7144 u64 parent, int last_ref)
7273{ 7145{
7274 struct btrfs_fs_info *fs_info = root->fs_info; 7146 struct btrfs_fs_info *fs_info = root->fs_info;
7147 struct btrfs_ref generic_ref = { 0 };
7275 int pin = 1; 7148 int pin = 1;
7276 int ret; 7149 int ret;
7277 7150
7151 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
7152 buf->start, buf->len, parent);
7153 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
7154 root->root_key.objectid);
7155
7278 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 7156 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7279 int old_ref_mod, new_ref_mod; 7157 int old_ref_mod, new_ref_mod;
7280 7158
7281 btrfs_ref_tree_mod(root, buf->start, buf->len, parent, 7159 btrfs_ref_tree_mod(fs_info, &generic_ref);
7282 root->root_key.objectid, 7160 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
7283 btrfs_header_level(buf), 0,
7284 BTRFS_DROP_DELAYED_REF);
7285 ret = btrfs_add_delayed_tree_ref(trans, buf->start,
7286 buf->len, parent,
7287 root->root_key.objectid,
7288 btrfs_header_level(buf),
7289 BTRFS_DROP_DELAYED_REF, NULL,
7290 &old_ref_mod, &new_ref_mod); 7161 &old_ref_mod, &new_ref_mod);
7291 BUG_ON(ret); /* -ENOMEM */ 7162 BUG_ON(ret); /* -ENOMEM */
7292 pin = old_ref_mod >= 0 && new_ref_mod < 0; 7163 pin = old_ref_mod >= 0 && new_ref_mod < 0;
@@ -7305,8 +7176,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7305 cache = btrfs_lookup_block_group(fs_info, buf->start); 7176 cache = btrfs_lookup_block_group(fs_info, buf->start);
7306 7177
7307 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 7178 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
7308 pin_down_extent(fs_info, cache, buf->start, 7179 pin_down_extent(cache, buf->start, buf->len, 1);
7309 buf->len, 1);
7310 btrfs_put_block_group(cache); 7180 btrfs_put_block_group(cache);
7311 goto out; 7181 goto out;
7312 } 7182 }
@@ -7320,8 +7190,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7320 } 7190 }
7321out: 7191out:
7322 if (pin) 7192 if (pin)
7323 add_pinned_bytes(fs_info, buf->len, true, 7193 add_pinned_bytes(fs_info, &generic_ref);
7324 root->root_key.objectid);
7325 7194
7326 if (last_ref) { 7195 if (last_ref) {
7327 /* 7196 /*
@@ -7333,52 +7202,43 @@ out:
7333} 7202}
7334 7203
7335/* Can return -ENOMEM */ 7204/* Can return -ENOMEM */
7336int btrfs_free_extent(struct btrfs_trans_handle *trans, 7205int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
7337 struct btrfs_root *root,
7338 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
7339 u64 owner, u64 offset)
7340{ 7206{
7341 struct btrfs_fs_info *fs_info = root->fs_info; 7207 struct btrfs_fs_info *fs_info = trans->fs_info;
7342 int old_ref_mod, new_ref_mod; 7208 int old_ref_mod, new_ref_mod;
7343 int ret; 7209 int ret;
7344 7210
7345 if (btrfs_is_testing(fs_info)) 7211 if (btrfs_is_testing(fs_info))
7346 return 0; 7212 return 0;
7347 7213
7348 if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
7349 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
7350 root_objectid, owner, offset,
7351 BTRFS_DROP_DELAYED_REF);
7352
7353 /* 7214 /*
7354 * tree log blocks never actually go into the extent allocation 7215 * tree log blocks never actually go into the extent allocation
7355 * tree, just update pinning info and exit early. 7216 * tree, just update pinning info and exit early.
7356 */ 7217 */
7357 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 7218 if ((ref->type == BTRFS_REF_METADATA &&
7358 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 7219 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
7220 (ref->type == BTRFS_REF_DATA &&
7221 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
7359 /* unlocks the pinned mutex */ 7222 /* unlocks the pinned mutex */
7360 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); 7223 btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
7361 old_ref_mod = new_ref_mod = 0; 7224 old_ref_mod = new_ref_mod = 0;
7362 ret = 0; 7225 ret = 0;
7363 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 7226 } else if (ref->type == BTRFS_REF_METADATA) {
7364 ret = btrfs_add_delayed_tree_ref(trans, bytenr, 7227 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
7365 num_bytes, parent,
7366 root_objectid, (int)owner,
7367 BTRFS_DROP_DELAYED_REF, NULL,
7368 &old_ref_mod, &new_ref_mod); 7228 &old_ref_mod, &new_ref_mod);
7369 } else { 7229 } else {
7370 ret = btrfs_add_delayed_data_ref(trans, bytenr, 7230 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
7371 num_bytes, parent,
7372 root_objectid, owner, offset,
7373 0, BTRFS_DROP_DELAYED_REF,
7374 &old_ref_mod, &new_ref_mod); 7231 &old_ref_mod, &new_ref_mod);
7375 } 7232 }
7376 7233
7377 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) { 7234 if (!((ref->type == BTRFS_REF_METADATA &&
7378 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; 7235 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
7236 (ref->type == BTRFS_REF_DATA &&
7237 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
7238 btrfs_ref_tree_mod(fs_info, ref);
7379 7239
7380 add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid); 7240 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7381 } 7241 add_pinned_bytes(fs_info, ref);
7382 7242
7383 return ret; 7243 return ret;
7384} 7244}
@@ -7569,7 +7429,6 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
7569 struct find_free_extent_ctl *ffe_ctl, 7429 struct find_free_extent_ctl *ffe_ctl,
7570 struct btrfs_block_group_cache **cluster_bg_ret) 7430 struct btrfs_block_group_cache **cluster_bg_ret)
7571{ 7431{
7572 struct btrfs_fs_info *fs_info = bg->fs_info;
7573 struct btrfs_block_group_cache *cluster_bg; 7432 struct btrfs_block_group_cache *cluster_bg;
7574 u64 aligned_cluster; 7433 u64 aligned_cluster;
7575 u64 offset; 7434 u64 offset;
@@ -7629,9 +7488,8 @@ refill_cluster:
7629 aligned_cluster = max_t(u64, 7488 aligned_cluster = max_t(u64,
7630 ffe_ctl->empty_cluster + ffe_ctl->empty_size, 7489 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
7631 bg->full_stripe_len); 7490 bg->full_stripe_len);
7632 ret = btrfs_find_space_cluster(fs_info, bg, last_ptr, 7491 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
7633 ffe_ctl->search_start, ffe_ctl->num_bytes, 7492 ffe_ctl->num_bytes, aligned_cluster);
7634 aligned_cluster);
7635 if (ret == 0) { 7493 if (ret == 0) {
7636 /* Now pull our allocation out of this cluster */ 7494 /* Now pull our allocation out of this cluster */
7637 offset = btrfs_alloc_from_cluster(bg, last_ptr, 7495 offset = btrfs_alloc_from_cluster(bg, last_ptr,
@@ -8281,7 +8139,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8281 } 8139 }
8282 8140
8283 if (pin) 8141 if (pin)
8284 pin_down_extent(fs_info, cache, start, len, 1); 8142 pin_down_extent(cache, start, len, 1);
8285 else { 8143 else {
8286 if (btrfs_test_opt(fs_info, DISCARD)) 8144 if (btrfs_test_opt(fs_info, DISCARD))
8287 ret = btrfs_discard_extent(fs_info, start, len, NULL); 8145 ret = btrfs_discard_extent(fs_info, start, len, NULL);
@@ -8370,7 +8228,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8370 if (ret) 8228 if (ret)
8371 return ret; 8229 return ret;
8372 8230
8373 ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1); 8231 ret = update_block_group(trans, ins->objectid, ins->offset, 1);
8374 if (ret) { /* -ENOENT, logic error */ 8232 if (ret) { /* -ENOENT, logic error */
8375 btrfs_err(fs_info, "update block group failed for %llu %llu", 8233 btrfs_err(fs_info, "update block group failed for %llu %llu",
8376 ins->objectid, ins->offset); 8234 ins->objectid, ins->offset);
@@ -8460,7 +8318,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
8460 if (ret) 8318 if (ret)
8461 return ret; 8319 return ret;
8462 8320
8463 ret = update_block_group(trans, fs_info, extent_key.objectid, 8321 ret = update_block_group(trans, extent_key.objectid,
8464 fs_info->nodesize, 1); 8322 fs_info->nodesize, 1);
8465 if (ret) { /* -ENOENT, logic error */ 8323 if (ret) { /* -ENOENT, logic error */
8466 btrfs_err(fs_info, "update block group failed for %llu %llu", 8324 btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -8478,19 +8336,17 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8478 u64 offset, u64 ram_bytes, 8336 u64 offset, u64 ram_bytes,
8479 struct btrfs_key *ins) 8337 struct btrfs_key *ins)
8480{ 8338{
8339 struct btrfs_ref generic_ref = { 0 };
8481 int ret; 8340 int ret;
8482 8341
8483 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 8342 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
8484 8343
8485 btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0, 8344 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
8486 root->root_key.objectid, owner, offset, 8345 ins->objectid, ins->offset, 0);
8487 BTRFS_ADD_DELAYED_EXTENT); 8346 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
8488 8347 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
8489 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, 8348 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
8490 ins->offset, 0, 8349 ram_bytes, NULL, NULL);
8491 root->root_key.objectid, owner,
8492 offset, ram_bytes,
8493 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
8494 return ret; 8350 return ret;
8495} 8351}
8496 8352
@@ -8563,7 +8419,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8563 8419
8564 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 8420 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8565 btrfs_tree_lock(buf); 8421 btrfs_tree_lock(buf);
8566 clean_tree_block(fs_info, buf); 8422 btrfs_clean_tree_block(buf);
8567 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); 8423 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
8568 8424
8569 btrfs_set_lock_blocking_write(buf); 8425 btrfs_set_lock_blocking_write(buf);
@@ -8682,6 +8538,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8682 struct btrfs_block_rsv *block_rsv; 8538 struct btrfs_block_rsv *block_rsv;
8683 struct extent_buffer *buf; 8539 struct extent_buffer *buf;
8684 struct btrfs_delayed_extent_op *extent_op; 8540 struct btrfs_delayed_extent_op *extent_op;
8541 struct btrfs_ref generic_ref = { 0 };
8685 u64 flags = 0; 8542 u64 flags = 0;
8686 int ret; 8543 int ret;
8687 u32 blocksize = fs_info->nodesize; 8544 u32 blocksize = fs_info->nodesize;
@@ -8736,13 +8593,12 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8736 extent_op->is_data = false; 8593 extent_op->is_data = false;
8737 extent_op->level = level; 8594 extent_op->level = level;
8738 8595
8739 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, 8596 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
8740 root_objectid, level, 0, 8597 ins.objectid, ins.offset, parent);
8741 BTRFS_ADD_DELAYED_EXTENT); 8598 generic_ref.real_root = root->root_key.objectid;
8742 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 8599 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
8743 ins.offset, parent, 8600 btrfs_ref_tree_mod(fs_info, &generic_ref);
8744 root_objectid, level, 8601 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
8745 BTRFS_ADD_DELAYED_EXTENT,
8746 extent_op, NULL, NULL); 8602 extent_op, NULL, NULL);
8747 if (ret) 8603 if (ret)
8748 goto out_free_delayed; 8604 goto out_free_delayed;
@@ -8918,7 +8774,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8918 BUG_ON(ret); /* -ENOMEM */ 8774 BUG_ON(ret); /* -ENOMEM */
8919 ret = btrfs_dec_ref(trans, root, eb, 0); 8775 ret = btrfs_dec_ref(trans, root, eb, 0);
8920 BUG_ON(ret); /* -ENOMEM */ 8776 BUG_ON(ret); /* -ENOMEM */
8921 ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start, 8777 ret = btrfs_set_disk_extent_flags(trans, eb->start,
8922 eb->len, flag, 8778 eb->len, flag,
8923 btrfs_header_level(eb), 0); 8779 btrfs_header_level(eb), 0);
8924 BUG_ON(ret); /* -ENOMEM */ 8780 BUG_ON(ret); /* -ENOMEM */
@@ -8987,6 +8843,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8987 u64 parent; 8843 u64 parent;
8988 struct btrfs_key key; 8844 struct btrfs_key key;
8989 struct btrfs_key first_key; 8845 struct btrfs_key first_key;
8846 struct btrfs_ref ref = { 0 };
8990 struct extent_buffer *next; 8847 struct extent_buffer *next;
8991 int level = wc->level; 8848 int level = wc->level;
8992 int reada = 0; 8849 int reada = 0;
@@ -9159,9 +9016,10 @@ skip:
9159 wc->drop_level = level; 9016 wc->drop_level = level;
9160 find_next_key(path, level, &wc->drop_progress); 9017 find_next_key(path, level, &wc->drop_progress);
9161 9018
9162 ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize, 9019 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
9163 parent, root->root_key.objectid, 9020 fs_info->nodesize, parent);
9164 level - 1, 0); 9021 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
9022 ret = btrfs_free_extent(trans, &ref);
9165 if (ret) 9023 if (ret)
9166 goto out_unlock; 9024 goto out_unlock;
9167 } 9025 }
@@ -9251,21 +9109,23 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
9251 else 9109 else
9252 ret = btrfs_dec_ref(trans, root, eb, 0); 9110 ret = btrfs_dec_ref(trans, root, eb, 0);
9253 BUG_ON(ret); /* -ENOMEM */ 9111 BUG_ON(ret); /* -ENOMEM */
9254 ret = btrfs_qgroup_trace_leaf_items(trans, eb); 9112 if (is_fstree(root->root_key.objectid)) {
9255 if (ret) { 9113 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
9256 btrfs_err_rl(fs_info, 9114 if (ret) {
9257 "error %d accounting leaf items. Quota is out of sync, rescan required.", 9115 btrfs_err_rl(fs_info,
9116 "error %d accounting leaf items, quota is out of sync, rescan required",
9258 ret); 9117 ret);
9118 }
9259 } 9119 }
9260 } 9120 }
9261 /* make block locked assertion in clean_tree_block happy */ 9121 /* make block locked assertion in btrfs_clean_tree_block happy */
9262 if (!path->locks[level] && 9122 if (!path->locks[level] &&
9263 btrfs_header_generation(eb) == trans->transid) { 9123 btrfs_header_generation(eb) == trans->transid) {
9264 btrfs_tree_lock(eb); 9124 btrfs_tree_lock(eb);
9265 btrfs_set_lock_blocking_write(eb); 9125 btrfs_set_lock_blocking_write(eb);
9266 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 9126 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9267 } 9127 }
9268 clean_tree_block(fs_info, eb); 9128 btrfs_clean_tree_block(eb);
9269 } 9129 }
9270 9130
9271 if (eb == root->node) { 9131 if (eb == root->node) {
@@ -9921,12 +9781,10 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
9921 */ 9781 */
9922int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) 9782int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
9923{ 9783{
9924 struct btrfs_root *root = fs_info->extent_root;
9925 struct btrfs_block_group_cache *block_group; 9784 struct btrfs_block_group_cache *block_group;
9926 struct btrfs_space_info *space_info; 9785 struct btrfs_space_info *space_info;
9927 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 9786 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
9928 struct btrfs_device *device; 9787 struct btrfs_device *device;
9929 struct btrfs_trans_handle *trans;
9930 u64 min_free; 9788 u64 min_free;
9931 u64 dev_min = 1; 9789 u64 dev_min = 1;
9932 u64 dev_nr = 0; 9790 u64 dev_nr = 0;
@@ -10025,13 +9883,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
10025 min_free = div64_u64(min_free, dev_min); 9883 min_free = div64_u64(min_free, dev_min);
10026 } 9884 }
10027 9885
10028 /* We need to do this so that we can look at pending chunks */
10029 trans = btrfs_join_transaction(root);
10030 if (IS_ERR(trans)) {
10031 ret = PTR_ERR(trans);
10032 goto out;
10033 }
10034
10035 mutex_lock(&fs_info->chunk_mutex); 9886 mutex_lock(&fs_info->chunk_mutex);
10036 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 9887 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
10037 u64 dev_offset; 9888 u64 dev_offset;
@@ -10042,7 +9893,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
10042 */ 9893 */
10043 if (device->total_bytes > device->bytes_used + min_free && 9894 if (device->total_bytes > device->bytes_used + min_free &&
10044 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { 9895 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
10045 ret = find_free_dev_extent(trans, device, min_free, 9896 ret = find_free_dev_extent(device, min_free,
10046 &dev_offset, NULL); 9897 &dev_offset, NULL);
10047 if (!ret) 9898 if (!ret)
10048 dev_nr++; 9899 dev_nr++;
@@ -10058,7 +9909,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
10058 "no space to allocate a new chunk for block group %llu", 9909 "no space to allocate a new chunk for block group %llu",
10059 block_group->key.objectid); 9910 block_group->key.objectid);
10060 mutex_unlock(&fs_info->chunk_mutex); 9911 mutex_unlock(&fs_info->chunk_mutex);
10061 btrfs_end_transaction(trans);
10062out: 9912out:
10063 btrfs_put_block_group(block_group); 9913 btrfs_put_block_group(block_group);
10064 return ret; 9914 return ret;
@@ -10159,7 +10009,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
10159 if (block_group->iref) 10009 if (block_group->iref)
10160 break; 10010 break;
10161 spin_unlock(&block_group->lock); 10011 spin_unlock(&block_group->lock);
10162 block_group = next_block_group(info, block_group); 10012 block_group = next_block_group(block_group);
10163 } 10013 }
10164 if (!block_group) { 10014 if (!block_group) {
10165 if (last == 0) 10015 if (last == 0)
@@ -10660,7 +10510,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10660 struct btrfs_block_group_cache *cache; 10510 struct btrfs_block_group_cache *cache;
10661 int ret; 10511 int ret;
10662 10512
10663 btrfs_set_log_full_commit(fs_info, trans); 10513 btrfs_set_log_full_commit(trans);
10664 10514
10665 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size); 10515 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
10666 if (!cache) 10516 if (!cache)
@@ -10808,7 +10658,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10808 * get the inode first so any iput calls done for the io_list 10658 * get the inode first so any iput calls done for the io_list
10809 * aren't the final iput (no unlinks allowed now) 10659 * aren't the final iput (no unlinks allowed now)
10810 */ 10660 */
10811 inode = lookup_free_space_inode(fs_info, block_group, path); 10661 inode = lookup_free_space_inode(block_group, path);
10812 10662
10813 mutex_lock(&trans->transaction->cache_write_mutex); 10663 mutex_lock(&trans->transaction->cache_write_mutex);
10814 /* 10664 /*
@@ -10952,10 +10802,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10952 memcpy(&key, &block_group->key, sizeof(key)); 10802 memcpy(&key, &block_group->key, sizeof(key));
10953 10803
10954 mutex_lock(&fs_info->chunk_mutex); 10804 mutex_lock(&fs_info->chunk_mutex);
10955 if (!list_empty(&em->list)) {
10956 /* We're in the transaction->pending_chunks list. */
10957 free_extent_map(em);
10958 }
10959 spin_lock(&block_group->lock); 10805 spin_lock(&block_group->lock);
10960 block_group->removed = 1; 10806 block_group->removed = 1;
10961 /* 10807 /*
@@ -10982,25 +10828,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10982 * the transaction commit has completed. 10828 * the transaction commit has completed.
10983 */ 10829 */
10984 remove_em = (atomic_read(&block_group->trimming) == 0); 10830 remove_em = (atomic_read(&block_group->trimming) == 0);
10985 /*
10986 * Make sure a trimmer task always sees the em in the pinned_chunks list
10987 * if it sees block_group->removed == 1 (needs to lock block_group->lock
10988 * before checking block_group->removed).
10989 */
10990 if (!remove_em) {
10991 /*
10992 * Our em might be in trans->transaction->pending_chunks which
10993 * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks),
10994 * and so is the fs_info->pinned_chunks list.
10995 *
10996 * So at this point we must be holding the chunk_mutex to avoid
10997 * any races with chunk allocation (more specifically at
10998 * volumes.c:contains_pending_extent()), to ensure it always
10999 * sees the em, either in the pending_chunks list or in the
11000 * pinned_chunks list.
11001 */
11002 list_move_tail(&em->list, &fs_info->pinned_chunks);
11003 }
11004 spin_unlock(&block_group->lock); 10831 spin_unlock(&block_group->lock);
11005 10832
11006 if (remove_em) { 10833 if (remove_em) {
@@ -11008,11 +10835,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
11008 10835
11009 em_tree = &fs_info->mapping_tree.map_tree; 10836 em_tree = &fs_info->mapping_tree.map_tree;
11010 write_lock(&em_tree->lock); 10837 write_lock(&em_tree->lock);
11011 /*
11012 * The em might be in the pending_chunks list, so make sure the
11013 * chunk mutex is locked, since remove_extent_mapping() will
11014 * delete us from that list.
11015 */
11016 remove_extent_mapping(em_tree, em); 10838 remove_extent_mapping(em_tree, em);
11017 write_unlock(&em_tree->lock); 10839 write_unlock(&em_tree->lock);
11018 /* once for the tree */ 10840 /* once for the tree */
@@ -11315,11 +11137,12 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11315 * held back allocations. 11137 * held back allocations.
11316 */ 11138 */
11317static int btrfs_trim_free_extents(struct btrfs_device *device, 11139static int btrfs_trim_free_extents(struct btrfs_device *device,
11318 u64 minlen, u64 *trimmed) 11140 struct fstrim_range *range, u64 *trimmed)
11319{ 11141{
11320 u64 start = 0, len = 0; 11142 u64 start, len = 0, end = 0;
11321 int ret; 11143 int ret;
11322 11144
11145 start = max_t(u64, range->start, SZ_1M);
11323 *trimmed = 0; 11146 *trimmed = 0;
11324 11147
11325 /* Discard not supported = nothing to do. */ 11148 /* Discard not supported = nothing to do. */
@@ -11338,43 +11161,52 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
11338 11161
11339 while (1) { 11162 while (1) {
11340 struct btrfs_fs_info *fs_info = device->fs_info; 11163 struct btrfs_fs_info *fs_info = device->fs_info;
11341 struct btrfs_transaction *trans;
11342 u64 bytes; 11164 u64 bytes;
11343 11165
11344 ret = mutex_lock_interruptible(&fs_info->chunk_mutex); 11166 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
11345 if (ret) 11167 if (ret)
11346 break; 11168 break;
11347 11169
11348 ret = down_read_killable(&fs_info->commit_root_sem); 11170 find_first_clear_extent_bit(&device->alloc_state, start,
11349 if (ret) { 11171 &start, &end,
11172 CHUNK_TRIMMED | CHUNK_ALLOCATED);
11173 /*
11174 * If find_first_clear_extent_bit find a range that spans the
11175 * end of the device it will set end to -1, in this case it's up
11176 * to the caller to trim the value to the size of the device.
11177 */
11178 end = min(end, device->total_bytes - 1);
11179 len = end - start + 1;
11180
11181 /* We didn't find any extents */
11182 if (!len) {
11350 mutex_unlock(&fs_info->chunk_mutex); 11183 mutex_unlock(&fs_info->chunk_mutex);
11184 ret = 0;
11351 break; 11185 break;
11352 } 11186 }
11353 11187
11354 spin_lock(&fs_info->trans_lock); 11188 /* Keep going until we satisfy minlen or reach end of space */
11355 trans = fs_info->running_transaction; 11189 if (len < range->minlen) {
11356 if (trans) 11190 mutex_unlock(&fs_info->chunk_mutex);
11357 refcount_inc(&trans->use_count); 11191 start += len;
11358 spin_unlock(&fs_info->trans_lock); 11192 continue;
11359
11360 if (!trans)
11361 up_read(&fs_info->commit_root_sem);
11362
11363 ret = find_free_dev_extent_start(trans, device, minlen, start,
11364 &start, &len);
11365 if (trans) {
11366 up_read(&fs_info->commit_root_sem);
11367 btrfs_put_transaction(trans);
11368 } 11193 }
11369 11194
11370 if (ret) { 11195 /* If we are out of the passed range break */
11196 if (start > range->start + range->len - 1) {
11371 mutex_unlock(&fs_info->chunk_mutex); 11197 mutex_unlock(&fs_info->chunk_mutex);
11372 if (ret == -ENOSPC)
11373 ret = 0;
11374 break; 11198 break;
11375 } 11199 }
11376 11200
11377 ret = btrfs_issue_discard(device->bdev, start, len, &bytes); 11201 start = max(range->start, start);
11202 len = min(range->len, len);
11203
11204 ret = btrfs_issue_discard(device->bdev, start, len,
11205 &bytes);
11206 if (!ret)
11207 set_extent_bits(&device->alloc_state, start,
11208 start + bytes - 1,
11209 CHUNK_TRIMMED);
11378 mutex_unlock(&fs_info->chunk_mutex); 11210 mutex_unlock(&fs_info->chunk_mutex);
11379 11211
11380 if (ret) 11212 if (ret)
@@ -11383,6 +11215,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
11383 start += len; 11215 start += len;
11384 *trimmed += bytes; 11216 *trimmed += bytes;
11385 11217
11218 /* We've trimmed enough */
11219 if (*trimmed >= range->len)
11220 break;
11221
11386 if (fatal_signal_pending(current)) { 11222 if (fatal_signal_pending(current)) {
11387 ret = -ERESTARTSYS; 11223 ret = -ERESTARTSYS;
11388 break; 11224 break;
@@ -11419,7 +11255,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
11419 int ret = 0; 11255 int ret = 0;
11420 11256
11421 cache = btrfs_lookup_first_block_group(fs_info, range->start); 11257 cache = btrfs_lookup_first_block_group(fs_info, range->start);
11422 for (; cache; cache = next_block_group(fs_info, cache)) { 11258 for (; cache; cache = next_block_group(cache)) {
11423 if (cache->key.objectid >= (range->start + range->len)) { 11259 if (cache->key.objectid >= (range->start + range->len)) {
11424 btrfs_put_block_group(cache); 11260 btrfs_put_block_group(cache);
11425 break; 11261 break;
@@ -11466,8 +11302,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
11466 mutex_lock(&fs_info->fs_devices->device_list_mutex); 11302 mutex_lock(&fs_info->fs_devices->device_list_mutex);
11467 devices = &fs_info->fs_devices->devices; 11303 devices = &fs_info->fs_devices->devices;
11468 list_for_each_entry(device, devices, dev_list) { 11304 list_for_each_entry(device, devices, dev_list) {
11469 ret = btrfs_trim_free_extents(device, range->minlen, 11305 ret = btrfs_trim_free_extents(device, range, &group_trimmed);
11470 &group_trimmed);
11471 if (ret) { 11306 if (ret) {
11472 dev_failed++; 11307 dev_failed++;
11473 dev_ret = ret; 11308 dev_ret = ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ca8b8e785cf3..13fca7bfc1f2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -109,8 +109,6 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
109#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) 109#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
110#endif 110#endif
111 111
112#define BUFFER_LRU_MAX 64
113
114struct tree_entry { 112struct tree_entry {
115 u64 start; 113 u64 start;
116 u64 end; 114 u64 end;
@@ -151,34 +149,51 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
151 unsigned long bio_flags) 149 unsigned long bio_flags)
152{ 150{
153 blk_status_t ret = 0; 151 blk_status_t ret = 0;
154 struct bio_vec *bvec = bio_last_bvec_all(bio);
155 struct bio_vec bv;
156 struct extent_io_tree *tree = bio->bi_private; 152 struct extent_io_tree *tree = bio->bi_private;
157 u64 start;
158
159 mp_bvec_last_segment(bvec, &bv);
160 start = page_offset(bv.bv_page) + bv.bv_offset;
161 153
162 bio->bi_private = NULL; 154 bio->bi_private = NULL;
163 155
164 if (tree->ops) 156 if (tree->ops)
165 ret = tree->ops->submit_bio_hook(tree->private_data, bio, 157 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
166 mirror_num, bio_flags, start); 158 mirror_num, bio_flags);
167 else 159 else
168 btrfsic_submit_bio(bio); 160 btrfsic_submit_bio(bio);
169 161
170 return blk_status_to_errno(ret); 162 return blk_status_to_errno(ret);
171} 163}
172 164
173static void flush_write_bio(struct extent_page_data *epd) 165/* Cleanup unsubmitted bios */
166static void end_write_bio(struct extent_page_data *epd, int ret)
174{ 167{
175 if (epd->bio) { 168 if (epd->bio) {
176 int ret; 169 epd->bio->bi_status = errno_to_blk_status(ret);
170 bio_endio(epd->bio);
171 epd->bio = NULL;
172 }
173}
177 174
175/*
176 * Submit bio from extent page data via submit_one_bio
177 *
178 * Return 0 if everything is OK.
179 * Return <0 for error.
180 */
181static int __must_check flush_write_bio(struct extent_page_data *epd)
182{
183 int ret = 0;
184
185 if (epd->bio) {
178 ret = submit_one_bio(epd->bio, 0, 0); 186 ret = submit_one_bio(epd->bio, 0, 0);
179 BUG_ON(ret < 0); /* -ENOMEM */ 187 /*
188 * Clean up of epd->bio is handled by its endio function.
189 * And endio is either triggered by successful bio execution
190 * or the error handler of submit bio hook.
191 * So at this point, no matter what happened, we don't need
192 * to clean up epd->bio.
193 */
180 epd->bio = NULL; 194 epd->bio = NULL;
181 } 195 }
196 return ret;
182} 197}
183 198
184int __init extent_io_init(void) 199int __init extent_io_init(void)
@@ -232,14 +247,46 @@ void __cold extent_io_exit(void)
232 bioset_exit(&btrfs_bioset); 247 bioset_exit(&btrfs_bioset);
233} 248}
234 249
235void extent_io_tree_init(struct extent_io_tree *tree, 250void extent_io_tree_init(struct btrfs_fs_info *fs_info,
251 struct extent_io_tree *tree, unsigned int owner,
236 void *private_data) 252 void *private_data)
237{ 253{
254 tree->fs_info = fs_info;
238 tree->state = RB_ROOT; 255 tree->state = RB_ROOT;
239 tree->ops = NULL; 256 tree->ops = NULL;
240 tree->dirty_bytes = 0; 257 tree->dirty_bytes = 0;
241 spin_lock_init(&tree->lock); 258 spin_lock_init(&tree->lock);
242 tree->private_data = private_data; 259 tree->private_data = private_data;
260 tree->owner = owner;
261}
262
263void extent_io_tree_release(struct extent_io_tree *tree)
264{
265 spin_lock(&tree->lock);
266 /*
267 * Do a single barrier for the waitqueue_active check here, the state
268 * of the waitqueue should not change once extent_io_tree_release is
269 * called.
270 */
271 smp_mb();
272 while (!RB_EMPTY_ROOT(&tree->state)) {
273 struct rb_node *node;
274 struct extent_state *state;
275
276 node = rb_first(&tree->state);
277 state = rb_entry(node, struct extent_state, rb_node);
278 rb_erase(&state->rb_node, &tree->state);
279 RB_CLEAR_NODE(&state->rb_node);
280 /*
281 * btree io trees aren't supposed to have tasks waiting for
282 * changes in the flags of extent states ever.
283 */
284 ASSERT(!waitqueue_active(&state->wq));
285 free_extent_state(state);
286
287 cond_resched_lock(&tree->lock);
288 }
289 spin_unlock(&tree->lock);
243} 290}
244 291
245static struct extent_state *alloc_extent_state(gfp_t mask) 292static struct extent_state *alloc_extent_state(gfp_t mask)
@@ -400,7 +447,7 @@ static void merge_state(struct extent_io_tree *tree,
400 struct extent_state *other; 447 struct extent_state *other;
401 struct rb_node *other_node; 448 struct rb_node *other_node;
402 449
403 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 450 if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
404 return; 451 return;
405 452
406 other_node = rb_prev(&state->rb_node); 453 other_node = rb_prev(&state->rb_node);
@@ -611,6 +658,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
611 int clear = 0; 658 int clear = 0;
612 659
613 btrfs_debug_check_extent_io_range(tree, start, end); 660 btrfs_debug_check_extent_io_range(tree, start, end);
661 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
614 662
615 if (bits & EXTENT_DELALLOC) 663 if (bits & EXTENT_DELALLOC)
616 bits |= EXTENT_NORESERVE; 664 bits |= EXTENT_NORESERVE;
@@ -618,7 +666,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
618 if (delete) 666 if (delete)
619 bits |= ~EXTENT_CTLBITS; 667 bits |= ~EXTENT_CTLBITS;
620 668
621 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 669 if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
622 clear = 1; 670 clear = 1;
623again: 671again:
624 if (!prealloc && gfpflags_allow_blocking(mask)) { 672 if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -850,7 +898,7 @@ static void cache_state(struct extent_state *state,
850 struct extent_state **cached_ptr) 898 struct extent_state **cached_ptr)
851{ 899{
852 return cache_state_if_flags(state, cached_ptr, 900 return cache_state_if_flags(state, cached_ptr,
853 EXTENT_IOBITS | EXTENT_BOUNDARY); 901 EXTENT_LOCKED | EXTENT_BOUNDARY);
854} 902}
855 903
856/* 904/*
@@ -880,6 +928,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
880 u64 last_end; 928 u64 last_end;
881 929
882 btrfs_debug_check_extent_io_range(tree, start, end); 930 btrfs_debug_check_extent_io_range(tree, start, end);
931 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
883 932
884again: 933again:
885 if (!prealloc && gfpflags_allow_blocking(mask)) { 934 if (!prealloc && gfpflags_allow_blocking(mask)) {
@@ -1112,6 +1161,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1112 bool first_iteration = true; 1161 bool first_iteration = true;
1113 1162
1114 btrfs_debug_check_extent_io_range(tree, start, end); 1163 btrfs_debug_check_extent_io_range(tree, start, end);
1164 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1165 clear_bits);
1115 1166
1116again: 1167again:
1117 if (!prealloc) { 1168 if (!prealloc) {
@@ -1311,6 +1362,13 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1311 changeset); 1362 changeset);
1312} 1363}
1313 1364
1365int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
1366 unsigned bits)
1367{
1368 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
1369 GFP_NOWAIT, NULL);
1370}
1371
1314int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 1372int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1315 unsigned bits, int wake, int delete, 1373 unsigned bits, int wake, int delete,
1316 struct extent_state **cached) 1374 struct extent_state **cached)
@@ -1478,6 +1536,79 @@ out:
1478 return ret; 1536 return ret;
1479} 1537}
1480 1538
1539/**
1540 * find_first_clear_extent_bit - finds the first range that has @bits not set
1541 * and that starts after @start
1542 *
1543 * @tree - the tree to search
1544 * @start - the offset at/after which the found extent should start
1545 * @start_ret - records the beginning of the range
1546 * @end_ret - records the end of the range (inclusive)
1547 * @bits - the set of bits which must be unset
1548 *
1549 * Since unallocated range is also considered one which doesn't have the bits
1550 * set it's possible that @end_ret contains -1, this happens in case the range
1551 * spans (last_range_end, end of device]. In this case it's up to the caller to
1552 * trim @end_ret to the appropriate size.
1553 */
1554void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1555 u64 *start_ret, u64 *end_ret, unsigned bits)
1556{
1557 struct extent_state *state;
1558 struct rb_node *node, *prev = NULL, *next;
1559
1560 spin_lock(&tree->lock);
1561
1562 /* Find first extent with bits cleared */
1563 while (1) {
1564 node = __etree_search(tree, start, &next, &prev, NULL, NULL);
1565 if (!node) {
1566 node = next;
1567 if (!node) {
1568 /*
1569 * We are past the last allocated chunk,
1570 * set start at the end of the last extent. The
1571 * device alloc tree should never be empty so
1572 * prev is always set.
1573 */
1574 ASSERT(prev);
1575 state = rb_entry(prev, struct extent_state, rb_node);
1576 *start_ret = state->end + 1;
1577 *end_ret = -1;
1578 goto out;
1579 }
1580 }
1581 state = rb_entry(node, struct extent_state, rb_node);
1582 if (in_range(start, state->start, state->end - state->start + 1) &&
1583 (state->state & bits)) {
1584 start = state->end + 1;
1585 } else {
1586 *start_ret = start;
1587 break;
1588 }
1589 }
1590
1591 /*
1592 * Find the longest stretch from start until an entry which has the
1593 * bits set
1594 */
1595 while (1) {
1596 state = rb_entry(node, struct extent_state, rb_node);
1597 if (state->end >= start && !(state->state & bits)) {
1598 *end_ret = state->end;
1599 } else {
1600 *end_ret = state->start - 1;
1601 break;
1602 }
1603
1604 node = rb_next(node);
1605 if (!node)
1606 break;
1607 }
1608out:
1609 spin_unlock(&tree->lock);
1610}
1611
1481/* 1612/*
1482 * find a contiguous range of bytes in the file marked as delalloc, not 1613 * find a contiguous range of bytes in the file marked as delalloc, not
1483 * more than 'max_bytes'. start and end are used to return the range, 1614 * more than 'max_bytes'. start and end are used to return the range,
@@ -2061,9 +2192,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
2061 return 0; 2192 return 0;
2062} 2193}
2063 2194
2064int repair_eb_io_failure(struct btrfs_fs_info *fs_info, 2195int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
2065 struct extent_buffer *eb, int mirror_num)
2066{ 2196{
2197 struct btrfs_fs_info *fs_info = eb->fs_info;
2067 u64 start = eb->start; 2198 u64 start = eb->start;
2068 int i, num_pages = num_extent_pages(eb); 2199 int i, num_pages = num_extent_pages(eb);
2069 int ret = 0; 2200 int ret = 0;
@@ -2409,7 +2540,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2409 read_mode, failrec->this_mirror, failrec->in_validation); 2540 read_mode, failrec->this_mirror, failrec->in_validation);
2410 2541
2411 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror, 2542 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2412 failrec->bio_flags, 0); 2543 failrec->bio_flags);
2413 if (status) { 2544 if (status) {
2414 free_io_failure(failure_tree, tree, failrec); 2545 free_io_failure(failure_tree, tree, failrec);
2415 bio_put(bio); 2546 bio_put(bio);
@@ -2607,8 +2738,6 @@ static void end_bio_extent_readpage(struct bio *bio)
2607 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, 2738 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2608 &eb->bflags)) 2739 &eb->bflags))
2609 btree_readahead_hook(eb, -EIO); 2740 btree_readahead_hook(eb, -EIO);
2610
2611 ret = -EIO;
2612 } 2741 }
2613readpage_ok: 2742readpage_ok:
2614 if (likely(uptodate)) { 2743 if (likely(uptodate)) {
@@ -3069,7 +3198,7 @@ out:
3069 return ret; 3198 return ret;
3070} 3199}
3071 3200
3072static inline void __do_contiguous_readpages(struct extent_io_tree *tree, 3201static inline void contiguous_readpages(struct extent_io_tree *tree,
3073 struct page *pages[], int nr_pages, 3202 struct page *pages[], int nr_pages,
3074 u64 start, u64 end, 3203 u64 start, u64 end,
3075 struct extent_map **em_cached, 3204 struct extent_map **em_cached,
@@ -3100,46 +3229,6 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3100 } 3229 }
3101} 3230}
3102 3231
3103static void __extent_readpages(struct extent_io_tree *tree,
3104 struct page *pages[],
3105 int nr_pages,
3106 struct extent_map **em_cached,
3107 struct bio **bio, unsigned long *bio_flags,
3108 u64 *prev_em_start)
3109{
3110 u64 start = 0;
3111 u64 end = 0;
3112 u64 page_start;
3113 int index;
3114 int first_index = 0;
3115
3116 for (index = 0; index < nr_pages; index++) {
3117 page_start = page_offset(pages[index]);
3118 if (!end) {
3119 start = page_start;
3120 end = start + PAGE_SIZE - 1;
3121 first_index = index;
3122 } else if (end + 1 == page_start) {
3123 end += PAGE_SIZE;
3124 } else {
3125 __do_contiguous_readpages(tree, &pages[first_index],
3126 index - first_index, start,
3127 end, em_cached,
3128 bio, bio_flags,
3129 prev_em_start);
3130 start = page_start;
3131 end = start + PAGE_SIZE - 1;
3132 first_index = index;
3133 }
3134 }
3135
3136 if (end)
3137 __do_contiguous_readpages(tree, &pages[first_index],
3138 index - first_index, start,
3139 end, em_cached, bio,
3140 bio_flags, prev_em_start);
3141}
3142
3143static int __extent_read_full_page(struct extent_io_tree *tree, 3232static int __extent_read_full_page(struct extent_io_tree *tree,
3144 struct page *page, 3233 struct page *page,
3145 get_extent_t *get_extent, 3234 get_extent_t *get_extent,
@@ -3419,6 +3508,9 @@ done:
3419 * records are inserted to lock ranges in the tree, and as dirty areas 3508 * records are inserted to lock ranges in the tree, and as dirty areas
3420 * are found, they are marked writeback. Then the lock bits are removed 3509 * are found, they are marked writeback. Then the lock bits are removed
3421 * and the end_io handler clears the writeback ranges 3510 * and the end_io handler clears the writeback ranges
3511 *
3512 * Return 0 if everything goes well.
3513 * Return <0 for error.
3422 */ 3514 */
3423static int __extent_writepage(struct page *page, struct writeback_control *wbc, 3515static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3424 struct extent_page_data *epd) 3516 struct extent_page_data *epd)
@@ -3488,6 +3580,7 @@ done:
3488 end_extent_writepage(page, ret, start, page_end); 3580 end_extent_writepage(page, ret, start, page_end);
3489 } 3581 }
3490 unlock_page(page); 3582 unlock_page(page);
3583 ASSERT(ret <= 0);
3491 return ret; 3584 return ret;
3492 3585
3493done_unlocked: 3586done_unlocked:
@@ -3500,18 +3593,26 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3500 TASK_UNINTERRUPTIBLE); 3593 TASK_UNINTERRUPTIBLE);
3501} 3594}
3502 3595
3503static noinline_for_stack int 3596/*
3504lock_extent_buffer_for_io(struct extent_buffer *eb, 3597 * Lock eb pages and flush the bio if we can't the locks
3505 struct btrfs_fs_info *fs_info, 3598 *
3599 * Return 0 if nothing went wrong
3600 * Return >0 is same as 0, except bio is not submitted
3601 * Return <0 if something went wrong, no page is locked
3602 */
3603static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
3506 struct extent_page_data *epd) 3604 struct extent_page_data *epd)
3507{ 3605{
3508 int i, num_pages; 3606 struct btrfs_fs_info *fs_info = eb->fs_info;
3607 int i, num_pages, failed_page_nr;
3509 int flush = 0; 3608 int flush = 0;
3510 int ret = 0; 3609 int ret = 0;
3511 3610
3512 if (!btrfs_try_tree_write_lock(eb)) { 3611 if (!btrfs_try_tree_write_lock(eb)) {
3612 ret = flush_write_bio(epd);
3613 if (ret < 0)
3614 return ret;
3513 flush = 1; 3615 flush = 1;
3514 flush_write_bio(epd);
3515 btrfs_tree_lock(eb); 3616 btrfs_tree_lock(eb);
3516 } 3617 }
3517 3618
@@ -3520,7 +3621,9 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
3520 if (!epd->sync_io) 3621 if (!epd->sync_io)
3521 return 0; 3622 return 0;
3522 if (!flush) { 3623 if (!flush) {
3523 flush_write_bio(epd); 3624 ret = flush_write_bio(epd);
3625 if (ret < 0)
3626 return ret;
3524 flush = 1; 3627 flush = 1;
3525 } 3628 }
3526 while (1) { 3629 while (1) {
@@ -3561,7 +3664,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
3561 3664
3562 if (!trylock_page(p)) { 3665 if (!trylock_page(p)) {
3563 if (!flush) { 3666 if (!flush) {
3564 flush_write_bio(epd); 3667 ret = flush_write_bio(epd);
3668 if (ret < 0) {
3669 failed_page_nr = i;
3670 goto err_unlock;
3671 }
3565 flush = 1; 3672 flush = 1;
3566 } 3673 }
3567 lock_page(p); 3674 lock_page(p);
@@ -3569,6 +3676,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
3569 } 3676 }
3570 3677
3571 return ret; 3678 return ret;
3679err_unlock:
3680 /* Unlock already locked pages */
3681 for (i = 0; i < failed_page_nr; i++)
3682 unlock_page(eb->pages[i]);
3683 return ret;
3572} 3684}
3573 3685
3574static void end_extent_buffer_writeback(struct extent_buffer *eb) 3686static void end_extent_buffer_writeback(struct extent_buffer *eb)
@@ -3672,10 +3784,10 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
3672} 3784}
3673 3785
3674static noinline_for_stack int write_one_eb(struct extent_buffer *eb, 3786static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3675 struct btrfs_fs_info *fs_info,
3676 struct writeback_control *wbc, 3787 struct writeback_control *wbc,
3677 struct extent_page_data *epd) 3788 struct extent_page_data *epd)
3678{ 3789{
3790 struct btrfs_fs_info *fs_info = eb->fs_info;
3679 struct block_device *bdev = fs_info->fs_devices->latest_bdev; 3791 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3680 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; 3792 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3681 u64 offset = eb->start; 3793 u64 offset = eb->start;
@@ -3701,7 +3813,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3701 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0 3813 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
3702 */ 3814 */
3703 start = btrfs_item_nr_offset(nritems); 3815 start = btrfs_item_nr_offset(nritems);
3704 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb); 3816 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
3705 memzero_extent_buffer(eb, start, end - start); 3817 memzero_extent_buffer(eb, start, end - start);
3706 } 3818 }
3707 3819
@@ -3744,7 +3856,6 @@ int btree_write_cache_pages(struct address_space *mapping,
3744 struct writeback_control *wbc) 3856 struct writeback_control *wbc)
3745{ 3857{
3746 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree; 3858 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3747 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3748 struct extent_buffer *eb, *prev_eb = NULL; 3859 struct extent_buffer *eb, *prev_eb = NULL;
3749 struct extent_page_data epd = { 3860 struct extent_page_data epd = {
3750 .bio = NULL, 3861 .bio = NULL,
@@ -3819,13 +3930,13 @@ retry:
3819 continue; 3930 continue;
3820 3931
3821 prev_eb = eb; 3932 prev_eb = eb;
3822 ret = lock_extent_buffer_for_io(eb, fs_info, &epd); 3933 ret = lock_extent_buffer_for_io(eb, &epd);
3823 if (!ret) { 3934 if (!ret) {
3824 free_extent_buffer(eb); 3935 free_extent_buffer(eb);
3825 continue; 3936 continue;
3826 } 3937 }
3827 3938
3828 ret = write_one_eb(eb, fs_info, wbc, &epd); 3939 ret = write_one_eb(eb, wbc, &epd);
3829 if (ret) { 3940 if (ret) {
3830 done = 1; 3941 done = 1;
3831 free_extent_buffer(eb); 3942 free_extent_buffer(eb);
@@ -3852,7 +3963,12 @@ retry:
3852 index = 0; 3963 index = 0;
3853 goto retry; 3964 goto retry;
3854 } 3965 }
3855 flush_write_bio(&epd); 3966 ASSERT(ret <= 0);
3967 if (ret < 0) {
3968 end_write_bio(&epd, ret);
3969 return ret;
3970 }
3971 ret = flush_write_bio(&epd);
3856 return ret; 3972 return ret;
3857} 3973}
3858 3974
@@ -3949,7 +4065,8 @@ retry:
3949 * tmpfs file mapping 4065 * tmpfs file mapping
3950 */ 4066 */
3951 if (!trylock_page(page)) { 4067 if (!trylock_page(page)) {
3952 flush_write_bio(epd); 4068 ret = flush_write_bio(epd);
4069 BUG_ON(ret < 0);
3953 lock_page(page); 4070 lock_page(page);
3954 } 4071 }
3955 4072
@@ -3959,8 +4076,10 @@ retry:
3959 } 4076 }
3960 4077
3961 if (wbc->sync_mode != WB_SYNC_NONE) { 4078 if (wbc->sync_mode != WB_SYNC_NONE) {
3962 if (PageWriteback(page)) 4079 if (PageWriteback(page)) {
3963 flush_write_bio(epd); 4080 ret = flush_write_bio(epd);
4081 BUG_ON(ret < 0);
4082 }
3964 wait_on_page_writeback(page); 4083 wait_on_page_writeback(page);
3965 } 4084 }
3966 4085
@@ -3971,11 +4090,6 @@ retry:
3971 } 4090 }
3972 4091
3973 ret = __extent_writepage(page, wbc, epd); 4092 ret = __extent_writepage(page, wbc, epd);
3974
3975 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
3976 unlock_page(page);
3977 ret = 0;
3978 }
3979 if (ret < 0) { 4093 if (ret < 0) {
3980 /* 4094 /*
3981 * done_index is set past this page, 4095 * done_index is set past this page,
@@ -4029,8 +4143,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4029 }; 4143 };
4030 4144
4031 ret = __extent_writepage(page, wbc, &epd); 4145 ret = __extent_writepage(page, wbc, &epd);
4146 ASSERT(ret <= 0);
4147 if (ret < 0) {
4148 end_write_bio(&epd, ret);
4149 return ret;
4150 }
4032 4151
4033 flush_write_bio(&epd); 4152 ret = flush_write_bio(&epd);
4153 ASSERT(ret <= 0);
4034 return ret; 4154 return ret;
4035} 4155}
4036 4156
@@ -4070,7 +4190,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4070 start += PAGE_SIZE; 4190 start += PAGE_SIZE;
4071 } 4191 }
4072 4192
4073 flush_write_bio(&epd); 4193 ASSERT(ret <= 0);
4194 if (ret < 0) {
4195 end_write_bio(&epd, ret);
4196 return ret;
4197 }
4198 ret = flush_write_bio(&epd);
4074 return ret; 4199 return ret;
4075} 4200}
4076 4201
@@ -4086,7 +4211,12 @@ int extent_writepages(struct address_space *mapping,
4086 }; 4211 };
4087 4212
4088 ret = extent_write_cache_pages(mapping, wbc, &epd); 4213 ret = extent_write_cache_pages(mapping, wbc, &epd);
4089 flush_write_bio(&epd); 4214 ASSERT(ret <= 0);
4215 if (ret < 0) {
4216 end_write_bio(&epd, ret);
4217 return ret;
4218 }
4219 ret = flush_write_bio(&epd);
4090 return ret; 4220 return ret;
4091} 4221}
4092 4222
@@ -4102,6 +4232,8 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
4102 u64 prev_em_start = (u64)-1; 4232 u64 prev_em_start = (u64)-1;
4103 4233
4104 while (!list_empty(pages)) { 4234 while (!list_empty(pages)) {
4235 u64 contig_end = 0;
4236
4105 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) { 4237 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
4106 struct page *page = lru_to_page(pages); 4238 struct page *page = lru_to_page(pages);
4107 4239
@@ -4110,14 +4242,22 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
4110 if (add_to_page_cache_lru(page, mapping, page->index, 4242 if (add_to_page_cache_lru(page, mapping, page->index,
4111 readahead_gfp_mask(mapping))) { 4243 readahead_gfp_mask(mapping))) {
4112 put_page(page); 4244 put_page(page);
4113 continue; 4245 break;
4114 } 4246 }
4115 4247
4116 pagepool[nr++] = page; 4248 pagepool[nr++] = page;
4249 contig_end = page_offset(page) + PAGE_SIZE - 1;
4117 } 4250 }
4118 4251
4119 __extent_readpages(tree, pagepool, nr, &em_cached, &bio, 4252 if (nr) {
4120 &bio_flags, &prev_em_start); 4253 u64 contig_start = page_offset(pagepool[0]);
4254
4255 ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
4256
4257 contiguous_readpages(tree, pagepool, nr, contig_start,
4258 contig_end, &em_cached, &bio, &bio_flags,
4259 &prev_em_start);
4260 }
4121 } 4261 }
4122 4262
4123 if (em_cached) 4263 if (em_cached)
@@ -4166,10 +4306,9 @@ static int try_release_extent_state(struct extent_io_tree *tree,
4166 u64 end = start + PAGE_SIZE - 1; 4306 u64 end = start + PAGE_SIZE - 1;
4167 int ret = 1; 4307 int ret = 1;
4168 4308
4169 if (test_range_bit(tree, start, end, 4309 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
4170 EXTENT_IOBITS, 0, NULL))
4171 ret = 0; 4310 ret = 0;
4172 else { 4311 } else {
4173 /* 4312 /*
4174 * at this point we can safely clear everything except the 4313 * at this point we can safely clear everything except the
4175 * locked bit and the nodatasum bit 4314 * locked bit and the nodatasum bit
@@ -4222,8 +4361,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
4222 } 4361 }
4223 if (!test_range_bit(tree, em->start, 4362 if (!test_range_bit(tree, em->start,
4224 extent_map_end(em) - 1, 4363 extent_map_end(em) - 1,
4225 EXTENT_LOCKED | EXTENT_WRITEBACK, 4364 EXTENT_LOCKED, 0, NULL)) {
4226 0, NULL)) {
4227 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4365 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4228 &btrfs_inode->runtime_flags); 4366 &btrfs_inode->runtime_flags);
4229 remove_extent_mapping(map, em); 4367 remove_extent_mapping(map, em);
@@ -4372,8 +4510,7 @@ try_submit_last:
4372 * In this case, the first extent range will be cached but not emitted. 4510 * In this case, the first extent range will be cached but not emitted.
4373 * So we must emit it before ending extent_fiemap(). 4511 * So we must emit it before ending extent_fiemap().
4374 */ 4512 */
4375static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, 4513static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
4376 struct fiemap_extent_info *fieinfo,
4377 struct fiemap_cache *cache) 4514 struct fiemap_cache *cache)
4378{ 4515{
4379 int ret; 4516 int ret;
@@ -4580,7 +4717,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4580 } 4717 }
4581out_free: 4718out_free:
4582 if (!ret) 4719 if (!ret)
4583 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache); 4720 ret = emit_last_fiemap_cache(fieinfo, &cache);
4584 free_extent_map(em); 4721 free_extent_map(em);
4585out: 4722out:
4586 btrfs_free_path(path); 4723 btrfs_free_path(path);
@@ -4672,13 +4809,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4672 eb->fs_info = fs_info; 4809 eb->fs_info = fs_info;
4673 eb->bflags = 0; 4810 eb->bflags = 0;
4674 rwlock_init(&eb->lock); 4811 rwlock_init(&eb->lock);
4675 atomic_set(&eb->write_locks, 0);
4676 atomic_set(&eb->read_locks, 0);
4677 atomic_set(&eb->blocking_readers, 0); 4812 atomic_set(&eb->blocking_readers, 0);
4678 atomic_set(&eb->blocking_writers, 0); 4813 atomic_set(&eb->blocking_writers, 0);
4679 atomic_set(&eb->spinning_readers, 0); 4814 eb->lock_nested = false;
4680 atomic_set(&eb->spinning_writers, 0);
4681 eb->lock_nested = 0;
4682 init_waitqueue_head(&eb->write_lock_wq); 4815 init_waitqueue_head(&eb->write_lock_wq);
4683 init_waitqueue_head(&eb->read_lock_wq); 4816 init_waitqueue_head(&eb->read_lock_wq);
4684 4817
@@ -4695,6 +4828,13 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4695 > MAX_INLINE_EXTENT_BUFFER_SIZE); 4828 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4696 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); 4829 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4697 4830
4831#ifdef CONFIG_BTRFS_DEBUG
4832 atomic_set(&eb->spinning_writers, 0);
4833 atomic_set(&eb->spinning_readers, 0);
4834 atomic_set(&eb->read_locks, 0);
4835 atomic_set(&eb->write_locks, 0);
4836#endif
4837
4698 return eb; 4838 return eb;
4699} 4839}
4700 4840
@@ -5183,8 +5323,7 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
5183 } 5323 }
5184} 5324}
5185 5325
5186int read_extent_buffer_pages(struct extent_io_tree *tree, 5326int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
5187 struct extent_buffer *eb, int wait, int mirror_num)
5188{ 5327{
5189 int i; 5328 int i;
5190 struct page *page; 5329 struct page *page;
@@ -5196,6 +5335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
5196 unsigned long num_reads = 0; 5335 unsigned long num_reads = 0;
5197 struct bio *bio = NULL; 5336 struct bio *bio = NULL;
5198 unsigned long bio_flags = 0; 5337 unsigned long bio_flags = 0;
5338 struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
5199 5339
5200 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 5340 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5201 return 0; 5341 return 0;
@@ -5746,13 +5886,13 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5746 btrfs_err(fs_info, 5886 btrfs_err(fs_info,
5747 "memmove bogus src_offset %lu move len %lu dst len %lu", 5887 "memmove bogus src_offset %lu move len %lu dst len %lu",
5748 src_offset, len, dst->len); 5888 src_offset, len, dst->len);
5749 BUG_ON(1); 5889 BUG();
5750 } 5890 }
5751 if (dst_offset + len > dst->len) { 5891 if (dst_offset + len > dst->len) {
5752 btrfs_err(fs_info, 5892 btrfs_err(fs_info,
5753 "memmove bogus dst_offset %lu move len %lu dst len %lu", 5893 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5754 dst_offset, len, dst->len); 5894 dst_offset, len, dst->len);
5755 BUG_ON(1); 5895 BUG();
5756 } 5896 }
5757 5897
5758 while (len > 0) { 5898 while (len > 0) {
@@ -5793,13 +5933,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5793 btrfs_err(fs_info, 5933 btrfs_err(fs_info,
5794 "memmove bogus src_offset %lu move len %lu len %lu", 5934 "memmove bogus src_offset %lu move len %lu len %lu",
5795 src_offset, len, dst->len); 5935 src_offset, len, dst->len);
5796 BUG_ON(1); 5936 BUG();
5797 } 5937 }
5798 if (dst_offset + len > dst->len) { 5938 if (dst_offset + len > dst->len) {
5799 btrfs_err(fs_info, 5939 btrfs_err(fs_info,
5800 "memmove bogus dst_offset %lu move len %lu len %lu", 5940 "memmove bogus dst_offset %lu move len %lu len %lu",
5801 dst_offset, len, dst->len); 5941 dst_offset, len, dst->len);
5802 BUG_ON(1); 5942 BUG();
5803 } 5943 }
5804 if (dst_offset < src_offset) { 5944 if (dst_offset < src_offset) {
5805 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 5945 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 08749e0b9c32..aa18a16a6ed7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -9,27 +9,34 @@
9 9
10/* bits for the extent state */ 10/* bits for the extent state */
11#define EXTENT_DIRTY (1U << 0) 11#define EXTENT_DIRTY (1U << 0)
12#define EXTENT_WRITEBACK (1U << 1) 12#define EXTENT_UPTODATE (1U << 1)
13#define EXTENT_UPTODATE (1U << 2) 13#define EXTENT_LOCKED (1U << 2)
14#define EXTENT_LOCKED (1U << 3) 14#define EXTENT_NEW (1U << 3)
15#define EXTENT_NEW (1U << 4) 15#define EXTENT_DELALLOC (1U << 4)
16#define EXTENT_DELALLOC (1U << 5) 16#define EXTENT_DEFRAG (1U << 5)
17#define EXTENT_DEFRAG (1U << 6) 17#define EXTENT_BOUNDARY (1U << 6)
18#define EXTENT_BOUNDARY (1U << 9) 18#define EXTENT_NODATASUM (1U << 7)
19#define EXTENT_NODATASUM (1U << 10) 19#define EXTENT_CLEAR_META_RESV (1U << 8)
20#define EXTENT_CLEAR_META_RESV (1U << 11) 20#define EXTENT_NEED_WAIT (1U << 9)
21#define EXTENT_NEED_WAIT (1U << 12) 21#define EXTENT_DAMAGED (1U << 10)
22#define EXTENT_DAMAGED (1U << 13) 22#define EXTENT_NORESERVE (1U << 11)
23#define EXTENT_NORESERVE (1U << 14) 23#define EXTENT_QGROUP_RESERVED (1U << 12)
24#define EXTENT_QGROUP_RESERVED (1U << 15) 24#define EXTENT_CLEAR_DATA_RESV (1U << 13)
25#define EXTENT_CLEAR_DATA_RESV (1U << 16) 25#define EXTENT_DELALLOC_NEW (1U << 14)
26#define EXTENT_DELALLOC_NEW (1U << 17)
27#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
28#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \ 26#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
29 EXTENT_CLEAR_DATA_RESV) 27 EXTENT_CLEAR_DATA_RESV)
30#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) 28#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
31 29
32/* 30/*
31 * Redefined bits above which are used only in the device allocation tree,
32 * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
33 * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
34 * manipulation functions
35 */
36#define CHUNK_ALLOCATED EXTENT_DIRTY
37#define CHUNK_TRIMMED EXTENT_DEFRAG
38
39/*
33 * flags for bio submission. The high bits indicate the compression 40 * flags for bio submission. The high bits indicate the compression
34 * type for this bio 41 * type for this bio
35 */ 42 */
@@ -88,9 +95,6 @@ struct btrfs_inode;
88struct btrfs_io_bio; 95struct btrfs_io_bio;
89struct io_failure_record; 96struct io_failure_record;
90 97
91typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
92 int mirror_num, unsigned long bio_flags,
93 u64 bio_offset);
94 98
95typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, 99typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
96 struct bio *bio, u64 bio_offset); 100 struct bio *bio, u64 bio_offset);
@@ -100,17 +104,34 @@ struct extent_io_ops {
100 * The following callbacks must be always defined, the function 104 * The following callbacks must be always defined, the function
101 * pointer will be called unconditionally. 105 * pointer will be called unconditionally.
102 */ 106 */
103 extent_submit_bio_hook_t *submit_bio_hook; 107 blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
108 int mirror_num, unsigned long bio_flags);
104 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, 109 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
105 struct page *page, u64 start, u64 end, 110 struct page *page, u64 start, u64 end,
106 int mirror); 111 int mirror);
107}; 112};
108 113
114enum {
115 IO_TREE_FS_INFO_FREED_EXTENTS0,
116 IO_TREE_FS_INFO_FREED_EXTENTS1,
117 IO_TREE_INODE_IO,
118 IO_TREE_INODE_IO_FAILURE,
119 IO_TREE_RELOC_BLOCKS,
120 IO_TREE_TRANS_DIRTY_PAGES,
121 IO_TREE_ROOT_DIRTY_LOG_PAGES,
122 IO_TREE_SELFTEST,
123};
124
109struct extent_io_tree { 125struct extent_io_tree {
110 struct rb_root state; 126 struct rb_root state;
127 struct btrfs_fs_info *fs_info;
111 void *private_data; 128 void *private_data;
112 u64 dirty_bytes; 129 u64 dirty_bytes;
113 int track_uptodate; 130 bool track_uptodate;
131
132 /* Who owns this io tree, should be one of IO_TREE_* */
133 u8 owner;
134
114 spinlock_t lock; 135 spinlock_t lock;
115 const struct extent_io_ops *ops; 136 const struct extent_io_ops *ops;
116}; 137};
@@ -146,14 +167,9 @@ struct extent_buffer {
146 struct rcu_head rcu_head; 167 struct rcu_head rcu_head;
147 pid_t lock_owner; 168 pid_t lock_owner;
148 169
149 /* count of read lock holders on the extent buffer */
150 atomic_t write_locks;
151 atomic_t read_locks;
152 atomic_t blocking_writers; 170 atomic_t blocking_writers;
153 atomic_t blocking_readers; 171 atomic_t blocking_readers;
154 atomic_t spinning_readers; 172 bool lock_nested;
155 atomic_t spinning_writers;
156 short lock_nested;
157 /* >= 0 if eb belongs to a log tree, -1 otherwise */ 173 /* >= 0 if eb belongs to a log tree, -1 otherwise */
158 short log_index; 174 short log_index;
159 175
@@ -171,6 +187,10 @@ struct extent_buffer {
171 wait_queue_head_t read_lock_wq; 187 wait_queue_head_t read_lock_wq;
172 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 188 struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
173#ifdef CONFIG_BTRFS_DEBUG 189#ifdef CONFIG_BTRFS_DEBUG
190 atomic_t spinning_writers;
191 atomic_t spinning_readers;
192 atomic_t read_locks;
193 atomic_t write_locks;
174 struct list_head leak_list; 194 struct list_head leak_list;
175#endif 195#endif
176}; 196};
@@ -239,7 +259,10 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
239 u64 start, u64 len, 259 u64 start, u64 len,
240 int create); 260 int create);
241 261
242void extent_io_tree_init(struct extent_io_tree *tree, void *private_data); 262void extent_io_tree_init(struct btrfs_fs_info *fs_info,
263 struct extent_io_tree *tree, unsigned int owner,
264 void *private_data);
265void extent_io_tree_release(struct extent_io_tree *tree);
243int try_release_extent_mapping(struct page *page, gfp_t mask); 266int try_release_extent_mapping(struct page *page, gfp_t mask);
244int try_release_extent_buffer(struct page *page); 267int try_release_extent_buffer(struct page *page);
245int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 268int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -309,6 +332,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
309int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 332int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
310 unsigned bits, u64 *failed_start, 333 unsigned bits, u64 *failed_start,
311 struct extent_state **cached_state, gfp_t mask); 334 struct extent_state **cached_state, gfp_t mask);
335int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
336 unsigned bits);
312 337
313static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, 338static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
314 u64 end, unsigned bits) 339 u64 end, unsigned bits)
@@ -376,6 +401,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
376int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 401int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
377 u64 *start_ret, u64 *end_ret, unsigned bits, 402 u64 *start_ret, u64 *end_ret, unsigned bits,
378 struct extent_state **cached_state); 403 struct extent_state **cached_state);
404void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
405 u64 *start_ret, u64 *end_ret, unsigned bits);
379int extent_invalidatepage(struct extent_io_tree *tree, 406int extent_invalidatepage(struct extent_io_tree *tree,
380 struct page *page, unsigned long offset); 407 struct page *page, unsigned long offset);
381int extent_write_full_page(struct page *page, struct writeback_control *wbc); 408int extent_write_full_page(struct page *page, struct writeback_control *wbc);
@@ -405,8 +432,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
405#define WAIT_NONE 0 432#define WAIT_NONE 0
406#define WAIT_COMPLETE 1 433#define WAIT_COMPLETE 1
407#define WAIT_PAGE_LOCK 2 434#define WAIT_PAGE_LOCK 2
408int read_extent_buffer_pages(struct extent_io_tree *tree, 435int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
409 struct extent_buffer *eb, int wait,
410 int mirror_num); 436 int mirror_num);
411void wait_on_extent_buffer_writeback(struct extent_buffer *eb); 437void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
412 438
@@ -487,8 +513,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
487 struct extent_io_tree *io_tree, u64 start, 513 struct extent_io_tree *io_tree, u64 start,
488 struct page *page, u64 ino, unsigned int pg_offset); 514 struct page *page, u64 ino, unsigned int pg_offset);
489void end_extent_writepage(struct page *page, int err, u64 start, u64 end); 515void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
490int repair_eb_io_failure(struct btrfs_fs_info *fs_info, 516int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
491 struct extent_buffer *eb, int mirror_num);
492 517
493/* 518/*
494 * When IO fails, either with EIO or csum verification fails, we 519 * When IO fails, either with EIO or csum verification fails, we
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 928f729c55ba..9558d79faf1e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -4,6 +4,7 @@
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/spinlock.h> 5#include <linux/spinlock.h>
6#include "ctree.h" 6#include "ctree.h"
7#include "volumes.h"
7#include "extent_map.h" 8#include "extent_map.h"
8#include "compression.h" 9#include "compression.h"
9 10
@@ -337,6 +338,37 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree,
337 try_merge_map(tree, em); 338 try_merge_map(tree, em);
338} 339}
339 340
341static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
342{
343 struct map_lookup *map = em->map_lookup;
344 u64 stripe_size = em->orig_block_len;
345 int i;
346
347 for (i = 0; i < map->num_stripes; i++) {
348 struct btrfs_bio_stripe *stripe = &map->stripes[i];
349 struct btrfs_device *device = stripe->dev;
350
351 set_extent_bits_nowait(&device->alloc_state, stripe->physical,
352 stripe->physical + stripe_size - 1, bits);
353 }
354}
355
356static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
357{
358 struct map_lookup *map = em->map_lookup;
359 u64 stripe_size = em->orig_block_len;
360 int i;
361
362 for (i = 0; i < map->num_stripes; i++) {
363 struct btrfs_bio_stripe *stripe = &map->stripes[i];
364 struct btrfs_device *device = stripe->dev;
365
366 __clear_extent_bit(&device->alloc_state, stripe->physical,
367 stripe->physical + stripe_size - 1, bits,
368 0, 0, NULL, GFP_NOWAIT, NULL);
369 }
370}
371
340/** 372/**
341 * add_extent_mapping - add new extent map to the extent tree 373 * add_extent_mapping - add new extent map to the extent tree
342 * @tree: tree to insert new map in 374 * @tree: tree to insert new map in
@@ -357,6 +389,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
357 goto out; 389 goto out;
358 390
359 setup_extent_mapping(tree, em, modified); 391 setup_extent_mapping(tree, em, modified);
392 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
393 extent_map_device_set_bits(em, CHUNK_ALLOCATED);
394 extent_map_device_clear_bits(em, CHUNK_TRIMMED);
395 }
360out: 396out:
361 return ret; 397 return ret;
362} 398}
@@ -438,6 +474,8 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
438 rb_erase_cached(&em->rb_node, &tree->map); 474 rb_erase_cached(&em->rb_node, &tree->map);
439 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) 475 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
440 list_del_init(&em->list); 476 list_del_init(&em->list);
477 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
478 extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
441 RB_CLEAR_NODE(&em->rb_node); 479 RB_CLEAR_NODE(&em->rb_node);
442} 480}
443 481
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index cccc75d15970..d431ea8198e4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -413,6 +413,16 @@ fail:
413 return ret; 413 return ret;
414} 414}
415 415
416/*
417 * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
418 * @inode: Owner of the data inside the bio
419 * @bio: Contains the data to be checksummed
420 * @file_start: offset in file this bio begins to describe
421 * @contig: Boolean. If true/1 means all bio vecs in this bio are
422 * contiguous and they begin at @file_start in the file. False/0
423 * means this bio can contains potentially discontigous bio vecs
424 * so the logical offset of each should be calculated separately.
425 */
416blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, 426blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
417 u64 file_start, int contig) 427 u64 file_start, int contig)
418{ 428{
@@ -458,8 +468,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
458 BUG_ON(!ordered); /* Logic error */ 468 BUG_ON(!ordered); /* Logic error */
459 } 469 }
460 470
461 data = kmap_atomic(bvec.bv_page);
462
463 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, 471 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
464 bvec.bv_len + fs_info->sectorsize 472 bvec.bv_len + fs_info->sectorsize
465 - 1); 473 - 1);
@@ -469,10 +477,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
469 offset < ordered->file_offset) { 477 offset < ordered->file_offset) {
470 unsigned long bytes_left; 478 unsigned long bytes_left;
471 479
472 kunmap_atomic(data);
473 sums->len = this_sum_bytes; 480 sums->len = this_sum_bytes;
474 this_sum_bytes = 0; 481 this_sum_bytes = 0;
475 btrfs_add_ordered_sum(inode, ordered, sums); 482 btrfs_add_ordered_sum(ordered, sums);
476 btrfs_put_ordered_extent(ordered); 483 btrfs_put_ordered_extent(ordered);
477 484
478 bytes_left = bio->bi_iter.bi_size - total_bytes; 485 bytes_left = bio->bi_iter.bi_size - total_bytes;
@@ -489,16 +496,16 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
489 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) 496 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
490 + total_bytes; 497 + total_bytes;
491 index = 0; 498 index = 0;
492
493 data = kmap_atomic(bvec.bv_page);
494 } 499 }
495 500
496 sums->sums[index] = ~(u32)0; 501 sums->sums[index] = ~(u32)0;
502 data = kmap_atomic(bvec.bv_page);
497 sums->sums[index] 503 sums->sums[index]
498 = btrfs_csum_data(data + bvec.bv_offset 504 = btrfs_csum_data(data + bvec.bv_offset
499 + (i * fs_info->sectorsize), 505 + (i * fs_info->sectorsize),
500 sums->sums[index], 506 sums->sums[index],
501 fs_info->sectorsize); 507 fs_info->sectorsize);
508 kunmap_atomic(data);
502 btrfs_csum_final(sums->sums[index], 509 btrfs_csum_final(sums->sums[index],
503 (char *)(sums->sums + index)); 510 (char *)(sums->sums + index));
504 index++; 511 index++;
@@ -507,10 +514,9 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
507 total_bytes += fs_info->sectorsize; 514 total_bytes += fs_info->sectorsize;
508 } 515 }
509 516
510 kunmap_atomic(data);
511 } 517 }
512 this_sum_bytes = 0; 518 this_sum_bytes = 0;
513 btrfs_add_ordered_sum(inode, ordered, sums); 519 btrfs_add_ordered_sum(ordered, sums);
514 btrfs_put_ordered_extent(ordered); 520 btrfs_put_ordered_extent(ordered);
515 return 0; 521 return 0;
516} 522}
@@ -551,7 +557,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
551 */ 557 */
552 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 558 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
553 new_size *= csum_size; 559 new_size *= csum_size;
554 btrfs_truncate_item(fs_info, path, new_size, 1); 560 btrfs_truncate_item(path, new_size, 1);
555 } else if (key->offset >= bytenr && csum_end > end_byte && 561 } else if (key->offset >= bytenr && csum_end > end_byte &&
556 end_byte > key->offset) { 562 end_byte > key->offset) {
557 /* 563 /*
@@ -563,7 +569,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
563 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 569 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
564 new_size *= csum_size; 570 new_size *= csum_size;
565 571
566 btrfs_truncate_item(fs_info, path, new_size, 0); 572 btrfs_truncate_item(path, new_size, 0);
567 573
568 key->offset = end_byte; 574 key->offset = end_byte;
569 btrfs_set_item_key_safe(fs_info, path, key); 575 btrfs_set_item_key_safe(fs_info, path, key);
@@ -832,11 +838,11 @@ again:
832 u32 diff; 838 u32 diff;
833 u32 free_space; 839 u32 free_space;
834 840
835 if (btrfs_leaf_free_space(fs_info, leaf) < 841 if (btrfs_leaf_free_space(leaf) <
836 sizeof(struct btrfs_item) + csum_size * 2) 842 sizeof(struct btrfs_item) + csum_size * 2)
837 goto insert; 843 goto insert;
838 844
839 free_space = btrfs_leaf_free_space(fs_info, leaf) - 845 free_space = btrfs_leaf_free_space(leaf) -
840 sizeof(struct btrfs_item) - csum_size; 846 sizeof(struct btrfs_item) - csum_size;
841 tmp = sums->len - total_bytes; 847 tmp = sums->len - total_bytes;
842 tmp >>= fs_info->sb->s_blocksize_bits; 848 tmp >>= fs_info->sb->s_blocksize_bits;
@@ -852,7 +858,7 @@ again:
852 diff /= csum_size; 858 diff /= csum_size;
853 diff *= csum_size; 859 diff *= csum_size;
854 860
855 btrfs_extend_item(fs_info, path, diff); 861 btrfs_extend_item(path, diff);
856 ret = 0; 862 ret = 0;
857 goto csum; 863 goto csum;
858 } 864 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 34fe8a58b0e9..7e85dca0e6f2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -754,6 +754,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
754 struct btrfs_fs_info *fs_info = root->fs_info; 754 struct btrfs_fs_info *fs_info = root->fs_info;
755 struct extent_buffer *leaf; 755 struct extent_buffer *leaf;
756 struct btrfs_file_extent_item *fi; 756 struct btrfs_file_extent_item *fi;
757 struct btrfs_ref ref = { 0 };
757 struct btrfs_key key; 758 struct btrfs_key key;
758 struct btrfs_key new_key; 759 struct btrfs_key new_key;
759 u64 ino = btrfs_ino(BTRFS_I(inode)); 760 u64 ino = btrfs_ino(BTRFS_I(inode));
@@ -909,11 +910,14 @@ next_slot:
909 btrfs_mark_buffer_dirty(leaf); 910 btrfs_mark_buffer_dirty(leaf);
910 911
911 if (update_refs && disk_bytenr > 0) { 912 if (update_refs && disk_bytenr > 0) {
912 ret = btrfs_inc_extent_ref(trans, root, 913 btrfs_init_generic_ref(&ref,
913 disk_bytenr, num_bytes, 0, 914 BTRFS_ADD_DELAYED_REF,
915 disk_bytenr, num_bytes, 0);
916 btrfs_init_data_ref(&ref,
914 root->root_key.objectid, 917 root->root_key.objectid,
915 new_key.objectid, 918 new_key.objectid,
916 start - extent_offset); 919 start - extent_offset);
920 ret = btrfs_inc_extent_ref(trans, &ref);
917 BUG_ON(ret); /* -ENOMEM */ 921 BUG_ON(ret); /* -ENOMEM */
918 } 922 }
919 key.offset = start; 923 key.offset = start;
@@ -993,11 +997,14 @@ delete_extent_item:
993 extent_end = ALIGN(extent_end, 997 extent_end = ALIGN(extent_end,
994 fs_info->sectorsize); 998 fs_info->sectorsize);
995 } else if (update_refs && disk_bytenr > 0) { 999 } else if (update_refs && disk_bytenr > 0) {
996 ret = btrfs_free_extent(trans, root, 1000 btrfs_init_generic_ref(&ref,
997 disk_bytenr, num_bytes, 0, 1001 BTRFS_DROP_DELAYED_REF,
1002 disk_bytenr, num_bytes, 0);
1003 btrfs_init_data_ref(&ref,
998 root->root_key.objectid, 1004 root->root_key.objectid,
999 key.objectid, key.offset - 1005 key.objectid,
1000 extent_offset); 1006 key.offset - extent_offset);
1007 ret = btrfs_free_extent(trans, &ref);
1001 BUG_ON(ret); /* -ENOMEM */ 1008 BUG_ON(ret); /* -ENOMEM */
1002 inode_sub_bytes(inode, 1009 inode_sub_bytes(inode,
1003 extent_end - key.offset); 1010 extent_end - key.offset);
@@ -1025,7 +1032,7 @@ delete_extent_item:
1025 continue; 1032 continue;
1026 } 1033 }
1027 1034
1028 BUG_ON(1); 1035 BUG();
1029 } 1036 }
1030 1037
1031 if (!ret && del_nr > 0) { 1038 if (!ret && del_nr > 0) {
@@ -1050,7 +1057,7 @@ delete_extent_item:
1050 if (!ret && replace_extent && leafs_visited == 1 && 1057 if (!ret && replace_extent && leafs_visited == 1 &&
1051 (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || 1058 (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
1052 path->locks[0] == BTRFS_WRITE_LOCK) && 1059 path->locks[0] == BTRFS_WRITE_LOCK) &&
1053 btrfs_leaf_free_space(fs_info, leaf) >= 1060 btrfs_leaf_free_space(leaf) >=
1054 sizeof(struct btrfs_item) + extent_item_size) { 1061 sizeof(struct btrfs_item) + extent_item_size) {
1055 1062
1056 key.objectid = ino; 1063 key.objectid = ino;
@@ -1142,6 +1149,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
1142 struct extent_buffer *leaf; 1149 struct extent_buffer *leaf;
1143 struct btrfs_path *path; 1150 struct btrfs_path *path;
1144 struct btrfs_file_extent_item *fi; 1151 struct btrfs_file_extent_item *fi;
1152 struct btrfs_ref ref = { 0 };
1145 struct btrfs_key key; 1153 struct btrfs_key key;
1146 struct btrfs_key new_key; 1154 struct btrfs_key new_key;
1147 u64 bytenr; 1155 u64 bytenr;
@@ -1287,9 +1295,11 @@ again:
1287 extent_end - split); 1295 extent_end - split);
1288 btrfs_mark_buffer_dirty(leaf); 1296 btrfs_mark_buffer_dirty(leaf);
1289 1297
1290 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 1298 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
1291 0, root->root_key.objectid, 1299 num_bytes, 0);
1292 ino, orig_offset); 1300 btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
1301 orig_offset);
1302 ret = btrfs_inc_extent_ref(trans, &ref);
1293 if (ret) { 1303 if (ret) {
1294 btrfs_abort_transaction(trans, ret); 1304 btrfs_abort_transaction(trans, ret);
1295 goto out; 1305 goto out;
@@ -1311,6 +1321,9 @@ again:
1311 1321
1312 other_start = end; 1322 other_start = end;
1313 other_end = 0; 1323 other_end = 0;
1324 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1325 num_bytes, 0);
1326 btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
1314 if (extent_mergeable(leaf, path->slots[0] + 1, 1327 if (extent_mergeable(leaf, path->slots[0] + 1,
1315 ino, bytenr, orig_offset, 1328 ino, bytenr, orig_offset,
1316 &other_start, &other_end)) { 1329 &other_start, &other_end)) {
@@ -1321,9 +1334,7 @@ again:
1321 extent_end = other_end; 1334 extent_end = other_end;
1322 del_slot = path->slots[0] + 1; 1335 del_slot = path->slots[0] + 1;
1323 del_nr++; 1336 del_nr++;
1324 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1337 ret = btrfs_free_extent(trans, &ref);
1325 0, root->root_key.objectid,
1326 ino, orig_offset);
1327 if (ret) { 1338 if (ret) {
1328 btrfs_abort_transaction(trans, ret); 1339 btrfs_abort_transaction(trans, ret);
1329 goto out; 1340 goto out;
@@ -1341,9 +1352,7 @@ again:
1341 key.offset = other_start; 1352 key.offset = other_start;
1342 del_slot = path->slots[0]; 1353 del_slot = path->slots[0];
1343 del_nr++; 1354 del_nr++;
1344 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1355 ret = btrfs_free_extent(trans, &ref);
1345 0, root->root_key.objectid,
1346 ino, orig_offset);
1347 if (ret) { 1356 if (ret) {
1348 btrfs_abort_transaction(trans, ret); 1357 btrfs_abort_transaction(trans, ret);
1349 goto out; 1358 goto out;
@@ -2165,7 +2174,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2165 inode_unlock(inode); 2174 inode_unlock(inode);
2166 goto out; 2175 goto out;
2167 } 2176 }
2168 trans->sync = true;
2169 2177
2170 ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx); 2178 ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
2171 if (ret < 0) { 2179 if (ret < 0) {
@@ -3132,6 +3140,7 @@ static long btrfs_fallocate(struct file *file, int mode,
3132 ret = btrfs_qgroup_reserve_data(inode, &data_reserved, 3140 ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
3133 cur_offset, last_byte - cur_offset); 3141 cur_offset, last_byte - cur_offset);
3134 if (ret < 0) { 3142 if (ret < 0) {
3143 cur_offset = last_byte;
3135 free_extent_map(em); 3144 free_extent_map(em);
3136 break; 3145 break;
3137 } 3146 }
@@ -3181,7 +3190,7 @@ out:
3181 /* Let go of our reservation. */ 3190 /* Let go of our reservation. */
3182 if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) 3191 if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
3183 btrfs_free_reserved_data_space(inode, data_reserved, 3192 btrfs_free_reserved_data_space(inode, data_reserved,
3184 alloc_start, alloc_end - cur_offset); 3193 cur_offset, alloc_end - cur_offset);
3185 extent_changeset_free(data_reserved); 3194 extent_changeset_free(data_reserved);
3186 return ret; 3195 return ret;
3187} 3196}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 74aa552f4793..f74dc259307b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -88,10 +88,11 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
88 return inode; 88 return inode;
89} 89}
90 90
91struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 91struct inode *lookup_free_space_inode(
92 struct btrfs_block_group_cache 92 struct btrfs_block_group_cache *block_group,
93 *block_group, struct btrfs_path *path) 93 struct btrfs_path *path)
94{ 94{
95 struct btrfs_fs_info *fs_info = block_group->fs_info;
95 struct inode *inode = NULL; 96 struct inode *inode = NULL;
96 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 97 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
97 98
@@ -185,20 +186,19 @@ static int __create_free_space_inode(struct btrfs_root *root,
185 return 0; 186 return 0;
186} 187}
187 188
188int create_free_space_inode(struct btrfs_fs_info *fs_info, 189int create_free_space_inode(struct btrfs_trans_handle *trans,
189 struct btrfs_trans_handle *trans,
190 struct btrfs_block_group_cache *block_group, 190 struct btrfs_block_group_cache *block_group,
191 struct btrfs_path *path) 191 struct btrfs_path *path)
192{ 192{
193 int ret; 193 int ret;
194 u64 ino; 194 u64 ino;
195 195
196 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 196 ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
197 if (ret < 0) 197 if (ret < 0)
198 return ret; 198 return ret;
199 199
200 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 200 return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
201 block_group->key.objectid); 201 ino, block_group->key.objectid);
202} 202}
203 203
204int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 204int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
@@ -812,9 +812,9 @@ free_cache:
812 goto out; 812 goto out;
813} 813}
814 814
815int load_free_space_cache(struct btrfs_fs_info *fs_info, 815int load_free_space_cache(struct btrfs_block_group_cache *block_group)
816 struct btrfs_block_group_cache *block_group)
817{ 816{
817 struct btrfs_fs_info *fs_info = block_group->fs_info;
818 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 818 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
819 struct inode *inode; 819 struct inode *inode;
820 struct btrfs_path *path; 820 struct btrfs_path *path;
@@ -858,7 +858,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
858 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 858 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
859 * we will never try to read their inode item while the fs is mounted. 859 * we will never try to read their inode item while the fs is mounted.
860 */ 860 */
861 inode = lookup_free_space_inode(fs_info, block_group, path); 861 inode = lookup_free_space_inode(block_group, path);
862 if (IS_ERR(inode)) { 862 if (IS_ERR(inode)) {
863 btrfs_free_path(path); 863 btrfs_free_path(path);
864 return 0; 864 return 0;
@@ -1039,8 +1039,7 @@ fail:
1039 return -1; 1039 return -1;
1040} 1040}
1041 1041
1042static noinline_for_stack int 1042static noinline_for_stack int write_pinned_extent_entries(
1043write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
1044 struct btrfs_block_group_cache *block_group, 1043 struct btrfs_block_group_cache *block_group,
1045 struct btrfs_io_ctl *io_ctl, 1044 struct btrfs_io_ctl *io_ctl,
1046 int *entries) 1045 int *entries)
@@ -1059,7 +1058,7 @@ write_pinned_extent_entries(struct btrfs_fs_info *fs_info,
1059 * We shouldn't have switched the pinned extents yet so this is the 1058 * We shouldn't have switched the pinned extents yet so this is the
1060 * right one 1059 * right one
1061 */ 1060 */
1062 unpin = fs_info->pinned_extents; 1061 unpin = block_group->fs_info->pinned_extents;
1063 1062
1064 start = block_group->key.objectid; 1063 start = block_group->key.objectid;
1065 1064
@@ -1235,7 +1234,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1235 struct btrfs_io_ctl *io_ctl, 1234 struct btrfs_io_ctl *io_ctl,
1236 struct btrfs_trans_handle *trans) 1235 struct btrfs_trans_handle *trans)
1237{ 1236{
1238 struct btrfs_fs_info *fs_info = root->fs_info;
1239 struct extent_state *cached_state = NULL; 1237 struct extent_state *cached_state = NULL;
1240 LIST_HEAD(bitmap_list); 1238 LIST_HEAD(bitmap_list);
1241 int entries = 0; 1239 int entries = 0;
@@ -1293,8 +1291,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1293 * If this changes while we are working we'll get added back to 1291 * If this changes while we are working we'll get added back to
1294 * the dirty list and redo it. No locking needed 1292 * the dirty list and redo it. No locking needed
1295 */ 1293 */
1296 ret = write_pinned_extent_entries(fs_info, block_group, 1294 ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
1297 io_ctl, &entries);
1298 if (ret) 1295 if (ret)
1299 goto out_nospc_locked; 1296 goto out_nospc_locked;
1300 1297
@@ -1370,11 +1367,11 @@ out_unlock:
1370 goto out; 1367 goto out;
1371} 1368}
1372 1369
1373int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1370int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1374 struct btrfs_trans_handle *trans,
1375 struct btrfs_block_group_cache *block_group, 1371 struct btrfs_block_group_cache *block_group,
1376 struct btrfs_path *path) 1372 struct btrfs_path *path)
1377{ 1373{
1374 struct btrfs_fs_info *fs_info = trans->fs_info;
1378 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1375 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1379 struct inode *inode; 1376 struct inode *inode;
1380 int ret = 0; 1377 int ret = 0;
@@ -1386,7 +1383,7 @@ int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
1386 } 1383 }
1387 spin_unlock(&block_group->lock); 1384 spin_unlock(&block_group->lock);
1388 1385
1389 inode = lookup_free_space_inode(fs_info, block_group, path); 1386 inode = lookup_free_space_inode(block_group, path);
1390 if (IS_ERR(inode)) 1387 if (IS_ERR(inode))
1391 return 0; 1388 return 0;
1392 1389
@@ -3040,11 +3037,11 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
3040 * returns zero and sets up cluster if things worked out, otherwise 3037 * returns zero and sets up cluster if things worked out, otherwise
3041 * it returns -enospc 3038 * it returns -enospc
3042 */ 3039 */
3043int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3040int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
3044 struct btrfs_block_group_cache *block_group,
3045 struct btrfs_free_cluster *cluster, 3041 struct btrfs_free_cluster *cluster,
3046 u64 offset, u64 bytes, u64 empty_size) 3042 u64 offset, u64 bytes, u64 empty_size)
3047{ 3043{
3044 struct btrfs_fs_info *fs_info = block_group->fs_info;
3048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3045 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3049 struct btrfs_free_space *entry, *tmp; 3046 struct btrfs_free_space *entry, *tmp;
3050 LIST_HEAD(bitmaps); 3047 LIST_HEAD(bitmaps);
@@ -3366,10 +3363,6 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
3366 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3363 em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3367 1); 3364 1);
3368 BUG_ON(!em); /* logic error, can't happen */ 3365 BUG_ON(!em); /* logic error, can't happen */
3369 /*
3370 * remove_extent_mapping() will delete us from the pinned_chunks
3371 * list, which is protected by the chunk mutex.
3372 */
3373 remove_extent_mapping(em_tree, em); 3366 remove_extent_mapping(em_tree, em);
3374 write_unlock(&em_tree->lock); 3367 write_unlock(&em_tree->lock);
3375 mutex_unlock(&fs_info->chunk_mutex); 3368 mutex_unlock(&fs_info->chunk_mutex);
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 15e30b93db0d..8760acb55ffd 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -38,11 +38,10 @@ struct btrfs_free_space_op {
38 38
39struct btrfs_io_ctl; 39struct btrfs_io_ctl;
40 40
41struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 41struct inode *lookup_free_space_inode(
42 struct btrfs_block_group_cache 42 struct btrfs_block_group_cache *block_group,
43 *block_group, struct btrfs_path *path); 43 struct btrfs_path *path);
44int create_free_space_inode(struct btrfs_fs_info *fs_info, 44int create_free_space_inode(struct btrfs_trans_handle *trans,
45 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group, 45 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path); 46 struct btrfs_path *path);
48 47
@@ -51,13 +50,11 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
51int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 50int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
52 struct btrfs_block_group_cache *block_group, 51 struct btrfs_block_group_cache *block_group,
53 struct inode *inode); 52 struct inode *inode);
54int load_free_space_cache(struct btrfs_fs_info *fs_info, 53int load_free_space_cache(struct btrfs_block_group_cache *block_group);
55 struct btrfs_block_group_cache *block_group);
56int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 54int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
57 struct btrfs_block_group_cache *block_group, 55 struct btrfs_block_group_cache *block_group,
58 struct btrfs_path *path); 56 struct btrfs_path *path);
59int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 57int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
60 struct btrfs_trans_handle *trans,
61 struct btrfs_block_group_cache *block_group, 58 struct btrfs_block_group_cache *block_group,
62 struct btrfs_path *path); 59 struct btrfs_path *path);
63struct inode *lookup_free_ino_inode(struct btrfs_root *root, 60struct inode *lookup_free_ino_inode(struct btrfs_root *root,
@@ -95,8 +92,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
95u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 92u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
96void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 93void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
97 u64 bytes); 94 u64 bytes);
98int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 95int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
99 struct btrfs_block_group_cache *block_group,
100 struct btrfs_free_cluster *cluster, 96 struct btrfs_free_cluster *cluster,
101 u64 offset, u64 bytes, u64 empty_size); 97 u64 offset, u64 bytes, u64 empty_size);
102void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); 98void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index e5089087eaa6..f5dc115ebba0 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -76,10 +76,11 @@ out:
76 76
77EXPORT_FOR_TESTS 77EXPORT_FOR_TESTS
78struct btrfs_free_space_info *search_free_space_info( 78struct btrfs_free_space_info *search_free_space_info(
79 struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, 79 struct btrfs_trans_handle *trans,
80 struct btrfs_block_group_cache *block_group, 80 struct btrfs_block_group_cache *block_group,
81 struct btrfs_path *path, int cow) 81 struct btrfs_path *path, int cow)
82{ 82{
83 struct btrfs_fs_info *fs_info = block_group->fs_info;
83 struct btrfs_root *root = fs_info->free_space_root; 84 struct btrfs_root *root = fs_info->free_space_root;
84 struct btrfs_key key; 85 struct btrfs_key key;
85 int ret; 86 int ret;
@@ -253,7 +254,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
253 btrfs_release_path(path); 254 btrfs_release_path(path);
254 } 255 }
255 256
256 info = search_free_space_info(trans, fs_info, block_group, path, 1); 257 info = search_free_space_info(trans, block_group, path, 1);
257 if (IS_ERR(info)) { 258 if (IS_ERR(info)) {
258 ret = PTR_ERR(info); 259 ret = PTR_ERR(info);
259 goto out; 260 goto out;
@@ -398,7 +399,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
398 btrfs_release_path(path); 399 btrfs_release_path(path);
399 } 400 }
400 401
401 info = search_free_space_info(trans, fs_info, block_group, path, 1); 402 info = search_free_space_info(trans, block_group, path, 1);
402 if (IS_ERR(info)) { 403 if (IS_ERR(info)) {
403 ret = PTR_ERR(info); 404 ret = PTR_ERR(info);
404 goto out; 405 goto out;
@@ -463,8 +464,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
463 if (new_extents == 0) 464 if (new_extents == 0)
464 return 0; 465 return 0;
465 466
466 info = search_free_space_info(trans, trans->fs_info, block_group, path, 467 info = search_free_space_info(trans, block_group, path, 1);
467 1);
468 if (IS_ERR(info)) { 468 if (IS_ERR(info)) {
469 ret = PTR_ERR(info); 469 ret = PTR_ERR(info);
470 goto out; 470 goto out;
@@ -793,8 +793,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
793 return ret; 793 return ret;
794 } 794 }
795 795
796 info = search_free_space_info(NULL, trans->fs_info, block_group, path, 796 info = search_free_space_info(NULL, block_group, path, 0);
797 0);
798 if (IS_ERR(info)) 797 if (IS_ERR(info))
799 return PTR_ERR(info); 798 return PTR_ERR(info);
800 flags = btrfs_free_space_flags(path->nodes[0], info); 799 flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -977,7 +976,6 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
977 struct btrfs_block_group_cache *block_group, 976 struct btrfs_block_group_cache *block_group,
978 struct btrfs_path *path, u64 start, u64 size) 977 struct btrfs_path *path, u64 start, u64 size)
979{ 978{
980 struct btrfs_fs_info *fs_info = trans->fs_info;
981 struct btrfs_free_space_info *info; 979 struct btrfs_free_space_info *info;
982 u32 flags; 980 u32 flags;
983 int ret; 981 int ret;
@@ -988,7 +986,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
988 return ret; 986 return ret;
989 } 987 }
990 988
991 info = search_free_space_info(NULL, fs_info, block_group, path, 0); 989 info = search_free_space_info(NULL, block_group, path, 0);
992 if (IS_ERR(info)) 990 if (IS_ERR(info))
993 return PTR_ERR(info); 991 return PTR_ERR(info);
994 flags = btrfs_free_space_flags(path->nodes[0], info); 992 flags = btrfs_free_space_flags(path->nodes[0], info);
@@ -1150,7 +1148,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1150 return PTR_ERR(trans); 1148 return PTR_ERR(trans);
1151 1149
1152 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1150 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1153 free_space_root = btrfs_create_tree(trans, fs_info, 1151 free_space_root = btrfs_create_tree(trans,
1154 BTRFS_FREE_SPACE_TREE_OBJECTID); 1152 BTRFS_FREE_SPACE_TREE_OBJECTID);
1155 if (IS_ERR(free_space_root)) { 1153 if (IS_ERR(free_space_root)) {
1156 ret = PTR_ERR(free_space_root); 1154 ret = PTR_ERR(free_space_root);
@@ -1248,7 +1246,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1248 list_del(&free_space_root->dirty_list); 1246 list_del(&free_space_root->dirty_list);
1249 1247
1250 btrfs_tree_lock(free_space_root->node); 1248 btrfs_tree_lock(free_space_root->node);
1251 clean_tree_block(fs_info, free_space_root->node); 1249 btrfs_clean_tree_block(free_space_root->node);
1252 btrfs_tree_unlock(free_space_root->node); 1250 btrfs_tree_unlock(free_space_root->node);
1253 btrfs_free_tree_block(trans, free_space_root, free_space_root->node, 1251 btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
1254 0, 1); 1252 0, 1);
@@ -1534,14 +1532,12 @@ out:
1534int load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1532int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1535{ 1533{
1536 struct btrfs_block_group_cache *block_group; 1534 struct btrfs_block_group_cache *block_group;
1537 struct btrfs_fs_info *fs_info;
1538 struct btrfs_free_space_info *info; 1535 struct btrfs_free_space_info *info;
1539 struct btrfs_path *path; 1536 struct btrfs_path *path;
1540 u32 extent_count, flags; 1537 u32 extent_count, flags;
1541 int ret; 1538 int ret;
1542 1539
1543 block_group = caching_ctl->block_group; 1540 block_group = caching_ctl->block_group;
1544 fs_info = block_group->fs_info;
1545 1541
1546 path = btrfs_alloc_path(); 1542 path = btrfs_alloc_path();
1547 if (!path) 1543 if (!path)
@@ -1555,7 +1551,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1555 path->search_commit_root = 1; 1551 path->search_commit_root = 1;
1556 path->reada = READA_FORWARD; 1552 path->reada = READA_FORWARD;
1557 1553
1558 info = search_free_space_info(NULL, fs_info, block_group, path, 0); 1554 info = search_free_space_info(NULL, block_group, path, 0);
1559 if (IS_ERR(info)) { 1555 if (IS_ERR(info)) {
1560 ret = PTR_ERR(info); 1556 ret = PTR_ERR(info);
1561 goto out; 1557 goto out;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 3133651d7d70..22b7602bde25 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -30,7 +30,6 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
30#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 30#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
31struct btrfs_free_space_info * 31struct btrfs_free_space_info *
32search_free_space_info(struct btrfs_trans_handle *trans, 32search_free_space_info(struct btrfs_trans_handle *trans,
33 struct btrfs_fs_info *fs_info,
34 struct btrfs_block_group_cache *block_group, 33 struct btrfs_block_group_cache *block_group,
35 struct btrfs_path *path, int cow); 34 struct btrfs_path *path, int cow);
36int __add_to_free_space_tree(struct btrfs_trans_handle *trans, 35int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index a8956a3c9e05..30d62ef918b9 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -170,7 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
170 memmove_extent_buffer(leaf, ptr, ptr + del_len, 170 memmove_extent_buffer(leaf, ptr, ptr + del_len,
171 item_size - (ptr + del_len - item_start)); 171 item_size - (ptr + del_len - item_start));
172 172
173 btrfs_truncate_item(root->fs_info, path, item_size - del_len, 1); 173 btrfs_truncate_item(path, item_size - del_len, 1);
174 174
175out: 175out:
176 btrfs_free_path(path); 176 btrfs_free_path(path);
@@ -234,7 +234,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
234 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); 234 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
235 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 235 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
236 item_size - (ptr + sub_item_len - item_start)); 236 item_size - (ptr + sub_item_len - item_start));
237 btrfs_truncate_item(root->fs_info, path, item_size - sub_item_len, 1); 237 btrfs_truncate_item(path, item_size - sub_item_len, 1);
238out: 238out:
239 btrfs_free_path(path); 239 btrfs_free_path(path);
240 240
@@ -288,7 +288,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
288 name, name_len, NULL)) 288 name, name_len, NULL))
289 goto out; 289 goto out;
290 290
291 btrfs_extend_item(root->fs_info, path, ins_len); 291 btrfs_extend_item(path, ins_len);
292 ret = 0; 292 ret = 0;
293 } 293 }
294 if (ret < 0) 294 if (ret < 0)
@@ -347,7 +347,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
347 goto out; 347 goto out;
348 348
349 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 349 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
350 btrfs_extend_item(fs_info, path, ins_len); 350 btrfs_extend_item(path, ins_len);
351 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 351 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
352 struct btrfs_inode_ref); 352 struct btrfs_inode_ref);
353 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 353 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ade7d0c5ce1b..56929daea0f7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -28,6 +28,7 @@
28#include <linux/magic.h> 28#include <linux/magic.h>
29#include <linux/iversion.h> 29#include <linux/iversion.h>
30#include <linux/swap.h> 30#include <linux/swap.h>
31#include <linux/sched/mm.h>
31#include <asm/unaligned.h> 32#include <asm/unaligned.h>
32#include "ctree.h" 33#include "ctree.h"
33#include "disk-io.h" 34#include "disk-io.h"
@@ -73,17 +74,6 @@ struct kmem_cache *btrfs_trans_handle_cachep;
73struct kmem_cache *btrfs_path_cachep; 74struct kmem_cache *btrfs_path_cachep;
74struct kmem_cache *btrfs_free_space_cachep; 75struct kmem_cache *btrfs_free_space_cachep;
75 76
76#define S_SHIFT 12
77static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
78 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
79 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
80 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
81 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
82 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
83 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
84 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
85};
86
87static int btrfs_setsize(struct inode *inode, struct iattr *attr); 77static int btrfs_setsize(struct inode *inode, struct iattr *attr);
88static int btrfs_truncate(struct inode *inode, bool skip_writeback); 78static int btrfs_truncate(struct inode *inode, bool skip_writeback);
89static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 79static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
@@ -366,18 +356,24 @@ struct async_extent {
366 struct list_head list; 356 struct list_head list;
367}; 357};
368 358
369struct async_cow { 359struct async_chunk {
370 struct inode *inode; 360 struct inode *inode;
371 struct btrfs_fs_info *fs_info;
372 struct page *locked_page; 361 struct page *locked_page;
373 u64 start; 362 u64 start;
374 u64 end; 363 u64 end;
375 unsigned int write_flags; 364 unsigned int write_flags;
376 struct list_head extents; 365 struct list_head extents;
377 struct btrfs_work work; 366 struct btrfs_work work;
367 atomic_t *pending;
378}; 368};
379 369
380static noinline int add_async_extent(struct async_cow *cow, 370struct async_cow {
371 /* Number of chunks in flight; must be first in the structure */
372 atomic_t num_chunks;
373 struct async_chunk chunks[];
374};
375
376static noinline int add_async_extent(struct async_chunk *cow,
381 u64 start, u64 ram_size, 377 u64 start, u64 ram_size,
382 u64 compressed_size, 378 u64 compressed_size,
383 struct page **pages, 379 struct page **pages,
@@ -444,14 +440,14 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
444 * are written in the same order that the flusher thread sent them 440 * are written in the same order that the flusher thread sent them
445 * down. 441 * down.
446 */ 442 */
447static noinline void compress_file_range(struct inode *inode, 443static noinline void compress_file_range(struct async_chunk *async_chunk,
448 struct page *locked_page, 444 int *num_added)
449 u64 start, u64 end,
450 struct async_cow *async_cow,
451 int *num_added)
452{ 445{
446 struct inode *inode = async_chunk->inode;
453 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 447 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
454 u64 blocksize = fs_info->sectorsize; 448 u64 blocksize = fs_info->sectorsize;
449 u64 start = async_chunk->start;
450 u64 end = async_chunk->end;
455 u64 actual_end; 451 u64 actual_end;
456 int ret = 0; 452 int ret = 0;
457 struct page **pages = NULL; 453 struct page **pages = NULL;
@@ -630,7 +626,7 @@ cont:
630 * allocation on disk for these compressed pages, and 626 * allocation on disk for these compressed pages, and
631 * will submit them to the elevator. 627 * will submit them to the elevator.
632 */ 628 */
633 add_async_extent(async_cow, start, total_in, 629 add_async_extent(async_chunk, start, total_in,
634 total_compressed, pages, nr_pages, 630 total_compressed, pages, nr_pages,
635 compress_type); 631 compress_type);
636 632
@@ -670,14 +666,14 @@ cleanup_and_bail_uncompressed:
670 * to our extent and set things up for the async work queue to run 666 * to our extent and set things up for the async work queue to run
671 * cow_file_range to do the normal delalloc dance. 667 * cow_file_range to do the normal delalloc dance.
672 */ 668 */
673 if (page_offset(locked_page) >= start && 669 if (page_offset(async_chunk->locked_page) >= start &&
674 page_offset(locked_page) <= end) 670 page_offset(async_chunk->locked_page) <= end)
675 __set_page_dirty_nobuffers(locked_page); 671 __set_page_dirty_nobuffers(async_chunk->locked_page);
676 /* unlocked later on in the async handlers */ 672 /* unlocked later on in the async handlers */
677 673
678 if (redirty) 674 if (redirty)
679 extent_range_redirty_for_io(inode, start, end); 675 extent_range_redirty_for_io(inode, start, end);
680 add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, 676 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
681 BTRFS_COMPRESS_NONE); 677 BTRFS_COMPRESS_NONE);
682 *num_added += 1; 678 *num_added += 1;
683 679
@@ -713,38 +709,34 @@ static void free_async_extent_pages(struct async_extent *async_extent)
713 * queued. We walk all the async extents created by compress_file_range 709 * queued. We walk all the async extents created by compress_file_range
714 * and send them down to the disk. 710 * and send them down to the disk.
715 */ 711 */
716static noinline void submit_compressed_extents(struct async_cow *async_cow) 712static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
717{ 713{
718 struct inode *inode = async_cow->inode; 714 struct inode *inode = async_chunk->inode;
719 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 715 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
720 struct async_extent *async_extent; 716 struct async_extent *async_extent;
721 u64 alloc_hint = 0; 717 u64 alloc_hint = 0;
722 struct btrfs_key ins; 718 struct btrfs_key ins;
723 struct extent_map *em; 719 struct extent_map *em;
724 struct btrfs_root *root = BTRFS_I(inode)->root; 720 struct btrfs_root *root = BTRFS_I(inode)->root;
725 struct extent_io_tree *io_tree; 721 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
726 int ret = 0; 722 int ret = 0;
727 723
728again: 724again:
729 while (!list_empty(&async_cow->extents)) { 725 while (!list_empty(&async_chunk->extents)) {
730 async_extent = list_entry(async_cow->extents.next, 726 async_extent = list_entry(async_chunk->extents.next,
731 struct async_extent, list); 727 struct async_extent, list);
732 list_del(&async_extent->list); 728 list_del(&async_extent->list);
733 729
734 io_tree = &BTRFS_I(inode)->io_tree;
735
736retry: 730retry:
731 lock_extent(io_tree, async_extent->start,
732 async_extent->start + async_extent->ram_size - 1);
737 /* did the compression code fall back to uncompressed IO? */ 733 /* did the compression code fall back to uncompressed IO? */
738 if (!async_extent->pages) { 734 if (!async_extent->pages) {
739 int page_started = 0; 735 int page_started = 0;
740 unsigned long nr_written = 0; 736 unsigned long nr_written = 0;
741 737
742 lock_extent(io_tree, async_extent->start,
743 async_extent->start +
744 async_extent->ram_size - 1);
745
746 /* allocate blocks */ 738 /* allocate blocks */
747 ret = cow_file_range(inode, async_cow->locked_page, 739 ret = cow_file_range(inode, async_chunk->locked_page,
748 async_extent->start, 740 async_extent->start,
749 async_extent->start + 741 async_extent->start +
750 async_extent->ram_size - 1, 742 async_extent->ram_size - 1,
@@ -768,15 +760,12 @@ retry:
768 async_extent->ram_size - 1, 760 async_extent->ram_size - 1,
769 WB_SYNC_ALL); 761 WB_SYNC_ALL);
770 else if (ret) 762 else if (ret)
771 unlock_page(async_cow->locked_page); 763 unlock_page(async_chunk->locked_page);
772 kfree(async_extent); 764 kfree(async_extent);
773 cond_resched(); 765 cond_resched();
774 continue; 766 continue;
775 } 767 }
776 768
777 lock_extent(io_tree, async_extent->start,
778 async_extent->start + async_extent->ram_size - 1);
779
780 ret = btrfs_reserve_extent(root, async_extent->ram_size, 769 ret = btrfs_reserve_extent(root, async_extent->ram_size,
781 async_extent->compressed_size, 770 async_extent->compressed_size,
782 async_extent->compressed_size, 771 async_extent->compressed_size,
@@ -855,7 +844,7 @@ retry:
855 ins.objectid, 844 ins.objectid,
856 ins.offset, async_extent->pages, 845 ins.offset, async_extent->pages,
857 async_extent->nr_pages, 846 async_extent->nr_pages,
858 async_cow->write_flags)) { 847 async_chunk->write_flags)) {
859 struct page *p = async_extent->pages[0]; 848 struct page *p = async_extent->pages[0];
860 const u64 start = async_extent->start; 849 const u64 start = async_extent->start;
861 const u64 end = start + async_extent->ram_size - 1; 850 const u64 end = start + async_extent->ram_size - 1;
@@ -1132,16 +1121,15 @@ out_unlock:
1132 */ 1121 */
1133static noinline void async_cow_start(struct btrfs_work *work) 1122static noinline void async_cow_start(struct btrfs_work *work)
1134{ 1123{
1135 struct async_cow *async_cow; 1124 struct async_chunk *async_chunk;
1136 int num_added = 0; 1125 int num_added = 0;
1137 async_cow = container_of(work, struct async_cow, work);
1138 1126
1139 compress_file_range(async_cow->inode, async_cow->locked_page, 1127 async_chunk = container_of(work, struct async_chunk, work);
1140 async_cow->start, async_cow->end, async_cow, 1128
1141 &num_added); 1129 compress_file_range(async_chunk, &num_added);
1142 if (num_added == 0) { 1130 if (num_added == 0) {
1143 btrfs_add_delayed_iput(async_cow->inode); 1131 btrfs_add_delayed_iput(async_chunk->inode);
1144 async_cow->inode = NULL; 1132 async_chunk->inode = NULL;
1145 } 1133 }
1146} 1134}
1147 1135
@@ -1150,14 +1138,12 @@ static noinline void async_cow_start(struct btrfs_work *work)
1150 */ 1138 */
1151static noinline void async_cow_submit(struct btrfs_work *work) 1139static noinline void async_cow_submit(struct btrfs_work *work)
1152{ 1140{
1153 struct btrfs_fs_info *fs_info; 1141 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1154 struct async_cow *async_cow; 1142 work);
1143 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1155 unsigned long nr_pages; 1144 unsigned long nr_pages;
1156 1145
1157 async_cow = container_of(work, struct async_cow, work); 1146 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1158
1159 fs_info = async_cow->fs_info;
1160 nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
1161 PAGE_SHIFT; 1147 PAGE_SHIFT;
1162 1148
1163 /* atomic_sub_return implies a barrier */ 1149 /* atomic_sub_return implies a barrier */
@@ -1166,22 +1152,28 @@ static noinline void async_cow_submit(struct btrfs_work *work)
1166 cond_wake_up_nomb(&fs_info->async_submit_wait); 1152 cond_wake_up_nomb(&fs_info->async_submit_wait);
1167 1153
1168 /* 1154 /*
1169 * ->inode could be NULL if async_cow_start has failed to compress, 1155 * ->inode could be NULL if async_chunk_start has failed to compress,
1170 * in which case we don't have anything to submit, yet we need to 1156 * in which case we don't have anything to submit, yet we need to
1171 * always adjust ->async_delalloc_pages as its paired with the init 1157 * always adjust ->async_delalloc_pages as its paired with the init
1172 * happening in cow_file_range_async 1158 * happening in cow_file_range_async
1173 */ 1159 */
1174 if (async_cow->inode) 1160 if (async_chunk->inode)
1175 submit_compressed_extents(async_cow); 1161 submit_compressed_extents(async_chunk);
1176} 1162}
1177 1163
1178static noinline void async_cow_free(struct btrfs_work *work) 1164static noinline void async_cow_free(struct btrfs_work *work)
1179{ 1165{
1180 struct async_cow *async_cow; 1166 struct async_chunk *async_chunk;
1181 async_cow = container_of(work, struct async_cow, work); 1167
1182 if (async_cow->inode) 1168 async_chunk = container_of(work, struct async_chunk, work);
1183 btrfs_add_delayed_iput(async_cow->inode); 1169 if (async_chunk->inode)
1184 kfree(async_cow); 1170 btrfs_add_delayed_iput(async_chunk->inode);
1171 /*
1172 * Since the pointer to 'pending' is at the beginning of the array of
1173 * async_chunk's, freeing it ensures the whole array has been freed.
1174 */
1175 if (atomic_dec_and_test(async_chunk->pending))
1176 kvfree(async_chunk->pending);
1185} 1177}
1186 1178
1187static int cow_file_range_async(struct inode *inode, struct page *locked_page, 1179static int cow_file_range_async(struct inode *inode, struct page *locked_page,
@@ -1190,45 +1182,73 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1190 unsigned int write_flags) 1182 unsigned int write_flags)
1191{ 1183{
1192 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1184 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1193 struct async_cow *async_cow; 1185 struct async_cow *ctx;
1186 struct async_chunk *async_chunk;
1194 unsigned long nr_pages; 1187 unsigned long nr_pages;
1195 u64 cur_end; 1188 u64 cur_end;
1189 u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
1190 int i;
1191 bool should_compress;
1192 unsigned nofs_flag;
1193
1194 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1195
1196 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
1197 !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
1198 num_chunks = 1;
1199 should_compress = false;
1200 } else {
1201 should_compress = true;
1202 }
1203
1204 nofs_flag = memalloc_nofs_save();
1205 ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
1206 memalloc_nofs_restore(nofs_flag);
1207
1208 if (!ctx) {
1209 unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
1210 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1211 EXTENT_DO_ACCOUNTING;
1212 unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1213 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
1214 PAGE_SET_ERROR;
1215
1216 extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
1217 clear_bits, page_ops);
1218 return -ENOMEM;
1219 }
1220
1221 async_chunk = ctx->chunks;
1222 atomic_set(&ctx->num_chunks, num_chunks);
1223
1224 for (i = 0; i < num_chunks; i++) {
1225 if (should_compress)
1226 cur_end = min(end, start + SZ_512K - 1);
1227 else
1228 cur_end = end;
1196 1229
1197 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1198 1, 0, NULL);
1199 while (start < end) {
1200 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1201 BUG_ON(!async_cow); /* -ENOMEM */
1202 /* 1230 /*
1203 * igrab is called higher up in the call chain, take only the 1231 * igrab is called higher up in the call chain, take only the
1204 * lightweight reference for the callback lifetime 1232 * lightweight reference for the callback lifetime
1205 */ 1233 */
1206 ihold(inode); 1234 ihold(inode);
1207 async_cow->inode = inode; 1235 async_chunk[i].pending = &ctx->num_chunks;
1208 async_cow->fs_info = fs_info; 1236 async_chunk[i].inode = inode;
1209 async_cow->locked_page = locked_page; 1237 async_chunk[i].start = start;
1210 async_cow->start = start; 1238 async_chunk[i].end = cur_end;
1211 async_cow->write_flags = write_flags; 1239 async_chunk[i].locked_page = locked_page;
1212 1240 async_chunk[i].write_flags = write_flags;
1213 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && 1241 INIT_LIST_HEAD(&async_chunk[i].extents);
1214 !btrfs_test_opt(fs_info, FORCE_COMPRESS)) 1242
1215 cur_end = end; 1243 btrfs_init_work(&async_chunk[i].work,
1216 else
1217 cur_end = min(end, start + SZ_512K - 1);
1218
1219 async_cow->end = cur_end;
1220 INIT_LIST_HEAD(&async_cow->extents);
1221
1222 btrfs_init_work(&async_cow->work,
1223 btrfs_delalloc_helper, 1244 btrfs_delalloc_helper,
1224 async_cow_start, async_cow_submit, 1245 async_cow_start, async_cow_submit,
1225 async_cow_free); 1246 async_cow_free);
1226 1247
1227 nr_pages = (cur_end - start + PAGE_SIZE) >> 1248 nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
1228 PAGE_SHIFT;
1229 atomic_add(nr_pages, &fs_info->async_delalloc_pages); 1249 atomic_add(nr_pages, &fs_info->async_delalloc_pages);
1230 1250
1231 btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); 1251 btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
1232 1252
1233 *nr_written += nr_pages; 1253 *nr_written += nr_pages;
1234 start = cur_end + 1; 1254 start = cur_end + 1;
@@ -1451,7 +1471,7 @@ next_slot:
1451 extent_end = ALIGN(extent_end, 1471 extent_end = ALIGN(extent_end,
1452 fs_info->sectorsize); 1472 fs_info->sectorsize);
1453 } else { 1473 } else {
1454 BUG_ON(1); 1474 BUG();
1455 } 1475 }
1456out_check: 1476out_check:
1457 if (extent_end <= start) { 1477 if (extent_end <= start) {
@@ -1964,11 +1984,11 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
1964 * 1984 *
1965 * c-3) otherwise: async submit 1985 * c-3) otherwise: async submit
1966 */ 1986 */
1967static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, 1987static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
1968 int mirror_num, unsigned long bio_flags, 1988 int mirror_num,
1969 u64 bio_offset) 1989 unsigned long bio_flags)
1990
1970{ 1991{
1971 struct inode *inode = private_data;
1972 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1992 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1973 struct btrfs_root *root = BTRFS_I(inode)->root; 1993 struct btrfs_root *root = BTRFS_I(inode)->root;
1974 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; 1994 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
@@ -2003,8 +2023,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
2003 goto mapit; 2023 goto mapit;
2004 /* we're doing a write, do the async checksumming */ 2024 /* we're doing a write, do the async checksumming */
2005 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, 2025 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
2006 bio_offset, inode, 2026 0, inode, btrfs_submit_bio_start);
2007 btrfs_submit_bio_start);
2008 goto out; 2027 goto out;
2009 } else if (!skip_sum) { 2028 } else if (!skip_sum) {
2010 ret = btrfs_csum_one_bio(inode, bio, 0, 0); 2029 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -2531,6 +2550,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
2531 struct btrfs_file_extent_item *item; 2550 struct btrfs_file_extent_item *item;
2532 struct btrfs_ordered_extent *ordered; 2551 struct btrfs_ordered_extent *ordered;
2533 struct btrfs_trans_handle *trans; 2552 struct btrfs_trans_handle *trans;
2553 struct btrfs_ref ref = { 0 };
2534 struct btrfs_root *root; 2554 struct btrfs_root *root;
2535 struct btrfs_key key; 2555 struct btrfs_key key;
2536 struct extent_buffer *leaf; 2556 struct extent_buffer *leaf;
@@ -2701,10 +2721,11 @@ again:
2701 inode_add_bytes(inode, len); 2721 inode_add_bytes(inode, len);
2702 btrfs_release_path(path); 2722 btrfs_release_path(path);
2703 2723
2704 ret = btrfs_inc_extent_ref(trans, root, new->bytenr, 2724 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
2705 new->disk_len, 0, 2725 new->disk_len, 0);
2706 backref->root_id, backref->inum, 2726 btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
2707 new->file_pos); /* start - extent_offset */ 2727 new->file_pos); /* start - extent_offset */
2728 ret = btrfs_inc_extent_ref(trans, &ref);
2708 if (ret) { 2729 if (ret) {
2709 btrfs_abort_transaction(trans, ret); 2730 btrfs_abort_transaction(trans, ret);
2710 goto out_free_path; 2731 goto out_free_path;
@@ -3699,21 +3720,6 @@ cache_index:
3699 * inode is not a directory, logging its parent unnecessarily. 3720 * inode is not a directory, logging its parent unnecessarily.
3700 */ 3721 */
3701 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; 3722 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
3702 /*
3703 * Similar reasoning for last_link_trans, needs to be set otherwise
3704 * for a case like the following:
3705 *
3706 * mkdir A
3707 * touch foo
3708 * ln foo A/bar
3709 * echo 2 > /proc/sys/vm/drop_caches
3710 * fsync foo
3711 * <power failure>
3712 *
3713 * Would result in link bar and directory A not existing after the power
3714 * failure.
3715 */
3716 BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;
3717 3723
3718 path->slots[0]++; 3724 path->slots[0]++;
3719 if (inode->i_nlink != 1 || 3725 if (inode->i_nlink != 1 ||
@@ -4679,7 +4685,7 @@ search_again:
4679 4685
4680 btrfs_set_file_extent_ram_bytes(leaf, fi, size); 4686 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4681 size = btrfs_file_extent_calc_inline_size(size); 4687 size = btrfs_file_extent_calc_inline_size(size);
4682 btrfs_truncate_item(root->fs_info, path, size, 1); 4688 btrfs_truncate_item(path, size, 1);
4683 } else if (!del_item) { 4689 } else if (!del_item) {
4684 /* 4690 /*
4685 * We have to bail so the last_size is set to 4691 * We have to bail so the last_size is set to
@@ -4718,12 +4724,17 @@ delete:
4718 if (found_extent && 4724 if (found_extent &&
4719 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || 4725 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4720 root == fs_info->tree_root)) { 4726 root == fs_info->tree_root)) {
4727 struct btrfs_ref ref = { 0 };
4728
4721 btrfs_set_path_blocking(path); 4729 btrfs_set_path_blocking(path);
4722 bytes_deleted += extent_num_bytes; 4730 bytes_deleted += extent_num_bytes;
4723 ret = btrfs_free_extent(trans, root, extent_start, 4731
4724 extent_num_bytes, 0, 4732 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
4725 btrfs_header_owner(leaf), 4733 extent_start, extent_num_bytes, 0);
4726 ino, extent_offset); 4734 ref.real_root = root->root_key.objectid;
4735 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
4736 ino, extent_offset);
4737 ret = btrfs_free_extent(trans, &ref);
4727 if (ret) { 4738 if (ret) {
4728 btrfs_abort_transaction(trans, ret); 4739 btrfs_abort_transaction(trans, ret);
4729 break; 4740 break;
@@ -5448,12 +5459,14 @@ no_delete:
5448} 5459}
5449 5460
5450/* 5461/*
5451 * this returns the key found in the dir entry in the location pointer. 5462 * Return the key found in the dir entry in the location pointer, fill @type
5463 * with BTRFS_FT_*, and return 0.
5464 *
5452 * If no dir entries were found, returns -ENOENT. 5465 * If no dir entries were found, returns -ENOENT.
5453 * If found a corrupted location in dir entry, returns -EUCLEAN. 5466 * If found a corrupted location in dir entry, returns -EUCLEAN.
5454 */ 5467 */
5455static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, 5468static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5456 struct btrfs_key *location) 5469 struct btrfs_key *location, u8 *type)
5457{ 5470{
5458 const char *name = dentry->d_name.name; 5471 const char *name = dentry->d_name.name;
5459 int namelen = dentry->d_name.len; 5472 int namelen = dentry->d_name.len;
@@ -5482,6 +5495,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5482 __func__, name, btrfs_ino(BTRFS_I(dir)), 5495 __func__, name, btrfs_ino(BTRFS_I(dir)),
5483 location->objectid, location->type, location->offset); 5496 location->objectid, location->type, location->offset);
5484 } 5497 }
5498 if (!ret)
5499 *type = btrfs_dir_type(path->nodes[0], di);
5485out: 5500out:
5486 btrfs_free_path(path); 5501 btrfs_free_path(path);
5487 return ret; 5502 return ret;
@@ -5719,6 +5734,24 @@ static struct inode *new_simple_dir(struct super_block *s,
5719 return inode; 5734 return inode;
5720} 5735}
5721 5736
5737static inline u8 btrfs_inode_type(struct inode *inode)
5738{
5739 /*
5740 * Compile-time asserts that generic FT_* types still match
5741 * BTRFS_FT_* types
5742 */
5743 BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
5744 BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
5745 BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
5746 BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
5747 BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
5748 BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
5749 BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
5750 BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
5751
5752 return fs_umode_to_ftype(inode->i_mode);
5753}
5754
5722struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) 5755struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5723{ 5756{
5724 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); 5757 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -5726,18 +5759,31 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5726 struct btrfs_root *root = BTRFS_I(dir)->root; 5759 struct btrfs_root *root = BTRFS_I(dir)->root;
5727 struct btrfs_root *sub_root = root; 5760 struct btrfs_root *sub_root = root;
5728 struct btrfs_key location; 5761 struct btrfs_key location;
5762 u8 di_type = 0;
5729 int index; 5763 int index;
5730 int ret = 0; 5764 int ret = 0;
5731 5765
5732 if (dentry->d_name.len > BTRFS_NAME_LEN) 5766 if (dentry->d_name.len > BTRFS_NAME_LEN)
5733 return ERR_PTR(-ENAMETOOLONG); 5767 return ERR_PTR(-ENAMETOOLONG);
5734 5768
5735 ret = btrfs_inode_by_name(dir, dentry, &location); 5769 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5736 if (ret < 0) 5770 if (ret < 0)
5737 return ERR_PTR(ret); 5771 return ERR_PTR(ret);
5738 5772
5739 if (location.type == BTRFS_INODE_ITEM_KEY) { 5773 if (location.type == BTRFS_INODE_ITEM_KEY) {
5740 inode = btrfs_iget(dir->i_sb, &location, root, NULL); 5774 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
5775 if (IS_ERR(inode))
5776 return inode;
5777
5778 /* Do extra check against inode mode with di_type */
5779 if (btrfs_inode_type(inode) != di_type) {
5780 btrfs_crit(fs_info,
5781"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5782 inode->i_mode, btrfs_inode_type(inode),
5783 di_type);
5784 iput(inode);
5785 return ERR_PTR(-EUCLEAN);
5786 }
5741 return inode; 5787 return inode;
5742 } 5788 }
5743 5789
@@ -5797,10 +5843,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5797 return d_splice_alias(inode, dentry); 5843 return d_splice_alias(inode, dentry);
5798} 5844}
5799 5845
5800unsigned char btrfs_filetype_table[] = {
5801 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
5802};
5803
5804/* 5846/*
5805 * All this infrastructure exists because dir_emit can fault, and we are holding 5847 * All this infrastructure exists because dir_emit can fault, and we are holding
5806 * the tree lock when doing readdir. For now just allocate a buffer and copy 5848 * the tree lock when doing readdir. For now just allocate a buffer and copy
@@ -5939,7 +5981,7 @@ again:
5939 name_ptr = (char *)(entry + 1); 5981 name_ptr = (char *)(entry + 1);
5940 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), 5982 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
5941 name_len); 5983 name_len);
5942 put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], 5984 put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
5943 &entry->type); 5985 &entry->type);
5944 btrfs_dir_item_key_to_cpu(leaf, di, &location); 5986 btrfs_dir_item_key_to_cpu(leaf, di, &location);
5945 put_unaligned(location.objectid, &entry->ino); 5987 put_unaligned(location.objectid, &entry->ino);
@@ -6342,11 +6384,6 @@ fail:
6342 return ERR_PTR(ret); 6384 return ERR_PTR(ret);
6343} 6385}
6344 6386
6345static inline u8 btrfs_inode_type(struct inode *inode)
6346{
6347 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
6348}
6349
6350/* 6387/*
6351 * utility function to add 'inode' into 'parent_inode' with 6388 * utility function to add 'inode' into 'parent_inode' with
6352 * a give name and a given sequence number. 6389 * a give name and a given sequence number.
@@ -6634,7 +6671,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6634 if (err) 6671 if (err)
6635 goto fail; 6672 goto fail;
6636 } 6673 }
6637 BTRFS_I(inode)->last_link_trans = trans->transid;
6638 d_instantiate(dentry, inode); 6674 d_instantiate(dentry, inode);
6639 ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent, 6675 ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
6640 true, NULL); 6676 true, NULL);
@@ -6864,6 +6900,14 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
6864 extent_start = found_key.offset; 6900 extent_start = found_key.offset;
6865 if (extent_type == BTRFS_FILE_EXTENT_REG || 6901 if (extent_type == BTRFS_FILE_EXTENT_REG ||
6866 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 6902 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
6903 /* Only regular file could have regular/prealloc extent */
6904 if (!S_ISREG(inode->vfs_inode.i_mode)) {
6905 ret = -EUCLEAN;
6906 btrfs_crit(fs_info,
6907 "regular/prealloc extent found for non-regular inode %llu",
6908 btrfs_ino(inode));
6909 goto out;
6910 }
6867 extent_end = extent_start + 6911 extent_end = extent_start +
6868 btrfs_file_extent_num_bytes(leaf, item); 6912 btrfs_file_extent_num_bytes(leaf, item);
6869 6913
@@ -9163,7 +9207,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
9163 ei->index_cnt = (u64)-1; 9207 ei->index_cnt = (u64)-1;
9164 ei->dir_index = 0; 9208 ei->dir_index = 0;
9165 ei->last_unlink_trans = 0; 9209 ei->last_unlink_trans = 0;
9166 ei->last_link_trans = 0;
9167 ei->last_log_commit = 0; 9210 ei->last_log_commit = 0;
9168 9211
9169 spin_lock_init(&ei->lock); 9212 spin_lock_init(&ei->lock);
@@ -9182,10 +9225,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
9182 9225
9183 inode = &ei->vfs_inode; 9226 inode = &ei->vfs_inode;
9184 extent_map_tree_init(&ei->extent_tree); 9227 extent_map_tree_init(&ei->extent_tree);
9185 extent_io_tree_init(&ei->io_tree, inode); 9228 extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
9186 extent_io_tree_init(&ei->io_failure_tree, inode); 9229 extent_io_tree_init(fs_info, &ei->io_failure_tree,
9187 ei->io_tree.track_uptodate = 1; 9230 IO_TREE_INODE_IO_FAILURE, inode);
9188 ei->io_failure_tree.track_uptodate = 1; 9231 ei->io_tree.track_uptodate = true;
9232 ei->io_failure_tree.track_uptodate = true;
9189 atomic_set(&ei->sync_writers, 0); 9233 atomic_set(&ei->sync_writers, 0);
9190 mutex_init(&ei->log_mutex); 9234 mutex_init(&ei->log_mutex);
9191 mutex_init(&ei->delalloc_mutex); 9235 mutex_init(&ei->delalloc_mutex);
@@ -9427,7 +9471,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9427 /* Reference for the source. */ 9471 /* Reference for the source. */
9428 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { 9472 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9429 /* force full log commit if subvolume involved. */ 9473 /* force full log commit if subvolume involved. */
9430 btrfs_set_log_full_commit(fs_info, trans); 9474 btrfs_set_log_full_commit(trans);
9431 } else { 9475 } else {
9432 btrfs_pin_log_trans(root); 9476 btrfs_pin_log_trans(root);
9433 root_log_pinned = true; 9477 root_log_pinned = true;
@@ -9444,7 +9488,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9444 /* And now for the dest. */ 9488 /* And now for the dest. */
9445 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { 9489 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9446 /* force full log commit if subvolume involved. */ 9490 /* force full log commit if subvolume involved. */
9447 btrfs_set_log_full_commit(fs_info, trans); 9491 btrfs_set_log_full_commit(trans);
9448 } else { 9492 } else {
9449 btrfs_pin_log_trans(dest); 9493 btrfs_pin_log_trans(dest);
9450 dest_log_pinned = true; 9494 dest_log_pinned = true;
@@ -9580,7 +9624,7 @@ out_fail:
9580 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || 9624 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
9581 (new_inode && 9625 (new_inode &&
9582 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) 9626 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
9583 btrfs_set_log_full_commit(fs_info, trans); 9627 btrfs_set_log_full_commit(trans);
9584 9628
9585 if (root_log_pinned) { 9629 if (root_log_pinned) {
9586 btrfs_end_log_trans(root); 9630 btrfs_end_log_trans(root);
@@ -9766,7 +9810,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9766 BTRFS_I(old_inode)->dir_index = 0ULL; 9810 BTRFS_I(old_inode)->dir_index = 0ULL;
9767 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 9811 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9768 /* force full log commit if subvolume involved. */ 9812 /* force full log commit if subvolume involved. */
9769 btrfs_set_log_full_commit(fs_info, trans); 9813 btrfs_set_log_full_commit(trans);
9770 } else { 9814 } else {
9771 btrfs_pin_log_trans(root); 9815 btrfs_pin_log_trans(root);
9772 log_pinned = true; 9816 log_pinned = true;
@@ -9887,7 +9931,7 @@ out_fail:
9887 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) || 9931 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
9888 (new_inode && 9932 (new_inode &&
9889 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))) 9933 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
9890 btrfs_set_log_full_commit(fs_info, trans); 9934 btrfs_set_log_full_commit(trans);
9891 9935
9892 btrfs_end_log_trans(root); 9936 btrfs_end_log_trans(root);
9893 log_pinned = false; 9937 log_pinned = false;
@@ -10190,7 +10234,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
10190 10234
10191 inode->i_op = &btrfs_symlink_inode_operations; 10235 inode->i_op = &btrfs_symlink_inode_operations;
10192 inode_nohighmem(inode); 10236 inode_nohighmem(inode);
10193 inode->i_mapping->a_ops = &btrfs_aops;
10194 inode_set_bytes(inode, name_len); 10237 inode_set_bytes(inode, name_len);
10195 btrfs_i_size_write(BTRFS_I(inode), name_len); 10238 btrfs_i_size_write(BTRFS_I(inode), name_len);
10196 err = btrfs_update_inode(trans, root, inode); 10239 err = btrfs_update_inode(trans, root, inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cd4e693406a0..6dafa857bbb9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,11 +187,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
187 struct btrfs_inode *binode = BTRFS_I(inode); 187 struct btrfs_inode *binode = BTRFS_I(inode);
188 struct btrfs_root *root = binode->root; 188 struct btrfs_root *root = binode->root;
189 struct btrfs_trans_handle *trans; 189 struct btrfs_trans_handle *trans;
190 unsigned int fsflags, old_fsflags; 190 unsigned int fsflags;
191 int ret; 191 int ret;
192 u64 old_flags; 192 const char *comp = NULL;
193 unsigned int old_i_flags; 193 u32 binode_flags = binode->flags;
194 umode_t mode;
195 194
196 if (!inode_owner_or_capable(inode)) 195 if (!inode_owner_or_capable(inode))
197 return -EPERM; 196 return -EPERM;
@@ -212,13 +211,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
212 211
213 inode_lock(inode); 212 inode_lock(inode);
214 213
215 old_flags = binode->flags;
216 old_i_flags = inode->i_flags;
217 mode = inode->i_mode;
218
219 fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); 214 fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
220 old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); 215 if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
221 if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 216 (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
222 if (!capable(CAP_LINUX_IMMUTABLE)) { 217 if (!capable(CAP_LINUX_IMMUTABLE)) {
223 ret = -EPERM; 218 ret = -EPERM;
224 goto out_unlock; 219 goto out_unlock;
@@ -226,52 +221,52 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
226 } 221 }
227 222
228 if (fsflags & FS_SYNC_FL) 223 if (fsflags & FS_SYNC_FL)
229 binode->flags |= BTRFS_INODE_SYNC; 224 binode_flags |= BTRFS_INODE_SYNC;
230 else 225 else
231 binode->flags &= ~BTRFS_INODE_SYNC; 226 binode_flags &= ~BTRFS_INODE_SYNC;
232 if (fsflags & FS_IMMUTABLE_FL) 227 if (fsflags & FS_IMMUTABLE_FL)
233 binode->flags |= BTRFS_INODE_IMMUTABLE; 228 binode_flags |= BTRFS_INODE_IMMUTABLE;
234 else 229 else
235 binode->flags &= ~BTRFS_INODE_IMMUTABLE; 230 binode_flags &= ~BTRFS_INODE_IMMUTABLE;
236 if (fsflags & FS_APPEND_FL) 231 if (fsflags & FS_APPEND_FL)
237 binode->flags |= BTRFS_INODE_APPEND; 232 binode_flags |= BTRFS_INODE_APPEND;
238 else 233 else
239 binode->flags &= ~BTRFS_INODE_APPEND; 234 binode_flags &= ~BTRFS_INODE_APPEND;
240 if (fsflags & FS_NODUMP_FL) 235 if (fsflags & FS_NODUMP_FL)
241 binode->flags |= BTRFS_INODE_NODUMP; 236 binode_flags |= BTRFS_INODE_NODUMP;
242 else 237 else
243 binode->flags &= ~BTRFS_INODE_NODUMP; 238 binode_flags &= ~BTRFS_INODE_NODUMP;
244 if (fsflags & FS_NOATIME_FL) 239 if (fsflags & FS_NOATIME_FL)
245 binode->flags |= BTRFS_INODE_NOATIME; 240 binode_flags |= BTRFS_INODE_NOATIME;
246 else 241 else
247 binode->flags &= ~BTRFS_INODE_NOATIME; 242 binode_flags &= ~BTRFS_INODE_NOATIME;
248 if (fsflags & FS_DIRSYNC_FL) 243 if (fsflags & FS_DIRSYNC_FL)
249 binode->flags |= BTRFS_INODE_DIRSYNC; 244 binode_flags |= BTRFS_INODE_DIRSYNC;
250 else 245 else
251 binode->flags &= ~BTRFS_INODE_DIRSYNC; 246 binode_flags &= ~BTRFS_INODE_DIRSYNC;
252 if (fsflags & FS_NOCOW_FL) { 247 if (fsflags & FS_NOCOW_FL) {
253 if (S_ISREG(mode)) { 248 if (S_ISREG(inode->i_mode)) {
254 /* 249 /*
255 * It's safe to turn csums off here, no extents exist. 250 * It's safe to turn csums off here, no extents exist.
256 * Otherwise we want the flag to reflect the real COW 251 * Otherwise we want the flag to reflect the real COW
257 * status of the file and will not set it. 252 * status of the file and will not set it.
258 */ 253 */
259 if (inode->i_size == 0) 254 if (inode->i_size == 0)
260 binode->flags |= BTRFS_INODE_NODATACOW 255 binode_flags |= BTRFS_INODE_NODATACOW |
261 | BTRFS_INODE_NODATASUM; 256 BTRFS_INODE_NODATASUM;
262 } else { 257 } else {
263 binode->flags |= BTRFS_INODE_NODATACOW; 258 binode_flags |= BTRFS_INODE_NODATACOW;
264 } 259 }
265 } else { 260 } else {
266 /* 261 /*
267 * Revert back under same assumptions as above 262 * Revert back under same assumptions as above
268 */ 263 */
269 if (S_ISREG(mode)) { 264 if (S_ISREG(inode->i_mode)) {
270 if (inode->i_size == 0) 265 if (inode->i_size == 0)
271 binode->flags &= ~(BTRFS_INODE_NODATACOW 266 binode_flags &= ~(BTRFS_INODE_NODATACOW |
272 | BTRFS_INODE_NODATASUM); 267 BTRFS_INODE_NODATASUM);
273 } else { 268 } else {
274 binode->flags &= ~BTRFS_INODE_NODATACOW; 269 binode_flags &= ~BTRFS_INODE_NODATACOW;
275 } 270 }
276 } 271 }
277 272
@@ -281,57 +276,61 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
281 * things smaller. 276 * things smaller.
282 */ 277 */
283 if (fsflags & FS_NOCOMP_FL) { 278 if (fsflags & FS_NOCOMP_FL) {
284 binode->flags &= ~BTRFS_INODE_COMPRESS; 279 binode_flags &= ~BTRFS_INODE_COMPRESS;
285 binode->flags |= BTRFS_INODE_NOCOMPRESS; 280 binode_flags |= BTRFS_INODE_NOCOMPRESS;
286
287 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
288 if (ret && ret != -ENODATA)
289 goto out_drop;
290 } else if (fsflags & FS_COMPR_FL) { 281 } else if (fsflags & FS_COMPR_FL) {
291 const char *comp;
292 282
293 if (IS_SWAPFILE(inode)) { 283 if (IS_SWAPFILE(inode)) {
294 ret = -ETXTBSY; 284 ret = -ETXTBSY;
295 goto out_unlock; 285 goto out_unlock;
296 } 286 }
297 287
298 binode->flags |= BTRFS_INODE_COMPRESS; 288 binode_flags |= BTRFS_INODE_COMPRESS;
299 binode->flags &= ~BTRFS_INODE_NOCOMPRESS; 289 binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
300 290
301 comp = btrfs_compress_type2str(fs_info->compress_type); 291 comp = btrfs_compress_type2str(fs_info->compress_type);
302 if (!comp || comp[0] == 0) 292 if (!comp || comp[0] == 0)
303 comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); 293 comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
304
305 ret = btrfs_set_prop(inode, "btrfs.compression",
306 comp, strlen(comp), 0);
307 if (ret)
308 goto out_drop;
309
310 } else { 294 } else {
311 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); 295 binode_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
312 if (ret && ret != -ENODATA)
313 goto out_drop;
314 binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
315 } 296 }
316 297
317 trans = btrfs_start_transaction(root, 1); 298 /*
299 * 1 for inode item
300 * 2 for properties
301 */
302 trans = btrfs_start_transaction(root, 3);
318 if (IS_ERR(trans)) { 303 if (IS_ERR(trans)) {
319 ret = PTR_ERR(trans); 304 ret = PTR_ERR(trans);
320 goto out_drop; 305 goto out_unlock;
306 }
307
308 if (comp) {
309 ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
310 strlen(comp), 0);
311 if (ret) {
312 btrfs_abort_transaction(trans, ret);
313 goto out_end_trans;
314 }
315 set_bit(BTRFS_INODE_COPY_EVERYTHING,
316 &BTRFS_I(inode)->runtime_flags);
317 } else {
318 ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
319 0, 0);
320 if (ret && ret != -ENODATA) {
321 btrfs_abort_transaction(trans, ret);
322 goto out_end_trans;
323 }
321 } 324 }
322 325
326 binode->flags = binode_flags;
323 btrfs_sync_inode_flags_to_i_flags(inode); 327 btrfs_sync_inode_flags_to_i_flags(inode);
324 inode_inc_iversion(inode); 328 inode_inc_iversion(inode);
325 inode->i_ctime = current_time(inode); 329 inode->i_ctime = current_time(inode);
326 ret = btrfs_update_inode(trans, root, inode); 330 ret = btrfs_update_inode(trans, root, inode);
327 331
332 out_end_trans:
328 btrfs_end_transaction(trans); 333 btrfs_end_transaction(trans);
329 out_drop:
330 if (ret) {
331 binode->flags = old_flags;
332 inode->i_flags = old_i_flags;
333 }
334
335 out_unlock: 334 out_unlock:
336 inode_unlock(inode); 335 inode_unlock(inode);
337 mnt_drop_write_file(file); 336 mnt_drop_write_file(file);
@@ -3260,6 +3259,19 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
3260{ 3259{
3261 int ret; 3260 int ret;
3262 u64 i, tail_len, chunk_count; 3261 u64 i, tail_len, chunk_count;
3262 struct btrfs_root *root_dst = BTRFS_I(dst)->root;
3263
3264 spin_lock(&root_dst->root_item_lock);
3265 if (root_dst->send_in_progress) {
3266 btrfs_warn_rl(root_dst->fs_info,
3267"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
3268 root_dst->root_key.objectid,
3269 root_dst->send_in_progress);
3270 spin_unlock(&root_dst->root_item_lock);
3271 return -EAGAIN;
3272 }
3273 root_dst->dedupe_in_progress++;
3274 spin_unlock(&root_dst->root_item_lock);
3263 3275
3264 tail_len = olen % BTRFS_MAX_DEDUPE_LEN; 3276 tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
3265 chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); 3277 chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
@@ -3268,7 +3280,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
3268 ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, 3280 ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
3269 dst, dst_loff); 3281 dst, dst_loff);
3270 if (ret) 3282 if (ret)
3271 return ret; 3283 goto out;
3272 3284
3273 loff += BTRFS_MAX_DEDUPE_LEN; 3285 loff += BTRFS_MAX_DEDUPE_LEN;
3274 dst_loff += BTRFS_MAX_DEDUPE_LEN; 3286 dst_loff += BTRFS_MAX_DEDUPE_LEN;
@@ -3277,6 +3289,10 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
3277 if (tail_len > 0) 3289 if (tail_len > 0)
3278 ret = btrfs_extent_same_range(src, loff, tail_len, dst, 3290 ret = btrfs_extent_same_range(src, loff, tail_len, dst,
3279 dst_loff); 3291 dst_loff);
3292out:
3293 spin_lock(&root_dst->root_item_lock);
3294 root_dst->dedupe_in_progress--;
3295 spin_unlock(&root_dst->root_item_lock);
3280 3296
3281 return ret; 3297 return ret;
3282} 3298}
@@ -3735,13 +3751,16 @@ process_slot:
3735 datal); 3751 datal);
3736 3752
3737 if (disko) { 3753 if (disko) {
3754 struct btrfs_ref ref = { 0 };
3738 inode_add_bytes(inode, datal); 3755 inode_add_bytes(inode, datal);
3739 ret = btrfs_inc_extent_ref(trans, 3756 btrfs_init_generic_ref(&ref,
3740 root, 3757 BTRFS_ADD_DELAYED_REF, disko,
3741 disko, diskl, 0, 3758 diskl, 0);
3742 root->root_key.objectid, 3759 btrfs_init_data_ref(&ref,
3743 btrfs_ino(BTRFS_I(inode)), 3760 root->root_key.objectid,
3744 new_key.offset - datao); 3761 btrfs_ino(BTRFS_I(inode)),
3762 new_key.offset - datao);
3763 ret = btrfs_inc_extent_ref(trans, &ref);
3745 if (ret) { 3764 if (ret) {
3746 btrfs_abort_transaction(trans, 3765 btrfs_abort_transaction(trans,
3747 ret); 3766 ret);
@@ -3948,16 +3967,10 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
3948 return -EXDEV; 3967 return -EXDEV;
3949 } 3968 }
3950 3969
3951 if (same_inode)
3952 inode_lock(inode_in);
3953 else
3954 lock_two_nondirectories(inode_in, inode_out);
3955
3956 /* don't make the dst file partly checksummed */ 3970 /* don't make the dst file partly checksummed */
3957 if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) != 3971 if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
3958 (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) { 3972 (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
3959 ret = -EINVAL; 3973 return -EINVAL;
3960 goto out_unlock;
3961 } 3974 }
3962 3975
3963 /* 3976 /*
@@ -3991,26 +4004,14 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
3991 ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), 4004 ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
3992 wb_len); 4005 wb_len);
3993 if (ret < 0) 4006 if (ret < 0)
3994 goto out_unlock; 4007 return ret;
3995 ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), 4008 ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
3996 wb_len); 4009 wb_len);
3997 if (ret < 0) 4010 if (ret < 0)
3998 goto out_unlock; 4011 return ret;
3999 4012
4000 ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, 4013 return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
4001 len, remap_flags); 4014 len, remap_flags);
4002 if (ret < 0 || *len == 0)
4003 goto out_unlock;
4004
4005 return 0;
4006
4007 out_unlock:
4008 if (same_inode)
4009 inode_unlock(inode_in);
4010 else
4011 unlock_two_nondirectories(inode_in, inode_out);
4012
4013 return ret;
4014} 4015}
4015 4016
4016loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, 4017loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
@@ -4025,16 +4026,22 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
4025 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 4026 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
4026 return -EINVAL; 4027 return -EINVAL;
4027 4028
4029 if (same_inode)
4030 inode_lock(src_inode);
4031 else
4032 lock_two_nondirectories(src_inode, dst_inode);
4033
4028 ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff, 4034 ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
4029 &len, remap_flags); 4035 &len, remap_flags);
4030 if (ret < 0 || len == 0) 4036 if (ret < 0 || len == 0)
4031 return ret; 4037 goto out_unlock;
4032 4038
4033 if (remap_flags & REMAP_FILE_DEDUP) 4039 if (remap_flags & REMAP_FILE_DEDUP)
4034 ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff); 4040 ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
4035 else 4041 else
4036 ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); 4042 ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
4037 4043
4044out_unlock:
4038 if (same_inode) 4045 if (same_inode)
4039 inode_unlock(src_inode); 4046 inode_unlock(src_inode);
4040 else 4047 else
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 82b84e4daad1..2f6c3c7851ed 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -12,10 +12,82 @@
12#include "extent_io.h" 12#include "extent_io.h"
13#include "locking.h" 13#include "locking.h"
14 14
15static void btrfs_assert_tree_read_locked(struct extent_buffer *eb); 15#ifdef CONFIG_BTRFS_DEBUG
16static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
17{
18 WARN_ON(atomic_read(&eb->spinning_writers));
19 atomic_inc(&eb->spinning_writers);
20}
21
22static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
23{
24 WARN_ON(atomic_read(&eb->spinning_writers) != 1);
25 atomic_dec(&eb->spinning_writers);
26}
27
28static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
29{
30 WARN_ON(atomic_read(&eb->spinning_writers));
31}
32
33static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
34{
35 atomic_inc(&eb->spinning_readers);
36}
37
38static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
39{
40 WARN_ON(atomic_read(&eb->spinning_readers) == 0);
41 atomic_dec(&eb->spinning_readers);
42}
43
44static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
45{
46 atomic_inc(&eb->read_locks);
47}
48
49static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
50{
51 atomic_dec(&eb->read_locks);
52}
53
54static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
55{
56 BUG_ON(!atomic_read(&eb->read_locks));
57}
58
59static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
60{
61 atomic_inc(&eb->write_locks);
62}
63
64static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
65{
66 atomic_dec(&eb->write_locks);
67}
68
69void btrfs_assert_tree_locked(struct extent_buffer *eb)
70{
71 BUG_ON(!atomic_read(&eb->write_locks));
72}
73
74#else
75static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
76static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
77static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
78static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
79static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
80static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
81static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
82static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
83void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
84static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
85static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
86#endif
16 87
17void btrfs_set_lock_blocking_read(struct extent_buffer *eb) 88void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
18{ 89{
90 trace_btrfs_set_lock_blocking_read(eb);
19 /* 91 /*
20 * No lock is required. The lock owner may change if we have a read 92 * No lock is required. The lock owner may change if we have a read
21 * lock, but it won't change to or away from us. If we have the write 93 * lock, but it won't change to or away from us. If we have the write
@@ -25,13 +97,13 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
25 return; 97 return;
26 btrfs_assert_tree_read_locked(eb); 98 btrfs_assert_tree_read_locked(eb);
27 atomic_inc(&eb->blocking_readers); 99 atomic_inc(&eb->blocking_readers);
28 WARN_ON(atomic_read(&eb->spinning_readers) == 0); 100 btrfs_assert_spinning_readers_put(eb);
29 atomic_dec(&eb->spinning_readers);
30 read_unlock(&eb->lock); 101 read_unlock(&eb->lock);
31} 102}
32 103
33void btrfs_set_lock_blocking_write(struct extent_buffer *eb) 104void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
34{ 105{
106 trace_btrfs_set_lock_blocking_write(eb);
35 /* 107 /*
36 * No lock is required. The lock owner may change if we have a read 108 * No lock is required. The lock owner may change if we have a read
37 * lock, but it won't change to or away from us. If we have the write 109 * lock, but it won't change to or away from us. If we have the write
@@ -40,8 +112,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
40 if (eb->lock_nested && current->pid == eb->lock_owner) 112 if (eb->lock_nested && current->pid == eb->lock_owner)
41 return; 113 return;
42 if (atomic_read(&eb->blocking_writers) == 0) { 114 if (atomic_read(&eb->blocking_writers) == 0) {
43 WARN_ON(atomic_read(&eb->spinning_writers) != 1); 115 btrfs_assert_spinning_writers_put(eb);
44 atomic_dec(&eb->spinning_writers);
45 btrfs_assert_tree_locked(eb); 116 btrfs_assert_tree_locked(eb);
46 atomic_inc(&eb->blocking_writers); 117 atomic_inc(&eb->blocking_writers);
47 write_unlock(&eb->lock); 118 write_unlock(&eb->lock);
@@ -50,6 +121,7 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
50 121
51void btrfs_clear_lock_blocking_read(struct extent_buffer *eb) 122void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
52{ 123{
124 trace_btrfs_clear_lock_blocking_read(eb);
53 /* 125 /*
54 * No lock is required. The lock owner may change if we have a read 126 * No lock is required. The lock owner may change if we have a read
55 * lock, but it won't change to or away from us. If we have the write 127 * lock, but it won't change to or away from us. If we have the write
@@ -59,7 +131,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
59 return; 131 return;
60 BUG_ON(atomic_read(&eb->blocking_readers) == 0); 132 BUG_ON(atomic_read(&eb->blocking_readers) == 0);
61 read_lock(&eb->lock); 133 read_lock(&eb->lock);
62 atomic_inc(&eb->spinning_readers); 134 btrfs_assert_spinning_readers_get(eb);
63 /* atomic_dec_and_test implies a barrier */ 135 /* atomic_dec_and_test implies a barrier */
64 if (atomic_dec_and_test(&eb->blocking_readers)) 136 if (atomic_dec_and_test(&eb->blocking_readers))
65 cond_wake_up_nomb(&eb->read_lock_wq); 137 cond_wake_up_nomb(&eb->read_lock_wq);
@@ -67,6 +139,7 @@ void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
67 139
68void btrfs_clear_lock_blocking_write(struct extent_buffer *eb) 140void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
69{ 141{
142 trace_btrfs_clear_lock_blocking_write(eb);
70 /* 143 /*
71 * no lock is required. The lock owner may change if 144 * no lock is required. The lock owner may change if
72 * we have a read lock, but it won't change to or away 145 * we have a read lock, but it won't change to or away
@@ -77,8 +150,7 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
77 return; 150 return;
78 BUG_ON(atomic_read(&eb->blocking_writers) != 1); 151 BUG_ON(atomic_read(&eb->blocking_writers) != 1);
79 write_lock(&eb->lock); 152 write_lock(&eb->lock);
80 WARN_ON(atomic_read(&eb->spinning_writers)); 153 btrfs_assert_spinning_writers_get(eb);
81 atomic_inc(&eb->spinning_writers);
82 /* atomic_dec_and_test implies a barrier */ 154 /* atomic_dec_and_test implies a barrier */
83 if (atomic_dec_and_test(&eb->blocking_writers)) 155 if (atomic_dec_and_test(&eb->blocking_writers))
84 cond_wake_up_nomb(&eb->write_lock_wq); 156 cond_wake_up_nomb(&eb->write_lock_wq);
@@ -90,6 +162,10 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
90 */ 162 */
91void btrfs_tree_read_lock(struct extent_buffer *eb) 163void btrfs_tree_read_lock(struct extent_buffer *eb)
92{ 164{
165 u64 start_ns = 0;
166
167 if (trace_btrfs_tree_read_lock_enabled())
168 start_ns = ktime_get_ns();
93again: 169again:
94 BUG_ON(!atomic_read(&eb->blocking_writers) && 170 BUG_ON(!atomic_read(&eb->blocking_writers) &&
95 current->pid == eb->lock_owner); 171 current->pid == eb->lock_owner);
@@ -104,8 +180,9 @@ again:
104 * called on a partly (write-)locked tree. 180 * called on a partly (write-)locked tree.
105 */ 181 */
106 BUG_ON(eb->lock_nested); 182 BUG_ON(eb->lock_nested);
107 eb->lock_nested = 1; 183 eb->lock_nested = true;
108 read_unlock(&eb->lock); 184 read_unlock(&eb->lock);
185 trace_btrfs_tree_read_lock(eb, start_ns);
109 return; 186 return;
110 } 187 }
111 if (atomic_read(&eb->blocking_writers)) { 188 if (atomic_read(&eb->blocking_writers)) {
@@ -114,8 +191,9 @@ again:
114 atomic_read(&eb->blocking_writers) == 0); 191 atomic_read(&eb->blocking_writers) == 0);
115 goto again; 192 goto again;
116 } 193 }
117 atomic_inc(&eb->read_locks); 194 btrfs_assert_tree_read_locks_get(eb);
118 atomic_inc(&eb->spinning_readers); 195 btrfs_assert_spinning_readers_get(eb);
196 trace_btrfs_tree_read_lock(eb, start_ns);
119} 197}
120 198
121/* 199/*
@@ -133,8 +211,9 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
133 read_unlock(&eb->lock); 211 read_unlock(&eb->lock);
134 return 0; 212 return 0;
135 } 213 }
136 atomic_inc(&eb->read_locks); 214 btrfs_assert_tree_read_locks_get(eb);
137 atomic_inc(&eb->spinning_readers); 215 btrfs_assert_spinning_readers_get(eb);
216 trace_btrfs_tree_read_lock_atomic(eb);
138 return 1; 217 return 1;
139} 218}
140 219
@@ -154,8 +233,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
154 read_unlock(&eb->lock); 233 read_unlock(&eb->lock);
155 return 0; 234 return 0;
156 } 235 }
157 atomic_inc(&eb->read_locks); 236 btrfs_assert_tree_read_locks_get(eb);
158 atomic_inc(&eb->spinning_readers); 237 btrfs_assert_spinning_readers_get(eb);
238 trace_btrfs_try_tree_read_lock(eb);
159 return 1; 239 return 1;
160} 240}
161 241
@@ -175,9 +255,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
175 write_unlock(&eb->lock); 255 write_unlock(&eb->lock);
176 return 0; 256 return 0;
177 } 257 }
178 atomic_inc(&eb->write_locks); 258 btrfs_assert_tree_write_locks_get(eb);
179 atomic_inc(&eb->spinning_writers); 259 btrfs_assert_spinning_writers_get(eb);
180 eb->lock_owner = current->pid; 260 eb->lock_owner = current->pid;
261 trace_btrfs_try_tree_write_lock(eb);
181 return 1; 262 return 1;
182} 263}
183 264
@@ -186,6 +267,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
186 */ 267 */
187void btrfs_tree_read_unlock(struct extent_buffer *eb) 268void btrfs_tree_read_unlock(struct extent_buffer *eb)
188{ 269{
270 trace_btrfs_tree_read_unlock(eb);
189 /* 271 /*
190 * if we're nested, we have the write lock. No new locking 272 * if we're nested, we have the write lock. No new locking
191 * is needed as long as we are the lock owner. 273 * is needed as long as we are the lock owner.
@@ -193,13 +275,12 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
193 * field only matters to the lock owner. 275 * field only matters to the lock owner.
194 */ 276 */
195 if (eb->lock_nested && current->pid == eb->lock_owner) { 277 if (eb->lock_nested && current->pid == eb->lock_owner) {
196 eb->lock_nested = 0; 278 eb->lock_nested = false;
197 return; 279 return;
198 } 280 }
199 btrfs_assert_tree_read_locked(eb); 281 btrfs_assert_tree_read_locked(eb);
200 WARN_ON(atomic_read(&eb->spinning_readers) == 0); 282 btrfs_assert_spinning_readers_put(eb);
201 atomic_dec(&eb->spinning_readers); 283 btrfs_assert_tree_read_locks_put(eb);
202 atomic_dec(&eb->read_locks);
203 read_unlock(&eb->lock); 284 read_unlock(&eb->lock);
204} 285}
205 286
@@ -208,6 +289,7 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
208 */ 289 */
209void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) 290void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
210{ 291{
292 trace_btrfs_tree_read_unlock_blocking(eb);
211 /* 293 /*
212 * if we're nested, we have the write lock. No new locking 294 * if we're nested, we have the write lock. No new locking
213 * is needed as long as we are the lock owner. 295 * is needed as long as we are the lock owner.
@@ -215,7 +297,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
215 * field only matters to the lock owner. 297 * field only matters to the lock owner.
216 */ 298 */
217 if (eb->lock_nested && current->pid == eb->lock_owner) { 299 if (eb->lock_nested && current->pid == eb->lock_owner) {
218 eb->lock_nested = 0; 300 eb->lock_nested = false;
219 return; 301 return;
220 } 302 }
221 btrfs_assert_tree_read_locked(eb); 303 btrfs_assert_tree_read_locked(eb);
@@ -223,7 +305,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
223 /* atomic_dec_and_test implies a barrier */ 305 /* atomic_dec_and_test implies a barrier */
224 if (atomic_dec_and_test(&eb->blocking_readers)) 306 if (atomic_dec_and_test(&eb->blocking_readers))
225 cond_wake_up_nomb(&eb->read_lock_wq); 307 cond_wake_up_nomb(&eb->read_lock_wq);
226 atomic_dec(&eb->read_locks); 308 btrfs_assert_tree_read_locks_put(eb);
227} 309}
228 310
229/* 311/*
@@ -232,6 +314,11 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
232 */ 314 */
233void btrfs_tree_lock(struct extent_buffer *eb) 315void btrfs_tree_lock(struct extent_buffer *eb)
234{ 316{
317 u64 start_ns = 0;
318
319 if (trace_btrfs_tree_lock_enabled())
320 start_ns = ktime_get_ns();
321
235 WARN_ON(eb->lock_owner == current->pid); 322 WARN_ON(eb->lock_owner == current->pid);
236again: 323again:
237 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); 324 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -242,10 +329,10 @@ again:
242 write_unlock(&eb->lock); 329 write_unlock(&eb->lock);
243 goto again; 330 goto again;
244 } 331 }
245 WARN_ON(atomic_read(&eb->spinning_writers)); 332 btrfs_assert_spinning_writers_get(eb);
246 atomic_inc(&eb->spinning_writers); 333 btrfs_assert_tree_write_locks_get(eb);
247 atomic_inc(&eb->write_locks);
248 eb->lock_owner = current->pid; 334 eb->lock_owner = current->pid;
335 trace_btrfs_tree_lock(eb, start_ns);
249} 336}
250 337
251/* 338/*
@@ -258,28 +345,18 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
258 BUG_ON(blockers > 1); 345 BUG_ON(blockers > 1);
259 346
260 btrfs_assert_tree_locked(eb); 347 btrfs_assert_tree_locked(eb);
348 trace_btrfs_tree_unlock(eb);
261 eb->lock_owner = 0; 349 eb->lock_owner = 0;
262 atomic_dec(&eb->write_locks); 350 btrfs_assert_tree_write_locks_put(eb);
263 351
264 if (blockers) { 352 if (blockers) {
265 WARN_ON(atomic_read(&eb->spinning_writers)); 353 btrfs_assert_no_spinning_writers(eb);
266 atomic_dec(&eb->blocking_writers); 354 atomic_dec(&eb->blocking_writers);
267 /* Use the lighter barrier after atomic */ 355 /* Use the lighter barrier after atomic */
268 smp_mb__after_atomic(); 356 smp_mb__after_atomic();
269 cond_wake_up_nomb(&eb->write_lock_wq); 357 cond_wake_up_nomb(&eb->write_lock_wq);
270 } else { 358 } else {
271 WARN_ON(atomic_read(&eb->spinning_writers) != 1); 359 btrfs_assert_spinning_writers_put(eb);
272 atomic_dec(&eb->spinning_writers);
273 write_unlock(&eb->lock); 360 write_unlock(&eb->lock);
274 } 361 }
275} 362}
276
277void btrfs_assert_tree_locked(struct extent_buffer *eb)
278{
279 BUG_ON(!atomic_read(&eb->write_locks));
280}
281
282static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
283{
284 BUG_ON(!atomic_read(&eb->read_locks));
285}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 45e3cfd1198b..52889da69113 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -195,8 +195,11 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
195 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 195 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
196 set_bit(type, &entry->flags); 196 set_bit(type, &entry->flags);
197 197
198 if (dio) 198 if (dio) {
199 percpu_counter_add_batch(&fs_info->dio_bytes, len,
200 fs_info->delalloc_batch);
199 set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); 201 set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
202 }
200 203
201 /* one ref for the tree */ 204 /* one ref for the tree */
202 refcount_set(&entry->refs, 1); 205 refcount_set(&entry->refs, 1);
@@ -271,13 +274,12 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
271 * when an ordered extent is finished. If the list covers more than one 274 * when an ordered extent is finished. If the list covers more than one
272 * ordered extent, it is split across multiples. 275 * ordered extent, it is split across multiples.
273 */ 276 */
274void btrfs_add_ordered_sum(struct inode *inode, 277void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
275 struct btrfs_ordered_extent *entry,
276 struct btrfs_ordered_sum *sum) 278 struct btrfs_ordered_sum *sum)
277{ 279{
278 struct btrfs_ordered_inode_tree *tree; 280 struct btrfs_ordered_inode_tree *tree;
279 281
280 tree = &BTRFS_I(inode)->ordered_tree; 282 tree = &BTRFS_I(entry->inode)->ordered_tree;
281 spin_lock_irq(&tree->lock); 283 spin_lock_irq(&tree->lock);
282 list_add_tail(&sum->list, &entry->list); 284 list_add_tail(&sum->list, &entry->list);
283 spin_unlock_irq(&tree->lock); 285 spin_unlock_irq(&tree->lock);
@@ -469,6 +471,10 @@ void btrfs_remove_ordered_extent(struct inode *inode,
469 if (root != fs_info->tree_root) 471 if (root != fs_info->tree_root)
470 btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false); 472 btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
471 473
474 if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
475 percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len,
476 fs_info->delalloc_batch);
477
472 tree = &btrfs_inode->ordered_tree; 478 tree = &btrfs_inode->ordered_tree;
473 spin_lock_irq(&tree->lock); 479 spin_lock_irq(&tree->lock);
474 node = &entry->rb_node; 480 node = &entry->rb_node;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index fb9a161f0215..4c5991c3de14 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -167,8 +167,7 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
167int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 167int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
168 u64 start, u64 len, u64 disk_len, 168 u64 start, u64 len, u64 disk_len,
169 int type, int compress_type); 169 int type, int compress_type);
170void btrfs_add_ordered_sum(struct inode *inode, 170void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
171 struct btrfs_ordered_extent *entry,
172 struct btrfs_ordered_sum *sum); 171 struct btrfs_ordered_sum *sum);
173struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, 172struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
174 u64 file_offset); 173 u64 file_offset);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index df49931ffe92..1141ca5fae6a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -189,7 +189,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
189 btrfs_info(fs_info, 189 btrfs_info(fs_info,
190 "leaf %llu gen %llu total ptrs %d free space %d owner %llu", 190 "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
191 btrfs_header_bytenr(l), btrfs_header_generation(l), nr, 191 btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
192 btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); 192 btrfs_leaf_free_space(l), btrfs_header_owner(l));
193 print_eb_refs_lock(l); 193 print_eb_refs_lock(l);
194 for (i = 0 ; i < nr ; i++) { 194 for (i = 0 ; i < nr ; i++) {
195 item = btrfs_item_nr(i); 195 item = btrfs_item_nr(i);
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 61d22a56c0ba..ca2716917e37 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -23,36 +23,6 @@ struct prop_handler {
23 int inheritable; 23 int inheritable;
24}; 24};
25 25
26static int prop_compression_validate(const char *value, size_t len);
27static int prop_compression_apply(struct inode *inode,
28 const char *value,
29 size_t len);
30static const char *prop_compression_extract(struct inode *inode);
31
32static struct prop_handler prop_handlers[] = {
33 {
34 .xattr_name = XATTR_BTRFS_PREFIX "compression",
35 .validate = prop_compression_validate,
36 .apply = prop_compression_apply,
37 .extract = prop_compression_extract,
38 .inheritable = 1
39 },
40};
41
42void __init btrfs_props_init(void)
43{
44 int i;
45
46 hash_init(prop_handlers_ht);
47
48 for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
49 struct prop_handler *p = &prop_handlers[i];
50 u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
51
52 hash_add(prop_handlers_ht, &p->node, h);
53 }
54}
55
56static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash) 26static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
57{ 27{
58 struct hlist_head *h; 28 struct hlist_head *h;
@@ -85,15 +55,9 @@ find_prop_handler(const char *name,
85 return NULL; 55 return NULL;
86} 56}
87 57
88static int __btrfs_set_prop(struct btrfs_trans_handle *trans, 58int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
89 struct inode *inode,
90 const char *name,
91 const char *value,
92 size_t value_len,
93 int flags)
94{ 59{
95 const struct prop_handler *handler; 60 const struct prop_handler *handler;
96 int ret;
97 61
98 if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN) 62 if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
99 return -EINVAL; 63 return -EINVAL;
@@ -102,9 +66,26 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
102 if (!handler) 66 if (!handler)
103 return -EINVAL; 67 return -EINVAL;
104 68
69 if (value_len == 0)
70 return 0;
71
72 return handler->validate(value, value_len);
73}
74
75int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
76 const char *name, const char *value, size_t value_len,
77 int flags)
78{
79 const struct prop_handler *handler;
80 int ret;
81
82 handler = find_prop_handler(name, NULL);
83 if (!handler)
84 return -EINVAL;
85
105 if (value_len == 0) { 86 if (value_len == 0) {
106 ret = btrfs_setxattr(trans, inode, handler->xattr_name, 87 ret = btrfs_setxattr(trans, inode, handler->xattr_name,
107 NULL, 0, flags); 88 NULL, 0, flags);
108 if (ret) 89 if (ret)
109 return ret; 90 return ret;
110 91
@@ -114,17 +95,14 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
114 return ret; 95 return ret;
115 } 96 }
116 97
117 ret = handler->validate(value, value_len); 98 ret = btrfs_setxattr(trans, inode, handler->xattr_name, value,
118 if (ret) 99 value_len, flags);
119 return ret;
120 ret = btrfs_setxattr(trans, inode, handler->xattr_name,
121 value, value_len, flags);
122 if (ret) 100 if (ret)
123 return ret; 101 return ret;
124 ret = handler->apply(inode, value, value_len); 102 ret = handler->apply(inode, value, value_len);
125 if (ret) { 103 if (ret) {
126 btrfs_setxattr(trans, inode, handler->xattr_name, 104 btrfs_setxattr(trans, inode, handler->xattr_name, NULL,
127 NULL, 0, flags); 105 0, flags);
128 return ret; 106 return ret;
129 } 107 }
130 108
@@ -133,15 +111,6 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
133 return 0; 111 return 0;
134} 112}
135 113
136int btrfs_set_prop(struct inode *inode,
137 const char *name,
138 const char *value,
139 size_t value_len,
140 int flags)
141{
142 return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
143}
144
145static int iterate_object_props(struct btrfs_root *root, 114static int iterate_object_props(struct btrfs_root *root,
146 struct btrfs_path *path, 115 struct btrfs_path *path,
147 u64 objectid, 116 u64 objectid,
@@ -283,6 +252,78 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
283 return ret; 252 return ret;
284} 253}
285 254
255static int prop_compression_validate(const char *value, size_t len)
256{
257 if (!value)
258 return 0;
259
260 if (!strncmp("lzo", value, 3))
261 return 0;
262 else if (!strncmp("zlib", value, 4))
263 return 0;
264 else if (!strncmp("zstd", value, 4))
265 return 0;
266
267 return -EINVAL;
268}
269
270static int prop_compression_apply(struct inode *inode, const char *value,
271 size_t len)
272{
273 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
274 int type;
275
276 if (len == 0) {
277 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
278 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
279 BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
280
281 return 0;
282 }
283
284 if (!strncmp("lzo", value, 3)) {
285 type = BTRFS_COMPRESS_LZO;
286 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
287 } else if (!strncmp("zlib", value, 4)) {
288 type = BTRFS_COMPRESS_ZLIB;
289 } else if (!strncmp("zstd", value, 4)) {
290 type = BTRFS_COMPRESS_ZSTD;
291 btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
292 } else {
293 return -EINVAL;
294 }
295
296 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
297 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
298 BTRFS_I(inode)->prop_compress = type;
299
300 return 0;
301}
302
303static const char *prop_compression_extract(struct inode *inode)
304{
305 switch (BTRFS_I(inode)->prop_compress) {
306 case BTRFS_COMPRESS_ZLIB:
307 case BTRFS_COMPRESS_LZO:
308 case BTRFS_COMPRESS_ZSTD:
309 return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
310 default:
311 break;
312 }
313
314 return NULL;
315}
316
317static struct prop_handler prop_handlers[] = {
318 {
319 .xattr_name = XATTR_BTRFS_PREFIX "compression",
320 .validate = prop_compression_validate,
321 .apply = prop_compression_apply,
322 .extract = prop_compression_extract,
323 .inheritable = 1
324 },
325};
326
286static int inherit_props(struct btrfs_trans_handle *trans, 327static int inherit_props(struct btrfs_trans_handle *trans,
287 struct inode *inode, 328 struct inode *inode,
288 struct inode *parent) 329 struct inode *parent)
@@ -308,20 +349,38 @@ static int inherit_props(struct btrfs_trans_handle *trans,
308 if (!value) 349 if (!value)
309 continue; 350 continue;
310 351
352 /*
353 * This is not strictly necessary as the property should be
354 * valid, but in case it isn't, don't propagate it futher.
355 */
356 ret = h->validate(value, strlen(value));
357 if (ret)
358 continue;
359
311 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); 360 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
312 ret = btrfs_block_rsv_add(root, trans->block_rsv, 361 ret = btrfs_block_rsv_add(root, trans->block_rsv,
313 num_bytes, BTRFS_RESERVE_NO_FLUSH); 362 num_bytes, BTRFS_RESERVE_NO_FLUSH);
314 if (ret) 363 if (ret)
315 goto out; 364 return ret;
316 ret = __btrfs_set_prop(trans, inode, h->xattr_name, 365
317 value, strlen(value), 0); 366 ret = btrfs_setxattr(trans, inode, h->xattr_name, value,
367 strlen(value), 0);
368 if (!ret) {
369 ret = h->apply(inode, value, strlen(value));
370 if (ret)
371 btrfs_setxattr(trans, inode, h->xattr_name,
372 NULL, 0, 0);
373 else
374 set_bit(BTRFS_INODE_HAS_PROPS,
375 &BTRFS_I(inode)->runtime_flags);
376 }
377
318 btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes); 378 btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes);
319 if (ret) 379 if (ret)
320 goto out; 380 return ret;
321 } 381 }
322 ret = 0; 382
323out: 383 return 0;
324 return ret;
325} 384}
326 385
327int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, 386int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
@@ -364,64 +423,17 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
364 return ret; 423 return ret;
365} 424}
366 425
367static int prop_compression_validate(const char *value, size_t len) 426void __init btrfs_props_init(void)
368{
369 if (!strncmp("lzo", value, 3))
370 return 0;
371 else if (!strncmp("zlib", value, 4))
372 return 0;
373 else if (!strncmp("zstd", value, 4))
374 return 0;
375
376 return -EINVAL;
377}
378
379static int prop_compression_apply(struct inode *inode,
380 const char *value,
381 size_t len)
382{ 427{
383 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 428 int i;
384 int type;
385
386 if (len == 0) {
387 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
388 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
389 BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
390
391 return 0;
392 }
393
394 if (!strncmp("lzo", value, 3)) {
395 type = BTRFS_COMPRESS_LZO;
396 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
397 } else if (!strncmp("zlib", value, 4)) {
398 type = BTRFS_COMPRESS_ZLIB;
399 } else if (!strncmp("zstd", value, 4)) {
400 type = BTRFS_COMPRESS_ZSTD;
401 btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
402 } else {
403 return -EINVAL;
404 }
405 429
406 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 430 hash_init(prop_handlers_ht);
407 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
408 BTRFS_I(inode)->prop_compress = type;
409 431
410 return 0; 432 for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
411} 433 struct prop_handler *p = &prop_handlers[i];
434 u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
412 435
413static const char *prop_compression_extract(struct inode *inode) 436 hash_add(prop_handlers_ht, &p->node, h);
414{
415 switch (BTRFS_I(inode)->prop_compress) {
416 case BTRFS_COMPRESS_ZLIB:
417 case BTRFS_COMPRESS_LZO:
418 case BTRFS_COMPRESS_ZSTD:
419 return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
420 default:
421 break;
422 } 437 }
423
424 return NULL;
425} 438}
426 439
427
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 618815b4f9d5..40b2c65b518c 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -10,11 +10,10 @@
10 10
11void __init btrfs_props_init(void); 11void __init btrfs_props_init(void);
12 12
13int btrfs_set_prop(struct inode *inode, 13int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
14 const char *name, 14 const char *name, const char *value, size_t value_len,
15 const char *value,
16 size_t value_len,
17 int flags); 15 int flags);
16int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
18 17
19int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path); 18int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
20 19
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index e659d9d61107..2f708f2c4e67 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -918,8 +918,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
918 /* 918 /*
919 * initially create the quota tree 919 * initially create the quota tree
920 */ 920 */
921 quota_root = btrfs_create_tree(trans, fs_info, 921 quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
922 BTRFS_QUOTA_TREE_OBJECTID);
923 if (IS_ERR(quota_root)) { 922 if (IS_ERR(quota_root)) {
924 ret = PTR_ERR(quota_root); 923 ret = PTR_ERR(quota_root);
925 btrfs_abort_transaction(trans, ret); 924 btrfs_abort_transaction(trans, ret);
@@ -1101,7 +1100,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
1101 list_del(&quota_root->dirty_list); 1100 list_del(&quota_root->dirty_list);
1102 1101
1103 btrfs_tree_lock(quota_root->node); 1102 btrfs_tree_lock(quota_root->node);
1104 clean_tree_block(fs_info, quota_root->node); 1103 btrfs_clean_tree_block(quota_root->node);
1105 btrfs_tree_unlock(quota_root->node); 1104 btrfs_tree_unlock(quota_root->node);
1106 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 1105 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
1107 1106
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index b283d3a6e837..5cec2c6970f2 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -659,36 +659,43 @@ static void dump_block_entry(struct btrfs_fs_info *fs_info,
659 659
660/* 660/*
661 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr 661 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
662 * @root: the root we are making this modification from.
663 * @bytenr: the bytenr we are modifying.
664 * @num_bytes: number of bytes.
665 * @parent: the parent bytenr.
666 * @ref_root: the original root owner of the bytenr.
667 * @owner: level in the case of metadata, inode in the case of data.
668 * @offset: 0 for metadata, file offset for data.
669 * @action: the action that we are doing, this is the same as the delayed ref
670 * action.
671 * 662 *
672 * This will add an action item to the given bytenr and do sanity checks to make 663 * This will add an action item to the given bytenr and do sanity checks to make
673 * sure we haven't messed something up. If we are making a new allocation and 664 * sure we haven't messed something up. If we are making a new allocation and
674 * this block entry has history we will delete all previous actions as long as 665 * this block entry has history we will delete all previous actions as long as
675 * our sanity checks pass as they are no longer needed. 666 * our sanity checks pass as they are no longer needed.
676 */ 667 */
677int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, 668int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
678 u64 parent, u64 ref_root, u64 owner, u64 offset, 669 struct btrfs_ref *generic_ref)
679 int action)
680{ 670{
681 struct btrfs_fs_info *fs_info = root->fs_info;
682 struct ref_entry *ref = NULL, *exist; 671 struct ref_entry *ref = NULL, *exist;
683 struct ref_action *ra = NULL; 672 struct ref_action *ra = NULL;
684 struct block_entry *be = NULL; 673 struct block_entry *be = NULL;
685 struct root_entry *re = NULL; 674 struct root_entry *re = NULL;
675 int action = generic_ref->action;
686 int ret = 0; 676 int ret = 0;
687 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; 677 bool metadata;
678 u64 bytenr = generic_ref->bytenr;
679 u64 num_bytes = generic_ref->len;
680 u64 parent = generic_ref->parent;
681 u64 ref_root;
682 u64 owner;
683 u64 offset;
688 684
689 if (!btrfs_test_opt(root->fs_info, REF_VERIFY)) 685 if (!btrfs_test_opt(fs_info, REF_VERIFY))
690 return 0; 686 return 0;
691 687
688 if (generic_ref->type == BTRFS_REF_METADATA) {
689 ref_root = generic_ref->tree_ref.root;
690 owner = generic_ref->tree_ref.level;
691 offset = 0;
692 } else {
693 ref_root = generic_ref->data_ref.ref_root;
694 owner = generic_ref->data_ref.ino;
695 offset = generic_ref->data_ref.offset;
696 }
697 metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
698
692 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 699 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
693 ra = kmalloc(sizeof(struct ref_action), GFP_NOFS); 700 ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
694 if (!ra || !ref) { 701 if (!ra || !ref) {
@@ -721,7 +728,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
721 728
722 INIT_LIST_HEAD(&ra->list); 729 INIT_LIST_HEAD(&ra->list);
723 ra->action = action; 730 ra->action = action;
724 ra->root = root->root_key.objectid; 731 ra->root = generic_ref->real_root;
725 732
726 /* 733 /*
727 * This is an allocation, preallocate the block_entry in case we haven't 734 * This is an allocation, preallocate the block_entry in case we haven't
@@ -734,7 +741,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
734 * is and the new root objectid, so let's not treat the passed 741 * is and the new root objectid, so let's not treat the passed
735 * in root as if it really has a ref for this bytenr. 742 * in root as if it really has a ref for this bytenr.
736 */ 743 */
737 be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root); 744 be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
738 if (IS_ERR(be)) { 745 if (IS_ERR(be)) {
739 kfree(ra); 746 kfree(ra);
740 ret = PTR_ERR(be); 747 ret = PTR_ERR(be);
@@ -776,13 +783,13 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
776 * one we want to lookup below when we modify the 783 * one we want to lookup below when we modify the
777 * re->num_refs. 784 * re->num_refs.
778 */ 785 */
779 ref_root = root->root_key.objectid; 786 ref_root = generic_ref->real_root;
780 re->root_objectid = root->root_key.objectid; 787 re->root_objectid = generic_ref->real_root;
781 re->num_refs = 0; 788 re->num_refs = 0;
782 } 789 }
783 790
784 spin_lock(&root->fs_info->ref_verify_lock); 791 spin_lock(&fs_info->ref_verify_lock);
785 be = lookup_block_entry(&root->fs_info->block_tree, bytenr); 792 be = lookup_block_entry(&fs_info->block_tree, bytenr);
786 if (!be) { 793 if (!be) {
787 btrfs_err(fs_info, 794 btrfs_err(fs_info,
788"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!", 795"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
@@ -851,7 +858,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
851 * didn't think of some other corner case. 858 * didn't think of some other corner case.
852 */ 859 */
853 btrfs_err(fs_info, "failed to find root %llu for %llu", 860 btrfs_err(fs_info, "failed to find root %llu for %llu",
854 root->root_key.objectid, be->bytenr); 861 generic_ref->real_root, be->bytenr);
855 dump_block_entry(fs_info, be); 862 dump_block_entry(fs_info, be);
856 dump_ref_action(fs_info, ra); 863 dump_ref_action(fs_info, ra);
857 kfree(ra); 864 kfree(ra);
@@ -870,7 +877,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
870 list_add_tail(&ra->list, &be->actions); 877 list_add_tail(&ra->list, &be->actions);
871 ret = 0; 878 ret = 0;
872out_unlock: 879out_unlock:
873 spin_unlock(&root->fs_info->ref_verify_lock); 880 spin_unlock(&fs_info->ref_verify_lock);
874out: 881out:
875 if (ret) 882 if (ret)
876 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 883 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h
index b7d2a4edfdb7..855de37719b5 100644
--- a/fs/btrfs/ref-verify.h
+++ b/fs/btrfs/ref-verify.h
@@ -9,9 +9,8 @@
9#ifdef CONFIG_BTRFS_FS_REF_VERIFY 9#ifdef CONFIG_BTRFS_FS_REF_VERIFY
10int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info); 10int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
11void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info); 11void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
12int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, 12int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
13 u64 parent, u64 ref_root, u64 owner, u64 offset, 13 struct btrfs_ref *generic_ref);
14 int action);
15void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, 14void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
16 u64 len); 15 u64 len);
17 16
@@ -30,9 +29,8 @@ static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
30{ 29{
31} 30}
32 31
33static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, 32static inline int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
34 u64 num_bytes, u64 parent, u64 ref_root, 33 struct btrfs_ref *generic_ref)
35 u64 owner, u64 offset, int action)
36{ 34{
37 return 0; 35 return 0;
38} 36}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ddf028509931..a459ecddcce4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1643,6 +1643,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1643 1643
1644 nritems = btrfs_header_nritems(leaf); 1644 nritems = btrfs_header_nritems(leaf);
1645 for (i = 0; i < nritems; i++) { 1645 for (i = 0; i < nritems; i++) {
1646 struct btrfs_ref ref = { 0 };
1647
1646 cond_resched(); 1648 cond_resched();
1647 btrfs_item_key_to_cpu(leaf, &key, i); 1649 btrfs_item_key_to_cpu(leaf, &key, i);
1648 if (key.type != BTRFS_EXTENT_DATA_KEY) 1650 if (key.type != BTRFS_EXTENT_DATA_KEY)
@@ -1703,18 +1705,23 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1703 dirty = 1; 1705 dirty = 1;
1704 1706
1705 key.offset -= btrfs_file_extent_offset(leaf, fi); 1707 key.offset -= btrfs_file_extent_offset(leaf, fi);
1706 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1708 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1707 num_bytes, parent, 1709 num_bytes, parent);
1708 btrfs_header_owner(leaf), 1710 ref.real_root = root->root_key.objectid;
1709 key.objectid, key.offset); 1711 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1712 key.objectid, key.offset);
1713 ret = btrfs_inc_extent_ref(trans, &ref);
1710 if (ret) { 1714 if (ret) {
1711 btrfs_abort_transaction(trans, ret); 1715 btrfs_abort_transaction(trans, ret);
1712 break; 1716 break;
1713 } 1717 }
1714 1718
1715 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1719 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1716 parent, btrfs_header_owner(leaf), 1720 num_bytes, parent);
1717 key.objectid, key.offset); 1721 ref.real_root = root->root_key.objectid;
1722 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1723 key.objectid, key.offset);
1724 ret = btrfs_free_extent(trans, &ref);
1718 if (ret) { 1725 if (ret) {
1719 btrfs_abort_transaction(trans, ret); 1726 btrfs_abort_transaction(trans, ret);
1720 break; 1727 break;
@@ -1756,6 +1763,7 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
1756 struct btrfs_fs_info *fs_info = dest->fs_info; 1763 struct btrfs_fs_info *fs_info = dest->fs_info;
1757 struct extent_buffer *eb; 1764 struct extent_buffer *eb;
1758 struct extent_buffer *parent; 1765 struct extent_buffer *parent;
1766 struct btrfs_ref ref = { 0 };
1759 struct btrfs_key key; 1767 struct btrfs_key key;
1760 u64 old_bytenr; 1768 u64 old_bytenr;
1761 u64 new_bytenr; 1769 u64 new_bytenr;
@@ -1916,23 +1924,31 @@ again:
1916 path->slots[level], old_ptr_gen); 1924 path->slots[level], old_ptr_gen);
1917 btrfs_mark_buffer_dirty(path->nodes[level]); 1925 btrfs_mark_buffer_dirty(path->nodes[level]);
1918 1926
1919 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, 1927 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
1920 blocksize, path->nodes[level]->start, 1928 blocksize, path->nodes[level]->start);
1921 src->root_key.objectid, level - 1, 0); 1929 ref.skip_qgroup = true;
1930 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1931 ret = btrfs_inc_extent_ref(trans, &ref);
1922 BUG_ON(ret); 1932 BUG_ON(ret);
1923 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, 1933 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1924 blocksize, 0, dest->root_key.objectid, 1934 blocksize, 0);
1925 level - 1, 0); 1935 ref.skip_qgroup = true;
1936 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1937 ret = btrfs_inc_extent_ref(trans, &ref);
1926 BUG_ON(ret); 1938 BUG_ON(ret);
1927 1939
1928 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1940 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1929 path->nodes[level]->start, 1941 blocksize, path->nodes[level]->start);
1930 src->root_key.objectid, level - 1, 0); 1942 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1943 ref.skip_qgroup = true;
1944 ret = btrfs_free_extent(trans, &ref);
1931 BUG_ON(ret); 1945 BUG_ON(ret);
1932 1946
1933 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1947 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1934 0, dest->root_key.objectid, level - 1, 1948 blocksize, 0);
1935 0); 1949 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1950 ref.skip_qgroup = true;
1951 ret = btrfs_free_extent(trans, &ref);
1936 BUG_ON(ret); 1952 BUG_ON(ret);
1937 1953
1938 btrfs_unlock_up_safe(path, 0); 1954 btrfs_unlock_up_safe(path, 0);
@@ -2721,6 +2737,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2721 rc->backref_cache.path[node->level] = node; 2737 rc->backref_cache.path[node->level] = node;
2722 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2738 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2723 struct btrfs_key first_key; 2739 struct btrfs_key first_key;
2740 struct btrfs_ref ref = { 0 };
2724 2741
2725 cond_resched(); 2742 cond_resched();
2726 2743
@@ -2826,11 +2843,13 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2826 trans->transid); 2843 trans->transid);
2827 btrfs_mark_buffer_dirty(upper->eb); 2844 btrfs_mark_buffer_dirty(upper->eb);
2828 2845
2829 ret = btrfs_inc_extent_ref(trans, root, 2846 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
2830 node->eb->start, blocksize, 2847 node->eb->start, blocksize,
2831 upper->eb->start, 2848 upper->eb->start);
2832 btrfs_header_owner(upper->eb), 2849 ref.real_root = root->root_key.objectid;
2833 node->level, 0); 2850 btrfs_init_tree_ref(&ref, node->level,
2851 btrfs_header_owner(upper->eb));
2852 ret = btrfs_inc_extent_ref(trans, &ref);
2834 BUG_ON(ret); 2853 BUG_ON(ret);
2835 2854
2836 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2855 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
@@ -4222,7 +4241,7 @@ out:
4222 return inode; 4241 return inode;
4223} 4242}
4224 4243
4225static struct reloc_control *alloc_reloc_control(void) 4244static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
4226{ 4245{
4227 struct reloc_control *rc; 4246 struct reloc_control *rc;
4228 4247
@@ -4234,7 +4253,8 @@ static struct reloc_control *alloc_reloc_control(void)
4234 INIT_LIST_HEAD(&rc->dirty_subvol_roots); 4253 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
4235 backref_cache_init(&rc->backref_cache); 4254 backref_cache_init(&rc->backref_cache);
4236 mapping_tree_init(&rc->reloc_root_tree); 4255 mapping_tree_init(&rc->reloc_root_tree);
4237 extent_io_tree_init(&rc->processed_blocks, NULL); 4256 extent_io_tree_init(fs_info, &rc->processed_blocks,
4257 IO_TREE_RELOC_BLOCKS, NULL);
4238 return rc; 4258 return rc;
4239} 4259}
4240 4260
@@ -4276,7 +4296,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
4276 return -ETXTBSY; 4296 return -ETXTBSY;
4277 } 4297 }
4278 4298
4279 rc = alloc_reloc_control(); 4299 rc = alloc_reloc_control(fs_info);
4280 if (!rc) { 4300 if (!rc) {
4281 btrfs_put_block_group(bg); 4301 btrfs_put_block_group(bg);
4282 return -ENOMEM; 4302 return -ENOMEM;
@@ -4298,7 +4318,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
4298 goto out; 4318 goto out;
4299 } 4319 }
4300 4320
4301 inode = lookup_free_space_inode(fs_info, rc->block_group, path); 4321 inode = lookup_free_space_inode(rc->block_group, path);
4302 btrfs_free_path(path); 4322 btrfs_free_path(path);
4303 4323
4304 if (!IS_ERR(inode)) 4324 if (!IS_ERR(inode))
@@ -4330,27 +4350,36 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
4330 mutex_lock(&fs_info->cleaner_mutex); 4350 mutex_lock(&fs_info->cleaner_mutex);
4331 ret = relocate_block_group(rc); 4351 ret = relocate_block_group(rc);
4332 mutex_unlock(&fs_info->cleaner_mutex); 4352 mutex_unlock(&fs_info->cleaner_mutex);
4333 if (ret < 0) { 4353 if (ret < 0)
4334 err = ret; 4354 err = ret;
4335 goto out;
4336 }
4337
4338 if (rc->extents_found == 0)
4339 break;
4340
4341 btrfs_info(fs_info, "found %llu extents", rc->extents_found);
4342 4355
4356 /*
4357 * We may have gotten ENOSPC after we already dirtied some
4358 * extents. If writeout happens while we're relocating a
4359 * different block group we could end up hitting the
4360 * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
4361 * btrfs_reloc_cow_block. Make sure we write everything out
4362 * properly so we don't trip over this problem, and then break
4363 * out of the loop if we hit an error.
4364 */
4343 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4365 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4344 ret = btrfs_wait_ordered_range(rc->data_inode, 0, 4366 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4345 (u64)-1); 4367 (u64)-1);
4346 if (ret) { 4368 if (ret)
4347 err = ret; 4369 err = ret;
4348 goto out;
4349 }
4350 invalidate_mapping_pages(rc->data_inode->i_mapping, 4370 invalidate_mapping_pages(rc->data_inode->i_mapping,
4351 0, -1); 4371 0, -1);
4352 rc->stage = UPDATE_DATA_PTRS; 4372 rc->stage = UPDATE_DATA_PTRS;
4353 } 4373 }
4374
4375 if (err < 0)
4376 goto out;
4377
4378 if (rc->extents_found == 0)
4379 break;
4380
4381 btrfs_info(fs_info, "found %llu extents", rc->extents_found);
4382
4354 } 4383 }
4355 4384
4356 WARN_ON(rc->block_group->pinned > 0); 4385 WARN_ON(rc->block_group->pinned > 0);
@@ -4472,7 +4501,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4472 if (list_empty(&reloc_roots)) 4501 if (list_empty(&reloc_roots))
4473 goto out; 4502 goto out;
4474 4503
4475 rc = alloc_reloc_control(); 4504 rc = alloc_reloc_control(fs_info);
4476 if (!rc) { 4505 if (!rc) {
4477 err = -ENOMEM; 4506 err = -ENOMEM;
4478 goto out; 4507 goto out;
@@ -4594,7 +4623,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4594 new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); 4623 new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
4595 sums->bytenr = new_bytenr; 4624 sums->bytenr = new_bytenr;
4596 4625
4597 btrfs_add_ordered_sum(inode, ordered, sums); 4626 btrfs_add_ordered_sum(ordered, sums);
4598 } 4627 }
4599out: 4628out:
4600 btrfs_put_ordered_extent(ordered); 4629 btrfs_put_ordered_extent(ordered);
@@ -4667,14 +4696,12 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4667void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, 4696void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4668 u64 *bytes_to_reserve) 4697 u64 *bytes_to_reserve)
4669{ 4698{
4670 struct btrfs_root *root; 4699 struct btrfs_root *root = pending->root;
4671 struct reloc_control *rc; 4700 struct reloc_control *rc = root->fs_info->reloc_ctl;
4672 4701
4673 root = pending->root; 4702 if (!root->reloc_root || !rc)
4674 if (!root->reloc_root)
4675 return; 4703 return;
4676 4704
4677 rc = root->fs_info->reloc_ctl;
4678 if (!rc->merge_reloc_tree) 4705 if (!rc->merge_reloc_tree)
4679 return; 4706 return;
4680 4707
@@ -4703,10 +4730,10 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4703 struct btrfs_root *root = pending->root; 4730 struct btrfs_root *root = pending->root;
4704 struct btrfs_root *reloc_root; 4731 struct btrfs_root *reloc_root;
4705 struct btrfs_root *new_root; 4732 struct btrfs_root *new_root;
4706 struct reloc_control *rc; 4733 struct reloc_control *rc = root->fs_info->reloc_ctl;
4707 int ret; 4734 int ret;
4708 4735
4709 if (!root->reloc_root) 4736 if (!root->reloc_root || !rc)
4710 return 0; 4737 return 0;
4711 4738
4712 rc = root->fs_info->reloc_ctl; 4739 rc = root->fs_info->reloc_ctl;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 893d12fbfda0..1b9a5d0de139 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -137,11 +137,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
137 goto out; 137 goto out;
138 } 138 }
139 139
140 if (ret != 0) { 140 if (ret > 0) {
141 btrfs_print_leaf(path->nodes[0]); 141 btrfs_crit(fs_info,
142 btrfs_crit(fs_info, "unable to update root key %llu %u %llu", 142 "unable to find root key (%llu %u %llu) in tree %llu",
143 key->objectid, key->type, key->offset); 143 key->objectid, key->type, key->offset,
144 BUG_ON(1); 144 root->root_key.objectid);
145 ret = -EUCLEAN;
146 btrfs_abort_transaction(trans, ret);
147 goto out;
145 } 148 }
146 149
147 l = path->nodes[0]; 150 l = path->nodes[0];
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a99588536c79..f7b29f9db5e2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3791,7 +3791,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3791 struct btrfs_workqueue *scrub_parity = NULL; 3791 struct btrfs_workqueue *scrub_parity = NULL;
3792 3792
3793 if (btrfs_fs_closing(fs_info)) 3793 if (btrfs_fs_closing(fs_info))
3794 return -EINVAL; 3794 return -EAGAIN;
3795 3795
3796 if (fs_info->nodesize > BTRFS_STRIPE_LEN) { 3796 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
3797 /* 3797 /*
@@ -3999,9 +3999,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3999 return 0; 3999 return 0;
4000} 4000}
4001 4001
4002int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, 4002int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4003 struct btrfs_device *dev)
4004{ 4003{
4004 struct btrfs_fs_info *fs_info = dev->fs_info;
4005 struct scrub_ctx *sctx; 4005 struct scrub_ctx *sctx;
4006 4006
4007 mutex_lock(&fs_info->scrub_lock); 4007 mutex_lock(&fs_info->scrub_lock);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7ea2d6b1f170..dd38dfe174df 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1160,7 +1160,6 @@ out:
1160struct backref_ctx { 1160struct backref_ctx {
1161 struct send_ctx *sctx; 1161 struct send_ctx *sctx;
1162 1162
1163 struct btrfs_path *path;
1164 /* number of total found references */ 1163 /* number of total found references */
1165 u64 found; 1164 u64 found;
1166 1165
@@ -1213,8 +1212,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1213{ 1212{
1214 struct backref_ctx *bctx = ctx_; 1213 struct backref_ctx *bctx = ctx_;
1215 struct clone_root *found; 1214 struct clone_root *found;
1216 int ret;
1217 u64 i_size;
1218 1215
1219 /* First check if the root is in the list of accepted clone sources */ 1216 /* First check if the root is in the list of accepted clone sources */
1220 found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, 1217 found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
@@ -1231,19 +1228,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1231 } 1228 }
1232 1229
1233 /* 1230 /*
1234 * There are inodes that have extents that lie behind its i_size. Don't
1235 * accept clones from these extents.
1236 */
1237 ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
1238 NULL, NULL, NULL);
1239 btrfs_release_path(bctx->path);
1240 if (ret < 0)
1241 return ret;
1242
1243 if (offset + bctx->data_offset + bctx->extent_len > i_size)
1244 return 0;
1245
1246 /*
1247 * Make sure we don't consider clones from send_root that are 1231 * Make sure we don't consider clones from send_root that are
1248 * behind the current inode/offset. 1232 * behind the current inode/offset.
1249 */ 1233 */
@@ -1319,8 +1303,6 @@ static int find_extent_clone(struct send_ctx *sctx,
1319 goto out; 1303 goto out;
1320 } 1304 }
1321 1305
1322 backref_ctx->path = tmp_path;
1323
1324 if (data_offset >= ino_size) { 1306 if (data_offset >= ino_size) {
1325 /* 1307 /*
1326 * There may be extents that lie behind the file's size. 1308 * There may be extents that lie behind the file's size.
@@ -5082,6 +5064,7 @@ static int clone_range(struct send_ctx *sctx,
5082 struct btrfs_path *path; 5064 struct btrfs_path *path;
5083 struct btrfs_key key; 5065 struct btrfs_key key;
5084 int ret; 5066 int ret;
5067 u64 clone_src_i_size;
5085 5068
5086 /* 5069 /*
5087 * Prevent cloning from a zero offset with a length matching the sector 5070 * Prevent cloning from a zero offset with a length matching the sector
@@ -5107,6 +5090,16 @@ static int clone_range(struct send_ctx *sctx,
5107 return -ENOMEM; 5090 return -ENOMEM;
5108 5091
5109 /* 5092 /*
5093 * There are inodes that have extents that lie behind its i_size. Don't
5094 * accept clones from these extents.
5095 */
5096 ret = __get_inode_info(clone_root->root, path, clone_root->ino,
5097 &clone_src_i_size, NULL, NULL, NULL, NULL, NULL);
5098 btrfs_release_path(path);
5099 if (ret < 0)
5100 goto out;
5101
5102 /*
5110 * We can't send a clone operation for the entire range if we find 5103 * We can't send a clone operation for the entire range if we find
5111 * extent items in the respective range in the source file that 5104 * extent items in the respective range in the source file that
5112 * refer to different extents or if we find holes. 5105 * refer to different extents or if we find holes.
@@ -5148,6 +5141,7 @@ static int clone_range(struct send_ctx *sctx,
5148 u8 type; 5141 u8 type;
5149 u64 ext_len; 5142 u64 ext_len;
5150 u64 clone_len; 5143 u64 clone_len;
5144 u64 clone_data_offset;
5151 5145
5152 if (slot >= btrfs_header_nritems(leaf)) { 5146 if (slot >= btrfs_header_nritems(leaf)) {
5153 ret = btrfs_next_leaf(clone_root->root, path); 5147 ret = btrfs_next_leaf(clone_root->root, path);
@@ -5201,10 +5195,30 @@ static int clone_range(struct send_ctx *sctx,
5201 if (key.offset >= clone_root->offset + len) 5195 if (key.offset >= clone_root->offset + len)
5202 break; 5196 break;
5203 5197
5198 if (key.offset >= clone_src_i_size)
5199 break;
5200
5201 if (key.offset + ext_len > clone_src_i_size)
5202 ext_len = clone_src_i_size - key.offset;
5203
5204 clone_data_offset = btrfs_file_extent_offset(leaf, ei);
5205 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
5206 clone_root->offset = key.offset;
5207 if (clone_data_offset < data_offset &&
5208 clone_data_offset + ext_len > data_offset) {
5209 u64 extent_offset;
5210
5211 extent_offset = data_offset - clone_data_offset;
5212 ext_len -= extent_offset;
5213 clone_data_offset += extent_offset;
5214 clone_root->offset += extent_offset;
5215 }
5216 }
5217
5204 clone_len = min_t(u64, ext_len, len); 5218 clone_len = min_t(u64, ext_len, len);
5205 5219
5206 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && 5220 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
5207 btrfs_file_extent_offset(leaf, ei) == data_offset) 5221 clone_data_offset == data_offset)
5208 ret = send_clone(sctx, offset, clone_len, clone_root); 5222 ret = send_clone(sctx, offset, clone_len, clone_root);
5209 else 5223 else
5210 ret = send_extent_data(sctx, offset, clone_len); 5224 ret = send_extent_data(sctx, offset, clone_len);
@@ -6579,6 +6593,38 @@ commit_trans:
6579 return btrfs_commit_transaction(trans); 6593 return btrfs_commit_transaction(trans);
6580} 6594}
6581 6595
6596/*
6597 * Make sure any existing dellaloc is flushed for any root used by a send
6598 * operation so that we do not miss any data and we do not race with writeback
6599 * finishing and changing a tree while send is using the tree. This could
6600 * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
6601 * a send operation then uses the subvolume.
6602 * After flushing delalloc ensure_commit_roots_uptodate() must be called.
6603 */
6604static int flush_delalloc_roots(struct send_ctx *sctx)
6605{
6606 struct btrfs_root *root = sctx->parent_root;
6607 int ret;
6608 int i;
6609
6610 if (root) {
6611 ret = btrfs_start_delalloc_snapshot(root);
6612 if (ret)
6613 return ret;
6614 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
6615 }
6616
6617 for (i = 0; i < sctx->clone_roots_cnt; i++) {
6618 root = sctx->clone_roots[i].root;
6619 ret = btrfs_start_delalloc_snapshot(root);
6620 if (ret)
6621 return ret;
6622 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
6623 }
6624
6625 return 0;
6626}
6627
6582static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) 6628static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
6583{ 6629{
6584 spin_lock(&root->root_item_lock); 6630 spin_lock(&root->root_item_lock);
@@ -6594,6 +6640,13 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
6594 spin_unlock(&root->root_item_lock); 6640 spin_unlock(&root->root_item_lock);
6595} 6641}
6596 6642
6643static void dedupe_in_progress_warn(const struct btrfs_root *root)
6644{
6645 btrfs_warn_rl(root->fs_info,
6646"cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
6647 root->root_key.objectid, root->dedupe_in_progress);
6648}
6649
6597long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) 6650long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6598{ 6651{
6599 int ret = 0; 6652 int ret = 0;
@@ -6617,6 +6670,11 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6617 * making it RW. This also protects against deletion. 6670 * making it RW. This also protects against deletion.
6618 */ 6671 */
6619 spin_lock(&send_root->root_item_lock); 6672 spin_lock(&send_root->root_item_lock);
6673 if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
6674 dedupe_in_progress_warn(send_root);
6675 spin_unlock(&send_root->root_item_lock);
6676 return -EAGAIN;
6677 }
6620 send_root->send_in_progress++; 6678 send_root->send_in_progress++;
6621 spin_unlock(&send_root->root_item_lock); 6679 spin_unlock(&send_root->root_item_lock);
6622 6680
@@ -6751,6 +6809,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6751 ret = -EPERM; 6809 ret = -EPERM;
6752 goto out; 6810 goto out;
6753 } 6811 }
6812 if (clone_root->dedupe_in_progress) {
6813 dedupe_in_progress_warn(clone_root);
6814 spin_unlock(&clone_root->root_item_lock);
6815 srcu_read_unlock(&fs_info->subvol_srcu, index);
6816 ret = -EAGAIN;
6817 goto out;
6818 }
6754 clone_root->send_in_progress++; 6819 clone_root->send_in_progress++;
6755 spin_unlock(&clone_root->root_item_lock); 6820 spin_unlock(&clone_root->root_item_lock);
6756 srcu_read_unlock(&fs_info->subvol_srcu, index); 6821 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6785,6 +6850,13 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6785 ret = -EPERM; 6850 ret = -EPERM;
6786 goto out; 6851 goto out;
6787 } 6852 }
6853 if (sctx->parent_root->dedupe_in_progress) {
6854 dedupe_in_progress_warn(sctx->parent_root);
6855 spin_unlock(&sctx->parent_root->root_item_lock);
6856 srcu_read_unlock(&fs_info->subvol_srcu, index);
6857 ret = -EAGAIN;
6858 goto out;
6859 }
6788 spin_unlock(&sctx->parent_root->root_item_lock); 6860 spin_unlock(&sctx->parent_root->root_item_lock);
6789 6861
6790 srcu_read_unlock(&fs_info->subvol_srcu, index); 6862 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6803,6 +6875,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6803 NULL); 6875 NULL);
6804 sort_clone_roots = 1; 6876 sort_clone_roots = 1;
6805 6877
6878 ret = flush_delalloc_roots(sctx);
6879 if (ret)
6880 goto out;
6881
6806 ret = ensure_commit_roots_uptodate(sctx); 6882 ret = ensure_commit_roots_uptodate(sctx);
6807 if (ret) 6883 if (ret)
6808 goto out; 6884 goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 236f812091a3..0645ec428b4f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1400,7 +1400,7 @@ static inline int is_subvolume_inode(struct inode *inode)
1400} 1400}
1401 1401
1402static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, 1402static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1403 const char *device_name, struct vfsmount *mnt) 1403 struct vfsmount *mnt)
1404{ 1404{
1405 struct dentry *root; 1405 struct dentry *root;
1406 int ret; 1406 int ret;
@@ -1649,7 +1649,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1649 } 1649 }
1650 1650
1651 /* mount_subvol() will free subvol_name and mnt_root */ 1651 /* mount_subvol() will free subvol_name and mnt_root */
1652 root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); 1652 root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
1653 1653
1654out: 1654out:
1655 return root; 1655 return root;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 8a59597f1883..9238fd4f1734 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -17,6 +17,16 @@
17 17
18static struct vfsmount *test_mnt = NULL; 18static struct vfsmount *test_mnt = NULL;
19 19
20const char *test_error[] = {
21 [TEST_ALLOC_FS_INFO] = "cannot allocate fs_info",
22 [TEST_ALLOC_ROOT] = "cannot allocate root",
23 [TEST_ALLOC_EXTENT_BUFFER] = "cannot extent buffer",
24 [TEST_ALLOC_PATH] = "cannot allocate path",
25 [TEST_ALLOC_INODE] = "cannot allocate inode",
26 [TEST_ALLOC_BLOCK_GROUP] = "cannot allocate block group",
27 [TEST_ALLOC_EXTENT_MAP] = "cannot allocate extent map",
28};
29
20static const struct super_operations btrfs_test_super_ops = { 30static const struct super_operations btrfs_test_super_ops = {
21 .alloc_inode = btrfs_alloc_inode, 31 .alloc_inode = btrfs_alloc_inode,
22 .destroy_inode = btrfs_test_destroy_inode, 32 .destroy_inode = btrfs_test_destroy_inode,
@@ -99,7 +109,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
99 109
100 spin_lock_init(&fs_info->buffer_lock); 110 spin_lock_init(&fs_info->buffer_lock);
101 spin_lock_init(&fs_info->qgroup_lock); 111 spin_lock_init(&fs_info->qgroup_lock);
102 spin_lock_init(&fs_info->qgroup_op_lock);
103 spin_lock_init(&fs_info->super_lock); 112 spin_lock_init(&fs_info->super_lock);
104 spin_lock_init(&fs_info->fs_roots_radix_lock); 113 spin_lock_init(&fs_info->fs_roots_radix_lock);
105 spin_lock_init(&fs_info->tree_mod_seq_lock); 114 spin_lock_init(&fs_info->tree_mod_seq_lock);
@@ -115,8 +124,10 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
115 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); 124 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
116 INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); 125 INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
117 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 126 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
118 extent_io_tree_init(&fs_info->freed_extents[0], NULL); 127 extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
119 extent_io_tree_init(&fs_info->freed_extents[1], NULL); 128 IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
129 extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
130 IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
120 fs_info->pinned_extents = &fs_info->freed_extents[0]; 131 fs_info->pinned_extents = &fs_info->freed_extents[0];
121 set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); 132 set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
122 133
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 70ff9f9d86a1..ee277bbd939b 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -10,7 +10,22 @@
10int btrfs_run_sanity_tests(void); 10int btrfs_run_sanity_tests(void);
11 11
12#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__) 12#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
13#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__) 13#define test_err(fmt, ...) pr_err("BTRFS: selftest: %s:%d " fmt "\n", \
14 __FILE__, __LINE__, ##__VA_ARGS__)
15
16#define test_std_err(index) test_err("%s", test_error[index])
17
18enum {
19 TEST_ALLOC_FS_INFO,
20 TEST_ALLOC_ROOT,
21 TEST_ALLOC_EXTENT_BUFFER,
22 TEST_ALLOC_PATH,
23 TEST_ALLOC_INODE,
24 TEST_ALLOC_BLOCK_GROUP,
25 TEST_ALLOC_EXTENT_MAP,
26};
27
28extern const char *test_error[];
14 29
15struct btrfs_root; 30struct btrfs_root;
16struct btrfs_trans_handle; 31struct btrfs_trans_handle;
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 7d72eab6d32c..a1b9f9b5978e 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -30,27 +30,27 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
30 30
31 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 31 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
32 if (!fs_info) { 32 if (!fs_info) {
33 test_err("could not allocate fs_info"); 33 test_std_err(TEST_ALLOC_FS_INFO);
34 return -ENOMEM; 34 return -ENOMEM;
35 } 35 }
36 36
37 root = btrfs_alloc_dummy_root(fs_info); 37 root = btrfs_alloc_dummy_root(fs_info);
38 if (IS_ERR(root)) { 38 if (IS_ERR(root)) {
39 test_err("could not allocate root"); 39 test_std_err(TEST_ALLOC_ROOT);
40 ret = PTR_ERR(root); 40 ret = PTR_ERR(root);
41 goto out; 41 goto out;
42 } 42 }
43 43
44 path = btrfs_alloc_path(); 44 path = btrfs_alloc_path();
45 if (!path) { 45 if (!path) {
46 test_err("could not allocate path"); 46 test_std_err(TEST_ALLOC_PATH);
47 ret = -ENOMEM; 47 ret = -ENOMEM;
48 goto out; 48 goto out;
49 } 49 }
50 50
51 path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize); 51 path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
52 if (!eb) { 52 if (!eb) {
53 test_err("could not allocate dummy buffer"); 53 test_std_err(TEST_ALLOC_EXTENT_BUFFER);
54 ret = -ENOMEM; 54 ret = -ENOMEM;
55 goto out; 55 goto out;
56 } 56 }
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 3c46d7f23456..7bf4d5734dbe 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -73,11 +73,15 @@ static int test_find_delalloc(u32 sectorsize)
73 73
74 inode = btrfs_new_test_inode(); 74 inode = btrfs_new_test_inode();
75 if (!inode) { 75 if (!inode) {
76 test_err("failed to allocate test inode"); 76 test_std_err(TEST_ALLOC_INODE);
77 return -ENOMEM; 77 return -ENOMEM;
78 } 78 }
79 79
80 extent_io_tree_init(&tmp, NULL); 80 /*
81 * Passing NULL as we don't have fs_info but tracepoints are not used
82 * at this point
83 */
84 extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL);
81 85
82 /* 86 /*
83 * First go through and create and mark all of our pages dirty, we pin 87 * First go through and create and mark all of our pages dirty, we pin
@@ -374,8 +378,8 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
374{ 378{
375 struct btrfs_fs_info *fs_info; 379 struct btrfs_fs_info *fs_info;
376 unsigned long len; 380 unsigned long len;
377 unsigned long *bitmap; 381 unsigned long *bitmap = NULL;
378 struct extent_buffer *eb; 382 struct extent_buffer *eb = NULL;
379 int ret; 383 int ret;
380 384
381 test_msg("running extent buffer bitmap tests"); 385 test_msg("running extent buffer bitmap tests");
@@ -388,18 +392,23 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
388 ? sectorsize * 4 : sectorsize; 392 ? sectorsize * 4 : sectorsize;
389 393
390 fs_info = btrfs_alloc_dummy_fs_info(len, len); 394 fs_info = btrfs_alloc_dummy_fs_info(len, len);
395 if (!fs_info) {
396 test_std_err(TEST_ALLOC_FS_INFO);
397 return -ENOMEM;
398 }
391 399
392 bitmap = kmalloc(len, GFP_KERNEL); 400 bitmap = kmalloc(len, GFP_KERNEL);
393 if (!bitmap) { 401 if (!bitmap) {
394 test_err("couldn't allocate test bitmap"); 402 test_err("couldn't allocate test bitmap");
395 return -ENOMEM; 403 ret = -ENOMEM;
404 goto out;
396 } 405 }
397 406
398 eb = __alloc_dummy_extent_buffer(fs_info, 0, len); 407 eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
399 if (!eb) { 408 if (!eb) {
400 test_err("couldn't allocate test extent buffer"); 409 test_std_err(TEST_ALLOC_ROOT);
401 kfree(bitmap); 410 ret = -ENOMEM;
402 return -ENOMEM; 411 goto out;
403 } 412 }
404 413
405 ret = __test_eb_bitmaps(bitmap, eb, len); 414 ret = __test_eb_bitmaps(bitmap, eb, len);
@@ -408,17 +417,18 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
408 417
409 /* Do it over again with an extent buffer which isn't page-aligned. */ 418 /* Do it over again with an extent buffer which isn't page-aligned. */
410 free_extent_buffer(eb); 419 free_extent_buffer(eb);
411 eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len); 420 eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
412 if (!eb) { 421 if (!eb) {
413 test_err("couldn't allocate test extent buffer"); 422 test_std_err(TEST_ALLOC_ROOT);
414 kfree(bitmap); 423 ret = -ENOMEM;
415 return -ENOMEM; 424 goto out;
416 } 425 }
417 426
418 ret = __test_eb_bitmaps(bitmap, eb, len); 427 ret = __test_eb_bitmaps(bitmap, eb, len);
419out: 428out:
420 free_extent_buffer(eb); 429 free_extent_buffer(eb);
421 kfree(bitmap); 430 kfree(bitmap);
431 btrfs_free_dummy_fs_info(fs_info);
422 return ret; 432 return ret;
423} 433}
424 434
@@ -434,6 +444,5 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
434 444
435 ret = test_eb_bitmaps(sectorsize, nodesize); 445 ret = test_eb_bitmaps(sectorsize, nodesize);
436out: 446out:
437 test_msg("extent I/O tests finished");
438 return ret; 447 return ret;
439} 448}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index bf15d3a7f20e..87aeabe9d610 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -47,7 +47,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
47 * ->add_extent_mapping(0, 16K) 47 * ->add_extent_mapping(0, 16K)
48 * -> #handle -EEXIST 48 * -> #handle -EEXIST
49 */ 49 */
50static void test_case_1(struct btrfs_fs_info *fs_info, 50static int test_case_1(struct btrfs_fs_info *fs_info,
51 struct extent_map_tree *em_tree) 51 struct extent_map_tree *em_tree)
52{ 52{
53 struct extent_map *em; 53 struct extent_map *em;
@@ -56,9 +56,10 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
56 int ret; 56 int ret;
57 57
58 em = alloc_extent_map(); 58 em = alloc_extent_map();
59 if (!em) 59 if (!em) {
60 /* Skip the test on error. */ 60 test_std_err(TEST_ALLOC_EXTENT_MAP);
61 return; 61 return -ENOMEM;
62 }
62 63
63 /* Add [0, 16K) */ 64 /* Add [0, 16K) */
64 em->start = 0; 65 em->start = 0;
@@ -66,25 +67,37 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
66 em->block_start = 0; 67 em->block_start = 0;
67 em->block_len = SZ_16K; 68 em->block_len = SZ_16K;
68 ret = add_extent_mapping(em_tree, em, 0); 69 ret = add_extent_mapping(em_tree, em, 0);
69 ASSERT(ret == 0); 70 if (ret < 0) {
71 test_err("cannot add extent range [0, 16K)");
72 goto out;
73 }
70 free_extent_map(em); 74 free_extent_map(em);
71 75
72 /* Add [16K, 20K) following [0, 16K) */ 76 /* Add [16K, 20K) following [0, 16K) */
73 em = alloc_extent_map(); 77 em = alloc_extent_map();
74 if (!em) 78 if (!em) {
79 test_std_err(TEST_ALLOC_EXTENT_MAP);
80 ret = -ENOMEM;
75 goto out; 81 goto out;
82 }
76 83
77 em->start = SZ_16K; 84 em->start = SZ_16K;
78 em->len = SZ_4K; 85 em->len = SZ_4K;
79 em->block_start = SZ_32K; /* avoid merging */ 86 em->block_start = SZ_32K; /* avoid merging */
80 em->block_len = SZ_4K; 87 em->block_len = SZ_4K;
81 ret = add_extent_mapping(em_tree, em, 0); 88 ret = add_extent_mapping(em_tree, em, 0);
82 ASSERT(ret == 0); 89 if (ret < 0) {
90 test_err("cannot add extent range [16K, 20K)");
91 goto out;
92 }
83 free_extent_map(em); 93 free_extent_map(em);
84 94
85 em = alloc_extent_map(); 95 em = alloc_extent_map();
86 if (!em) 96 if (!em) {
97 test_std_err(TEST_ALLOC_EXTENT_MAP);
98 ret = -ENOMEM;
87 goto out; 99 goto out;
100 }
88 101
89 /* Add [0, 8K), should return [0, 16K) instead. */ 102 /* Add [0, 8K), should return [0, 16K) instead. */
90 em->start = start; 103 em->start = start;
@@ -92,19 +105,24 @@ static void test_case_1(struct btrfs_fs_info *fs_info,
92 em->block_start = start; 105 em->block_start = start;
93 em->block_len = len; 106 em->block_len = len;
94 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); 107 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
95 if (ret) 108 if (ret) {
96 test_err("case1 [%llu %llu]: ret %d", start, start + len, ret); 109 test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
110 goto out;
111 }
97 if (em && 112 if (em &&
98 (em->start != 0 || extent_map_end(em) != SZ_16K || 113 (em->start != 0 || extent_map_end(em) != SZ_16K ||
99 em->block_start != 0 || em->block_len != SZ_16K)) 114 em->block_start != 0 || em->block_len != SZ_16K)) {
100 test_err( 115 test_err(
101"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", 116"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
102 start, start + len, ret, em->start, em->len, 117 start, start + len, ret, em->start, em->len,
103 em->block_start, em->block_len); 118 em->block_start, em->block_len);
119 ret = -EINVAL;
120 }
104 free_extent_map(em); 121 free_extent_map(em);
105out: 122out:
106 /* free memory */
107 free_extent_map_tree(em_tree); 123 free_extent_map_tree(em_tree);
124
125 return ret;
108} 126}
109 127
110/* 128/*
@@ -113,16 +131,17 @@ out:
113 * Reading the inline ending up with EEXIST, ie. read an inline 131 * Reading the inline ending up with EEXIST, ie. read an inline
114 * extent and discard page cache and read it again. 132 * extent and discard page cache and read it again.
115 */ 133 */
116static void test_case_2(struct btrfs_fs_info *fs_info, 134static int test_case_2(struct btrfs_fs_info *fs_info,
117 struct extent_map_tree *em_tree) 135 struct extent_map_tree *em_tree)
118{ 136{
119 struct extent_map *em; 137 struct extent_map *em;
120 int ret; 138 int ret;
121 139
122 em = alloc_extent_map(); 140 em = alloc_extent_map();
123 if (!em) 141 if (!em) {
124 /* Skip the test on error. */ 142 test_std_err(TEST_ALLOC_EXTENT_MAP);
125 return; 143 return -ENOMEM;
144 }
126 145
127 /* Add [0, 1K) */ 146 /* Add [0, 1K) */
128 em->start = 0; 147 em->start = 0;
@@ -130,25 +149,37 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
130 em->block_start = EXTENT_MAP_INLINE; 149 em->block_start = EXTENT_MAP_INLINE;
131 em->block_len = (u64)-1; 150 em->block_len = (u64)-1;
132 ret = add_extent_mapping(em_tree, em, 0); 151 ret = add_extent_mapping(em_tree, em, 0);
133 ASSERT(ret == 0); 152 if (ret < 0) {
153 test_err("cannot add extent range [0, 1K)");
154 goto out;
155 }
134 free_extent_map(em); 156 free_extent_map(em);
135 157
136 /* Add [4K, 4K) following [0, 1K) */ 158 /* Add [4K, 8K) following [0, 1K) */
137 em = alloc_extent_map(); 159 em = alloc_extent_map();
138 if (!em) 160 if (!em) {
161 test_std_err(TEST_ALLOC_EXTENT_MAP);
162 ret = -ENOMEM;
139 goto out; 163 goto out;
164 }
140 165
141 em->start = SZ_4K; 166 em->start = SZ_4K;
142 em->len = SZ_4K; 167 em->len = SZ_4K;
143 em->block_start = SZ_4K; 168 em->block_start = SZ_4K;
144 em->block_len = SZ_4K; 169 em->block_len = SZ_4K;
145 ret = add_extent_mapping(em_tree, em, 0); 170 ret = add_extent_mapping(em_tree, em, 0);
146 ASSERT(ret == 0); 171 if (ret < 0) {
172 test_err("cannot add extent range [4K, 8K)");
173 goto out;
174 }
147 free_extent_map(em); 175 free_extent_map(em);
148 176
149 em = alloc_extent_map(); 177 em = alloc_extent_map();
150 if (!em) 178 if (!em) {
179 test_std_err(TEST_ALLOC_EXTENT_MAP);
180 ret = -ENOMEM;
151 goto out; 181 goto out;
182 }
152 183
153 /* Add [0, 1K) */ 184 /* Add [0, 1K) */
154 em->start = 0; 185 em->start = 0;
@@ -156,22 +187,27 @@ static void test_case_2(struct btrfs_fs_info *fs_info,
156 em->block_start = EXTENT_MAP_INLINE; 187 em->block_start = EXTENT_MAP_INLINE;
157 em->block_len = (u64)-1; 188 em->block_len = (u64)-1;
158 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); 189 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
159 if (ret) 190 if (ret) {
160 test_err("case2 [0 1K]: ret %d", ret); 191 test_err("case2 [0 1K]: ret %d", ret);
192 goto out;
193 }
161 if (em && 194 if (em &&
162 (em->start != 0 || extent_map_end(em) != SZ_1K || 195 (em->start != 0 || extent_map_end(em) != SZ_1K ||
163 em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) 196 em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) {
164 test_err( 197 test_err(
165"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", 198"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
166 ret, em->start, em->len, em->block_start, 199 ret, em->start, em->len, em->block_start,
167 em->block_len); 200 em->block_len);
201 ret = -EINVAL;
202 }
168 free_extent_map(em); 203 free_extent_map(em);
169out: 204out:
170 /* free memory */
171 free_extent_map_tree(em_tree); 205 free_extent_map_tree(em_tree);
206
207 return ret;
172} 208}
173 209
174static void __test_case_3(struct btrfs_fs_info *fs_info, 210static int __test_case_3(struct btrfs_fs_info *fs_info,
175 struct extent_map_tree *em_tree, u64 start) 211 struct extent_map_tree *em_tree, u64 start)
176{ 212{
177 struct extent_map *em; 213 struct extent_map *em;
@@ -179,9 +215,10 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
179 int ret; 215 int ret;
180 216
181 em = alloc_extent_map(); 217 em = alloc_extent_map();
182 if (!em) 218 if (!em) {
183 /* Skip this test on error. */ 219 test_std_err(TEST_ALLOC_EXTENT_MAP);
184 return; 220 return -ENOMEM;
221 }
185 222
186 /* Add [4K, 8K) */ 223 /* Add [4K, 8K) */
187 em->start = SZ_4K; 224 em->start = SZ_4K;
@@ -189,12 +226,18 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
189 em->block_start = SZ_4K; 226 em->block_start = SZ_4K;
190 em->block_len = SZ_4K; 227 em->block_len = SZ_4K;
191 ret = add_extent_mapping(em_tree, em, 0); 228 ret = add_extent_mapping(em_tree, em, 0);
192 ASSERT(ret == 0); 229 if (ret < 0) {
230 test_err("cannot add extent range [4K, 8K)");
231 goto out;
232 }
193 free_extent_map(em); 233 free_extent_map(em);
194 234
195 em = alloc_extent_map(); 235 em = alloc_extent_map();
196 if (!em) 236 if (!em) {
237 test_std_err(TEST_ALLOC_EXTENT_MAP);
238 ret = -ENOMEM;
197 goto out; 239 goto out;
240 }
198 241
199 /* Add [0, 16K) */ 242 /* Add [0, 16K) */
200 em->start = 0; 243 em->start = 0;
@@ -202,24 +245,29 @@ static void __test_case_3(struct btrfs_fs_info *fs_info,
202 em->block_start = 0; 245 em->block_start = 0;
203 em->block_len = SZ_16K; 246 em->block_len = SZ_16K;
204 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); 247 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
205 if (ret) 248 if (ret) {
206 test_err("case3 [0x%llx 0x%llx): ret %d", 249 test_err("case3 [0x%llx 0x%llx): ret %d",
207 start, start + len, ret); 250 start, start + len, ret);
251 goto out;
252 }
208 /* 253 /*
209 * Since bytes within em are contiguous, em->block_start is identical to 254 * Since bytes within em are contiguous, em->block_start is identical to
210 * em->start. 255 * em->start.
211 */ 256 */
212 if (em && 257 if (em &&
213 (start < em->start || start + len > extent_map_end(em) || 258 (start < em->start || start + len > extent_map_end(em) ||
214 em->start != em->block_start || em->len != em->block_len)) 259 em->start != em->block_start || em->len != em->block_len)) {
215 test_err( 260 test_err(
216"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)", 261"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
217 start, start + len, ret, em->start, em->len, 262 start, start + len, ret, em->start, em->len,
218 em->block_start, em->block_len); 263 em->block_start, em->block_len);
264 ret = -EINVAL;
265 }
219 free_extent_map(em); 266 free_extent_map(em);
220out: 267out:
221 /* free memory */
222 free_extent_map_tree(em_tree); 268 free_extent_map_tree(em_tree);
269
270 return ret;
223} 271}
224 272
225/* 273/*
@@ -238,15 +286,23 @@ out:
238 * -> add_extent_mapping() 286 * -> add_extent_mapping()
239 * -> add_extent_mapping() 287 * -> add_extent_mapping()
240 */ 288 */
241static void test_case_3(struct btrfs_fs_info *fs_info, 289static int test_case_3(struct btrfs_fs_info *fs_info,
242 struct extent_map_tree *em_tree) 290 struct extent_map_tree *em_tree)
243{ 291{
244 __test_case_3(fs_info, em_tree, 0); 292 int ret;
245 __test_case_3(fs_info, em_tree, SZ_8K); 293
246 __test_case_3(fs_info, em_tree, (12 * 1024ULL)); 294 ret = __test_case_3(fs_info, em_tree, 0);
295 if (ret)
296 return ret;
297 ret = __test_case_3(fs_info, em_tree, SZ_8K);
298 if (ret)
299 return ret;
300 ret = __test_case_3(fs_info, em_tree, (12 * SZ_1K));
301
302 return ret;
247} 303}
248 304
249static void __test_case_4(struct btrfs_fs_info *fs_info, 305static int __test_case_4(struct btrfs_fs_info *fs_info,
250 struct extent_map_tree *em_tree, u64 start) 306 struct extent_map_tree *em_tree, u64 start)
251{ 307{
252 struct extent_map *em; 308 struct extent_map *em;
@@ -254,9 +310,10 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
254 int ret; 310 int ret;
255 311
256 em = alloc_extent_map(); 312 em = alloc_extent_map();
257 if (!em) 313 if (!em) {
258 /* Skip this test on error. */ 314 test_std_err(TEST_ALLOC_EXTENT_MAP);
259 return; 315 return -ENOMEM;
316 }
260 317
261 /* Add [0K, 8K) */ 318 /* Add [0K, 8K) */
262 em->start = 0; 319 em->start = 0;
@@ -264,44 +321,60 @@ static void __test_case_4(struct btrfs_fs_info *fs_info,
264 em->block_start = 0; 321 em->block_start = 0;
265 em->block_len = SZ_8K; 322 em->block_len = SZ_8K;
266 ret = add_extent_mapping(em_tree, em, 0); 323 ret = add_extent_mapping(em_tree, em, 0);
267 ASSERT(ret == 0); 324 if (ret < 0) {
325 test_err("cannot add extent range [0, 8K)");
326 goto out;
327 }
268 free_extent_map(em); 328 free_extent_map(em);
269 329
270 em = alloc_extent_map(); 330 em = alloc_extent_map();
271 if (!em) 331 if (!em) {
332 test_std_err(TEST_ALLOC_EXTENT_MAP);
333 ret = -ENOMEM;
272 goto out; 334 goto out;
335 }
273 336
274 /* Add [8K, 24K) */ 337 /* Add [8K, 32K) */
275 em->start = SZ_8K; 338 em->start = SZ_8K;
276 em->len = 24 * 1024ULL; 339 em->len = 24 * SZ_1K;
277 em->block_start = SZ_16K; /* avoid merging */ 340 em->block_start = SZ_16K; /* avoid merging */
278 em->block_len = 24 * 1024ULL; 341 em->block_len = 24 * SZ_1K;
279 ret = add_extent_mapping(em_tree, em, 0); 342 ret = add_extent_mapping(em_tree, em, 0);
280 ASSERT(ret == 0); 343 if (ret < 0) {
344 test_err("cannot add extent range [8K, 32K)");
345 goto out;
346 }
281 free_extent_map(em); 347 free_extent_map(em);
282 348
283 em = alloc_extent_map(); 349 em = alloc_extent_map();
284 if (!em) 350 if (!em) {
351 test_std_err(TEST_ALLOC_EXTENT_MAP);
352 ret = -ENOMEM;
285 goto out; 353 goto out;
354 }
286 /* Add [0K, 32K) */ 355 /* Add [0K, 32K) */
287 em->start = 0; 356 em->start = 0;
288 em->len = SZ_32K; 357 em->len = SZ_32K;
289 em->block_start = 0; 358 em->block_start = 0;
290 em->block_len = SZ_32K; 359 em->block_len = SZ_32K;
291 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); 360 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
292 if (ret) 361 if (ret) {
293 test_err("case4 [0x%llx 0x%llx): ret %d", 362 test_err("case4 [0x%llx 0x%llx): ret %d",
294 start, len, ret); 363 start, len, ret);
295 if (em && 364 goto out;
296 (start < em->start || start + len > extent_map_end(em))) 365 }
366 if (em && (start < em->start || start + len > extent_map_end(em))) {
297 test_err( 367 test_err(
298"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)", 368"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
299 start, len, ret, em->start, em->len, em->block_start, 369 start, len, ret, em->start, em->len, em->block_start,
300 em->block_len); 370 em->block_len);
371 ret = -EINVAL;
372 }
301 free_extent_map(em); 373 free_extent_map(em);
302out: 374out:
303 /* free memory */
304 free_extent_map_tree(em_tree); 375 free_extent_map_tree(em_tree);
376
377 return ret;
305} 378}
306 379
307/* 380/*
@@ -329,17 +402,24 @@ out:
329 * # handle -EEXIST when adding 402 * # handle -EEXIST when adding
330 * # [0, 32K) 403 * # [0, 32K)
331 */ 404 */
332static void test_case_4(struct btrfs_fs_info *fs_info, 405static int test_case_4(struct btrfs_fs_info *fs_info,
333 struct extent_map_tree *em_tree) 406 struct extent_map_tree *em_tree)
334{ 407{
335 __test_case_4(fs_info, em_tree, 0); 408 int ret;
336 __test_case_4(fs_info, em_tree, SZ_4K); 409
410 ret = __test_case_4(fs_info, em_tree, 0);
411 if (ret)
412 return ret;
413 ret = __test_case_4(fs_info, em_tree, SZ_4K);
414
415 return ret;
337} 416}
338 417
339int btrfs_test_extent_map(void) 418int btrfs_test_extent_map(void)
340{ 419{
341 struct btrfs_fs_info *fs_info = NULL; 420 struct btrfs_fs_info *fs_info = NULL;
342 struct extent_map_tree *em_tree; 421 struct extent_map_tree *em_tree;
422 int ret = 0;
343 423
344 test_msg("running extent_map tests"); 424 test_msg("running extent_map tests");
345 425
@@ -349,25 +429,32 @@ int btrfs_test_extent_map(void)
349 */ 429 */
350 fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE); 430 fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
351 if (!fs_info) { 431 if (!fs_info) {
352 test_msg("Couldn't allocate dummy fs info"); 432 test_std_err(TEST_ALLOC_FS_INFO);
353 return -ENOMEM; 433 return -ENOMEM;
354 } 434 }
355 435
356 em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL); 436 em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
357 if (!em_tree) 437 if (!em_tree) {
358 /* Skip the test on error. */ 438 ret = -ENOMEM;
359 goto out; 439 goto out;
440 }
360 441
361 extent_map_tree_init(em_tree); 442 extent_map_tree_init(em_tree);
362 443
363 test_case_1(fs_info, em_tree); 444 ret = test_case_1(fs_info, em_tree);
364 test_case_2(fs_info, em_tree); 445 if (ret)
365 test_case_3(fs_info, em_tree); 446 goto out;
366 test_case_4(fs_info, em_tree); 447 ret = test_case_2(fs_info, em_tree);
448 if (ret)
449 goto out;
450 ret = test_case_3(fs_info, em_tree);
451 if (ret)
452 goto out;
453 ret = test_case_4(fs_info, em_tree);
367 454
368 kfree(em_tree);
369out: 455out:
456 kfree(em_tree);
370 btrfs_free_dummy_fs_info(fs_info); 457 btrfs_free_dummy_fs_info(fs_info);
371 458
372 return 0; 459 return ret;
373} 460}
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 5c2f77e9439b..af89f66f9e63 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -404,7 +404,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
404 }; 404 };
405 const struct btrfs_free_space_op *orig_free_space_ops; 405 const struct btrfs_free_space_op *orig_free_space_ops;
406 406
407 test_msg("running space stealing from bitmap to extent"); 407 test_msg("running space stealing from bitmap to extent tests");
408 408
409 /* 409 /*
410 * For this test, we want to ensure we end up with an extent entry 410 * For this test, we want to ensure we end up with an extent entry
@@ -834,9 +834,10 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
834 834
835 test_msg("running btrfs free space cache tests"); 835 test_msg("running btrfs free space cache tests");
836 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 836 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
837 if (!fs_info) 837 if (!fs_info) {
838 test_std_err(TEST_ALLOC_FS_INFO);
838 return -ENOMEM; 839 return -ENOMEM;
839 840 }
840 841
841 /* 842 /*
842 * For ppc64 (with 64k page size), bytes per bitmap might be 843 * For ppc64 (with 64k page size), bytes per bitmap might be
@@ -846,13 +847,14 @@ int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
846 cache = btrfs_alloc_dummy_block_group(fs_info, 847 cache = btrfs_alloc_dummy_block_group(fs_info,
847 BITS_PER_BITMAP * sectorsize + PAGE_SIZE); 848 BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
848 if (!cache) { 849 if (!cache) {
849 test_err("couldn't run the tests"); 850 test_std_err(TEST_ALLOC_BLOCK_GROUP);
850 btrfs_free_dummy_fs_info(fs_info); 851 btrfs_free_dummy_fs_info(fs_info);
851 return 0; 852 return 0;
852 } 853 }
853 854
854 root = btrfs_alloc_dummy_root(fs_info); 855 root = btrfs_alloc_dummy_root(fs_info);
855 if (IS_ERR(root)) { 856 if (IS_ERR(root)) {
857 test_std_err(TEST_ALLOC_ROOT);
856 ret = PTR_ERR(root); 858 ret = PTR_ERR(root);
857 goto out; 859 goto out;
858 } 860 }
@@ -874,6 +876,5 @@ out:
874 btrfs_free_dummy_block_group(cache); 876 btrfs_free_dummy_block_group(cache);
875 btrfs_free_dummy_root(root); 877 btrfs_free_dummy_root(root);
876 btrfs_free_dummy_fs_info(fs_info); 878 btrfs_free_dummy_fs_info(fs_info);
877 test_msg("free space cache tests finished");
878 return ret; 879 return ret;
879} 880}
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index f7a969b986eb..a90dad166971 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -30,7 +30,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
30 unsigned int i; 30 unsigned int i;
31 int ret; 31 int ret;
32 32
33 info = search_free_space_info(trans, fs_info, cache, path, 0); 33 info = search_free_space_info(trans, cache, path, 0);
34 if (IS_ERR(info)) { 34 if (IS_ERR(info)) {
35 test_err("could not find free space info"); 35 test_err("could not find free space info");
36 ret = PTR_ERR(info); 36 ret = PTR_ERR(info);
@@ -115,7 +115,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
115 u32 flags; 115 u32 flags;
116 int ret; 116 int ret;
117 117
118 info = search_free_space_info(trans, fs_info, cache, path, 0); 118 info = search_free_space_info(trans, cache, path, 0);
119 if (IS_ERR(info)) { 119 if (IS_ERR(info)) {
120 test_err("could not find free space info"); 120 test_err("could not find free space info");
121 btrfs_release_path(path); 121 btrfs_release_path(path);
@@ -444,14 +444,14 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
444 444
445 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 445 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
446 if (!fs_info) { 446 if (!fs_info) {
447 test_err("couldn't allocate dummy fs info"); 447 test_std_err(TEST_ALLOC_FS_INFO);
448 ret = -ENOMEM; 448 ret = -ENOMEM;
449 goto out; 449 goto out;
450 } 450 }
451 451
452 root = btrfs_alloc_dummy_root(fs_info); 452 root = btrfs_alloc_dummy_root(fs_info);
453 if (IS_ERR(root)) { 453 if (IS_ERR(root)) {
454 test_err("couldn't allocate dummy root"); 454 test_std_err(TEST_ALLOC_ROOT);
455 ret = PTR_ERR(root); 455 ret = PTR_ERR(root);
456 goto out; 456 goto out;
457 } 457 }
@@ -463,7 +463,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
463 463
464 root->node = alloc_test_extent_buffer(root->fs_info, nodesize); 464 root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
465 if (!root->node) { 465 if (!root->node) {
466 test_err("couldn't allocate dummy buffer"); 466 test_std_err(TEST_ALLOC_EXTENT_BUFFER);
467 ret = -ENOMEM; 467 ret = -ENOMEM;
468 goto out; 468 goto out;
469 } 469 }
@@ -473,7 +473,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
473 473
474 cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment); 474 cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
475 if (!cache) { 475 if (!cache) {
476 test_err("couldn't allocate dummy block group cache"); 476 test_std_err(TEST_ALLOC_BLOCK_GROUP);
477 ret = -ENOMEM; 477 ret = -ENOMEM;
478 goto out; 478 goto out;
479 } 479 }
@@ -486,7 +486,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
486 486
487 path = btrfs_alloc_path(); 487 path = btrfs_alloc_path();
488 if (!path) { 488 if (!path) {
489 test_err("couldn't allocate path"); 489 test_std_err(TEST_ALLOC_ROOT);
490 ret = -ENOMEM; 490 ret = -ENOMEM;
491 goto out; 491 goto out;
492 } 492 }
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index af0c8e30d9e2..bc6dbd1b42fd 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -226,31 +226,34 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
226 u64 offset; 226 u64 offset;
227 int ret = -ENOMEM; 227 int ret = -ENOMEM;
228 228
229 test_msg("running btrfs_get_extent tests");
230
229 inode = btrfs_new_test_inode(); 231 inode = btrfs_new_test_inode();
230 if (!inode) { 232 if (!inode) {
231 test_err("couldn't allocate inode"); 233 test_std_err(TEST_ALLOC_INODE);
232 return ret; 234 return ret;
233 } 235 }
234 236
237 inode->i_mode = S_IFREG;
235 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 238 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
236 BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; 239 BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
237 BTRFS_I(inode)->location.offset = 0; 240 BTRFS_I(inode)->location.offset = 0;
238 241
239 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 242 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
240 if (!fs_info) { 243 if (!fs_info) {
241 test_err("couldn't allocate dummy fs info"); 244 test_std_err(TEST_ALLOC_FS_INFO);
242 goto out; 245 goto out;
243 } 246 }
244 247
245 root = btrfs_alloc_dummy_root(fs_info); 248 root = btrfs_alloc_dummy_root(fs_info);
246 if (IS_ERR(root)) { 249 if (IS_ERR(root)) {
247 test_err("couldn't allocate root"); 250 test_std_err(TEST_ALLOC_ROOT);
248 goto out; 251 goto out;
249 } 252 }
250 253
251 root->node = alloc_dummy_extent_buffer(fs_info, nodesize); 254 root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
252 if (!root->node) { 255 if (!root->node) {
253 test_err("couldn't allocate dummy buffer"); 256 test_std_err(TEST_ALLOC_ROOT);
254 goto out; 257 goto out;
255 } 258 }
256 259
@@ -827,9 +830,11 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
827 struct extent_map *em = NULL; 830 struct extent_map *em = NULL;
828 int ret = -ENOMEM; 831 int ret = -ENOMEM;
829 832
833 test_msg("running hole first btrfs_get_extent test");
834
830 inode = btrfs_new_test_inode(); 835 inode = btrfs_new_test_inode();
831 if (!inode) { 836 if (!inode) {
832 test_err("couldn't allocate inode"); 837 test_std_err(TEST_ALLOC_INODE);
833 return ret; 838 return ret;
834 } 839 }
835 840
@@ -839,19 +844,19 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
839 844
840 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 845 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
841 if (!fs_info) { 846 if (!fs_info) {
842 test_err("couldn't allocate dummy fs info"); 847 test_std_err(TEST_ALLOC_FS_INFO);
843 goto out; 848 goto out;
844 } 849 }
845 850
846 root = btrfs_alloc_dummy_root(fs_info); 851 root = btrfs_alloc_dummy_root(fs_info);
847 if (IS_ERR(root)) { 852 if (IS_ERR(root)) {
848 test_err("couldn't allocate root"); 853 test_std_err(TEST_ALLOC_ROOT);
849 goto out; 854 goto out;
850 } 855 }
851 856
852 root->node = alloc_dummy_extent_buffer(fs_info, nodesize); 857 root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
853 if (!root->node) { 858 if (!root->node) {
854 test_err("couldn't allocate dummy buffer"); 859 test_std_err(TEST_ALLOC_ROOT);
855 goto out; 860 goto out;
856 } 861 }
857 862
@@ -927,21 +932,23 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
927 struct btrfs_root *root = NULL; 932 struct btrfs_root *root = NULL;
928 int ret = -ENOMEM; 933 int ret = -ENOMEM;
929 934
935 test_msg("running outstanding_extents tests");
936
930 inode = btrfs_new_test_inode(); 937 inode = btrfs_new_test_inode();
931 if (!inode) { 938 if (!inode) {
932 test_err("couldn't allocate inode"); 939 test_std_err(TEST_ALLOC_INODE);
933 return ret; 940 return ret;
934 } 941 }
935 942
936 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 943 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
937 if (!fs_info) { 944 if (!fs_info) {
938 test_err("couldn't allocate dummy fs info"); 945 test_std_err(TEST_ALLOC_FS_INFO);
939 goto out; 946 goto out;
940 } 947 }
941 948
942 root = btrfs_alloc_dummy_root(fs_info); 949 root = btrfs_alloc_dummy_root(fs_info);
943 if (IS_ERR(root)) { 950 if (IS_ERR(root)) {
944 test_err("couldn't allocate root"); 951 test_std_err(TEST_ALLOC_ROOT);
945 goto out; 952 goto out;
946 } 953 }
947 954
@@ -1110,17 +1117,16 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
1110{ 1117{
1111 int ret; 1118 int ret;
1112 1119
1120 test_msg("running inode tests");
1121
1113 set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); 1122 set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
1114 set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); 1123 set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
1115 1124
1116 test_msg("running btrfs_get_extent tests");
1117 ret = test_btrfs_get_extent(sectorsize, nodesize); 1125 ret = test_btrfs_get_extent(sectorsize, nodesize);
1118 if (ret) 1126 if (ret)
1119 return ret; 1127 return ret;
1120 test_msg("running hole first btrfs_get_extent test");
1121 ret = test_hole_first(sectorsize, nodesize); 1128 ret = test_hole_first(sectorsize, nodesize);
1122 if (ret) 1129 if (ret)
1123 return ret; 1130 return ret;
1124 test_msg("running outstanding_extents tests");
1125 return test_extent_accounting(sectorsize, nodesize); 1131 return test_extent_accounting(sectorsize, nodesize);
1126} 1132}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 412b910b04cc..09aaca1efd62 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -32,7 +32,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
32 32
33 path = btrfs_alloc_path(); 33 path = btrfs_alloc_path();
34 if (!path) { 34 if (!path) {
35 test_err("couldn't allocate path"); 35 test_std_err(TEST_ALLOC_ROOT);
36 return -ENOMEM; 36 return -ENOMEM;
37 } 37 }
38 38
@@ -82,7 +82,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
82 82
83 path = btrfs_alloc_path(); 83 path = btrfs_alloc_path();
84 if (!path) { 84 if (!path) {
85 test_err("couldn't allocate path"); 85 test_std_err(TEST_ALLOC_ROOT);
86 return -ENOMEM; 86 return -ENOMEM;
87 } 87 }
88 88
@@ -132,7 +132,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
132 132
133 path = btrfs_alloc_path(); 133 path = btrfs_alloc_path();
134 if (!path) { 134 if (!path) {
135 test_err("couldn't allocate path"); 135 test_std_err(TEST_ALLOC_ROOT);
136 return -ENOMEM; 136 return -ENOMEM;
137 } 137 }
138 path->leave_spinning = 1; 138 path->leave_spinning = 1;
@@ -166,7 +166,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
166 166
167 path = btrfs_alloc_path(); 167 path = btrfs_alloc_path();
168 if (!path) { 168 if (!path) {
169 test_err("couldn't allocate path"); 169 test_std_err(TEST_ALLOC_ROOT);
170 return -ENOMEM; 170 return -ENOMEM;
171 } 171 }
172 172
@@ -215,7 +215,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
215 215
216 btrfs_init_dummy_trans(&trans, fs_info); 216 btrfs_init_dummy_trans(&trans, fs_info);
217 217
218 test_msg("qgroup basic add"); 218 test_msg("running qgroup add/remove tests");
219 ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID); 219 ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
220 if (ret) { 220 if (ret) {
221 test_err("couldn't create a qgroup %d", ret); 221 test_err("couldn't create a qgroup %d", ret);
@@ -316,7 +316,7 @@ static int test_multiple_refs(struct btrfs_root *root,
316 316
317 btrfs_init_dummy_trans(&trans, fs_info); 317 btrfs_init_dummy_trans(&trans, fs_info);
318 318
319 test_msg("qgroup multiple refs test"); 319 test_msg("running qgroup multiple refs test");
320 320
321 /* 321 /*
322 * We have BTRFS_FS_TREE_OBJECTID created already from the 322 * We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -457,13 +457,13 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
457 457
458 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); 458 fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
459 if (!fs_info) { 459 if (!fs_info) {
460 test_err("couldn't allocate dummy fs info"); 460 test_std_err(TEST_ALLOC_FS_INFO);
461 return -ENOMEM; 461 return -ENOMEM;
462 } 462 }
463 463
464 root = btrfs_alloc_dummy_root(fs_info); 464 root = btrfs_alloc_dummy_root(fs_info);
465 if (IS_ERR(root)) { 465 if (IS_ERR(root)) {
466 test_err("couldn't allocate root"); 466 test_std_err(TEST_ALLOC_ROOT);
467 ret = PTR_ERR(root); 467 ret = PTR_ERR(root);
468 goto out; 468 goto out;
469 } 469 }
@@ -495,7 +495,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
495 495
496 tmp_root = btrfs_alloc_dummy_root(fs_info); 496 tmp_root = btrfs_alloc_dummy_root(fs_info);
497 if (IS_ERR(tmp_root)) { 497 if (IS_ERR(tmp_root)) {
498 test_err("couldn't allocate a fs root"); 498 test_std_err(TEST_ALLOC_ROOT);
499 ret = PTR_ERR(tmp_root); 499 ret = PTR_ERR(tmp_root);
500 goto out; 500 goto out;
501 } 501 }
@@ -510,7 +510,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
510 510
511 tmp_root = btrfs_alloc_dummy_root(fs_info); 511 tmp_root = btrfs_alloc_dummy_root(fs_info);
512 if (IS_ERR(tmp_root)) { 512 if (IS_ERR(tmp_root)) {
513 test_err("couldn't allocate a fs root"); 513 test_std_err(TEST_ALLOC_ROOT);
514 ret = PTR_ERR(tmp_root); 514 ret = PTR_ERR(tmp_root);
515 goto out; 515 goto out;
516 } 516 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e4e665f422fc..3f6811cdf803 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -50,14 +50,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
50 btrfs_err(transaction->fs_info, 50 btrfs_err(transaction->fs_info,
51 "pending csums is %llu", 51 "pending csums is %llu",
52 transaction->delayed_refs.pending_csums); 52 transaction->delayed_refs.pending_csums);
53 while (!list_empty(&transaction->pending_chunks)) {
54 struct extent_map *em;
55
56 em = list_first_entry(&transaction->pending_chunks,
57 struct extent_map, list);
58 list_del_init(&em->list);
59 free_extent_map(em);
60 }
61 /* 53 /*
62 * If any block groups are found in ->deleted_bgs then it's 54 * If any block groups are found in ->deleted_bgs then it's
63 * because the transaction was aborted and a commit did not 55 * because the transaction was aborted and a commit did not
@@ -75,39 +67,11 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
75 btrfs_put_block_group_trimming(cache); 67 btrfs_put_block_group_trimming(cache);
76 btrfs_put_block_group(cache); 68 btrfs_put_block_group(cache);
77 } 69 }
70 WARN_ON(!list_empty(&transaction->dev_update_list));
78 kfree(transaction); 71 kfree(transaction);
79 } 72 }
80} 73}
81 74
82static void clear_btree_io_tree(struct extent_io_tree *tree)
83{
84 spin_lock(&tree->lock);
85 /*
86 * Do a single barrier for the waitqueue_active check here, the state
87 * of the waitqueue should not change once clear_btree_io_tree is
88 * called.
89 */
90 smp_mb();
91 while (!RB_EMPTY_ROOT(&tree->state)) {
92 struct rb_node *node;
93 struct extent_state *state;
94
95 node = rb_first(&tree->state);
96 state = rb_entry(node, struct extent_state, rb_node);
97 rb_erase(&state->rb_node, &tree->state);
98 RB_CLEAR_NODE(&state->rb_node);
99 /*
100 * btree io trees aren't supposed to have tasks waiting for
101 * changes in the flags of extent states ever.
102 */
103 ASSERT(!waitqueue_active(&state->wq));
104 free_extent_state(state);
105
106 cond_resched_lock(&tree->lock);
107 }
108 spin_unlock(&tree->lock);
109}
110
111static noinline void switch_commit_roots(struct btrfs_transaction *trans) 75static noinline void switch_commit_roots(struct btrfs_transaction *trans)
112{ 76{
113 struct btrfs_fs_info *fs_info = trans->fs_info; 77 struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -121,7 +85,7 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
121 root->commit_root = btrfs_root_node(root); 85 root->commit_root = btrfs_root_node(root);
122 if (is_fstree(root->root_key.objectid)) 86 if (is_fstree(root->root_key.objectid))
123 btrfs_unpin_free_ino(root); 87 btrfs_unpin_free_ino(root);
124 clear_btree_io_tree(&root->dirty_log_pages); 88 extent_io_tree_release(&root->dirty_log_pages);
125 btrfs_qgroup_clean_swapped_blocks(root); 89 btrfs_qgroup_clean_swapped_blocks(root);
126 } 90 }
127 91
@@ -263,19 +227,18 @@ loop:
263 spin_lock_init(&cur_trans->delayed_refs.lock); 227 spin_lock_init(&cur_trans->delayed_refs.lock);
264 228
265 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 229 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
266 INIT_LIST_HEAD(&cur_trans->pending_chunks); 230 INIT_LIST_HEAD(&cur_trans->dev_update_list);
267 INIT_LIST_HEAD(&cur_trans->switch_commits); 231 INIT_LIST_HEAD(&cur_trans->switch_commits);
268 INIT_LIST_HEAD(&cur_trans->dirty_bgs); 232 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
269 INIT_LIST_HEAD(&cur_trans->io_bgs); 233 INIT_LIST_HEAD(&cur_trans->io_bgs);
270 INIT_LIST_HEAD(&cur_trans->dropped_roots); 234 INIT_LIST_HEAD(&cur_trans->dropped_roots);
271 mutex_init(&cur_trans->cache_write_mutex); 235 mutex_init(&cur_trans->cache_write_mutex);
272 cur_trans->num_dirty_bgs = 0;
273 spin_lock_init(&cur_trans->dirty_bgs_lock); 236 spin_lock_init(&cur_trans->dirty_bgs_lock);
274 INIT_LIST_HEAD(&cur_trans->deleted_bgs); 237 INIT_LIST_HEAD(&cur_trans->deleted_bgs);
275 spin_lock_init(&cur_trans->dropped_roots_lock); 238 spin_lock_init(&cur_trans->dropped_roots_lock);
276 list_add_tail(&cur_trans->list, &fs_info->trans_list); 239 list_add_tail(&cur_trans->list, &fs_info->trans_list);
277 extent_io_tree_init(&cur_trans->dirty_pages, 240 extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
278 fs_info->btree_inode); 241 IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
279 fs_info->generation++; 242 fs_info->generation++;
280 cur_trans->transid = fs_info->generation; 243 cur_trans->transid = fs_info->generation;
281 fs_info->running_transaction = cur_trans; 244 fs_info->running_transaction = cur_trans;
@@ -928,7 +891,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
928 * superblock that points to btree nodes/leafs for which 891 * superblock that points to btree nodes/leafs for which
929 * writeback hasn't finished yet (and without errors). 892 * writeback hasn't finished yet (and without errors).
930 * We cleanup any entries left in the io tree when committing 893 * We cleanup any entries left in the io tree when committing
931 * the transaction (through clear_btree_io_tree()). 894 * the transaction (through extent_io_tree_release()).
932 */ 895 */
933 if (err == -ENOMEM) { 896 if (err == -ENOMEM) {
934 err = 0; 897 err = 0;
@@ -973,7 +936,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
973 * left in the io tree. For a log commit, we don't remove them 936 * left in the io tree. For a log commit, we don't remove them
974 * after committing the log because the tree can be accessed 937 * after committing the log because the tree can be accessed
975 * concurrently - we do it only at transaction commit time when 938 * concurrently - we do it only at transaction commit time when
976 * it's safe to do it (through clear_btree_io_tree()). 939 * it's safe to do it (through extent_io_tree_release()).
977 */ 940 */
978 err = clear_extent_bit(dirty_pages, start, end, 941 err = clear_extent_bit(dirty_pages, start, end,
979 EXTENT_NEED_WAIT, 0, 0, &cached_state); 942 EXTENT_NEED_WAIT, 0, 0, &cached_state);
@@ -1051,7 +1014,7 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
1051 blk_finish_plug(&plug); 1014 blk_finish_plug(&plug);
1052 ret2 = btrfs_wait_extents(fs_info, dirty_pages); 1015 ret2 = btrfs_wait_extents(fs_info, dirty_pages);
1053 1016
1054 clear_btree_io_tree(&trans->transaction->dirty_pages); 1017 extent_io_tree_release(&trans->transaction->dirty_pages);
1055 1018
1056 if (ret) 1019 if (ret)
1057 return ret; 1020 return ret;
@@ -1130,17 +1093,17 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
1130 if (ret) 1093 if (ret)
1131 return ret; 1094 return ret;
1132 1095
1133 ret = btrfs_run_dev_stats(trans, fs_info); 1096 ret = btrfs_run_dev_stats(trans);
1134 if (ret) 1097 if (ret)
1135 return ret; 1098 return ret;
1136 ret = btrfs_run_dev_replace(trans, fs_info); 1099 ret = btrfs_run_dev_replace(trans);
1137 if (ret) 1100 if (ret)
1138 return ret; 1101 return ret;
1139 ret = btrfs_run_qgroups(trans); 1102 ret = btrfs_run_qgroups(trans);
1140 if (ret) 1103 if (ret)
1141 return ret; 1104 return ret;
1142 1105
1143 ret = btrfs_setup_space_cache(trans, fs_info); 1106 ret = btrfs_setup_space_cache(trans);
1144 if (ret) 1107 if (ret)
1145 return ret; 1108 return ret;
1146 1109
@@ -1168,7 +1131,7 @@ again:
1168 } 1131 }
1169 1132
1170 while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { 1133 while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
1171 ret = btrfs_write_dirty_block_groups(trans, fs_info); 1134 ret = btrfs_write_dirty_block_groups(trans);
1172 if (ret) 1135 if (ret)
1173 return ret; 1136 return ret;
1174 ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 1137 ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
@@ -2241,8 +2204,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
2241 memcpy(fs_info->super_for_commit, fs_info->super_copy, 2204 memcpy(fs_info->super_for_commit, fs_info->super_copy,
2242 sizeof(*fs_info->super_copy)); 2205 sizeof(*fs_info->super_copy));
2243 2206
2244 btrfs_update_commit_device_size(fs_info); 2207 btrfs_commit_device_sizes(cur_trans);
2245 btrfs_update_commit_device_bytes_used(cur_trans);
2246 2208
2247 clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags); 2209 clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
2248 clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags); 2210 clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index f1ba78949d1b..78c446c222b7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -51,7 +51,7 @@ struct btrfs_transaction {
51 wait_queue_head_t writer_wait; 51 wait_queue_head_t writer_wait;
52 wait_queue_head_t commit_wait; 52 wait_queue_head_t commit_wait;
53 struct list_head pending_snapshots; 53 struct list_head pending_snapshots;
54 struct list_head pending_chunks; 54 struct list_head dev_update_list;
55 struct list_head switch_commits; 55 struct list_head switch_commits;
56 struct list_head dirty_bgs; 56 struct list_head dirty_bgs;
57 57
@@ -80,7 +80,6 @@ struct btrfs_transaction {
80 */ 80 */
81 struct mutex cache_write_mutex; 81 struct mutex cache_write_mutex;
82 spinlock_t dirty_bgs_lock; 82 spinlock_t dirty_bgs_lock;
83 unsigned int num_dirty_bgs;
84 /* Protected by spin lock fs_info->unused_bgs_lock. */ 83 /* Protected by spin lock fs_info->unused_bgs_lock. */
85 struct list_head deleted_bgs; 84 struct list_head deleted_bgs;
86 spinlock_t dropped_roots_lock; 85 spinlock_t dropped_roots_lock;
@@ -120,7 +119,6 @@ struct btrfs_trans_handle {
120 bool allocating_chunk; 119 bool allocating_chunk;
121 bool can_flush_pending_bgs; 120 bool can_flush_pending_bgs;
122 bool reloc_reserved; 121 bool reloc_reserved;
123 bool sync;
124 bool dirty; 122 bool dirty;
125 struct btrfs_root *root; 123 struct btrfs_root *root;
126 struct btrfs_fs_info *fs_info; 124 struct btrfs_fs_info *fs_info;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a62e1e837a89..748cd1598255 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -15,6 +15,9 @@
15 * carefully reviewed otherwise so it does not prevent mount of valid images. 15 * carefully reviewed otherwise so it does not prevent mount of valid images.
16 */ 16 */
17 17
18#include <linux/types.h>
19#include <linux/stddef.h>
20#include <linux/error-injection.h>
18#include "ctree.h" 21#include "ctree.h"
19#include "tree-checker.h" 22#include "tree-checker.h"
20#include "disk-io.h" 23#include "disk-io.h"
@@ -41,12 +44,12 @@
41 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. 44 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
42 * Allows callers to customize the output. 45 * Allows callers to customize the output.
43 */ 46 */
44__printf(4, 5) 47__printf(3, 4)
45__cold 48__cold
46static void generic_err(const struct btrfs_fs_info *fs_info, 49static void generic_err(const struct extent_buffer *eb, int slot,
47 const struct extent_buffer *eb, int slot,
48 const char *fmt, ...) 50 const char *fmt, ...)
49{ 51{
52 const struct btrfs_fs_info *fs_info = eb->fs_info;
50 struct va_format vaf; 53 struct va_format vaf;
51 va_list args; 54 va_list args;
52 55
@@ -66,12 +69,12 @@ static void generic_err(const struct btrfs_fs_info *fs_info,
66 * Customized reporter for extent data item, since its key objectid and 69 * Customized reporter for extent data item, since its key objectid and
67 * offset has its own meaning. 70 * offset has its own meaning.
68 */ 71 */
69__printf(4, 5) 72__printf(3, 4)
70__cold 73__cold
71static void file_extent_err(const struct btrfs_fs_info *fs_info, 74static void file_extent_err(const struct extent_buffer *eb, int slot,
72 const struct extent_buffer *eb, int slot,
73 const char *fmt, ...) 75 const char *fmt, ...)
74{ 76{
77 const struct btrfs_fs_info *fs_info = eb->fs_info;
75 struct btrfs_key key; 78 struct btrfs_key key;
76 struct va_format vaf; 79 struct va_format vaf;
77 va_list args; 80 va_list args;
@@ -94,26 +97,26 @@ static void file_extent_err(const struct btrfs_fs_info *fs_info,
94 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment 97 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
95 * Else return 1 98 * Else return 1
96 */ 99 */
97#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment) \ 100#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment) \
98({ \ 101({ \
99 if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \ 102 if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
100 file_extent_err((fs_info), (leaf), (slot), \ 103 file_extent_err((leaf), (slot), \
101 "invalid %s for file extent, have %llu, should be aligned to %u", \ 104 "invalid %s for file extent, have %llu, should be aligned to %u", \
102 (#name), btrfs_file_extent_##name((leaf), (fi)), \ 105 (#name), btrfs_file_extent_##name((leaf), (fi)), \
103 (alignment)); \ 106 (alignment)); \
104 (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ 107 (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \
105}) 108})
106 109
107static int check_extent_data_item(struct btrfs_fs_info *fs_info, 110static int check_extent_data_item(struct extent_buffer *leaf,
108 struct extent_buffer *leaf,
109 struct btrfs_key *key, int slot) 111 struct btrfs_key *key, int slot)
110{ 112{
113 struct btrfs_fs_info *fs_info = leaf->fs_info;
111 struct btrfs_file_extent_item *fi; 114 struct btrfs_file_extent_item *fi;
112 u32 sectorsize = fs_info->sectorsize; 115 u32 sectorsize = fs_info->sectorsize;
113 u32 item_size = btrfs_item_size_nr(leaf, slot); 116 u32 item_size = btrfs_item_size_nr(leaf, slot);
114 117
115 if (!IS_ALIGNED(key->offset, sectorsize)) { 118 if (!IS_ALIGNED(key->offset, sectorsize)) {
116 file_extent_err(fs_info, leaf, slot, 119 file_extent_err(leaf, slot,
117"unaligned file_offset for file extent, have %llu should be aligned to %u", 120"unaligned file_offset for file extent, have %llu should be aligned to %u",
118 key->offset, sectorsize); 121 key->offset, sectorsize);
119 return -EUCLEAN; 122 return -EUCLEAN;
@@ -122,7 +125,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
122 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 125 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
123 126
124 if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { 127 if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
125 file_extent_err(fs_info, leaf, slot, 128 file_extent_err(leaf, slot,
126 "invalid type for file extent, have %u expect range [0, %u]", 129 "invalid type for file extent, have %u expect range [0, %u]",
127 btrfs_file_extent_type(leaf, fi), 130 btrfs_file_extent_type(leaf, fi),
128 BTRFS_FILE_EXTENT_TYPES); 131 BTRFS_FILE_EXTENT_TYPES);
@@ -134,14 +137,14 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
134 * and must be caught in open_ctree(). 137 * and must be caught in open_ctree().
135 */ 138 */
136 if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { 139 if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
137 file_extent_err(fs_info, leaf, slot, 140 file_extent_err(leaf, slot,
138 "invalid compression for file extent, have %u expect range [0, %u]", 141 "invalid compression for file extent, have %u expect range [0, %u]",
139 btrfs_file_extent_compression(leaf, fi), 142 btrfs_file_extent_compression(leaf, fi),
140 BTRFS_COMPRESS_TYPES); 143 BTRFS_COMPRESS_TYPES);
141 return -EUCLEAN; 144 return -EUCLEAN;
142 } 145 }
143 if (btrfs_file_extent_encryption(leaf, fi)) { 146 if (btrfs_file_extent_encryption(leaf, fi)) {
144 file_extent_err(fs_info, leaf, slot, 147 file_extent_err(leaf, slot,
145 "invalid encryption for file extent, have %u expect 0", 148 "invalid encryption for file extent, have %u expect 0",
146 btrfs_file_extent_encryption(leaf, fi)); 149 btrfs_file_extent_encryption(leaf, fi));
147 return -EUCLEAN; 150 return -EUCLEAN;
@@ -149,7 +152,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
149 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { 152 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
150 /* Inline extent must have 0 as key offset */ 153 /* Inline extent must have 0 as key offset */
151 if (key->offset) { 154 if (key->offset) {
152 file_extent_err(fs_info, leaf, slot, 155 file_extent_err(leaf, slot,
153 "invalid file_offset for inline file extent, have %llu expect 0", 156 "invalid file_offset for inline file extent, have %llu expect 0",
154 key->offset); 157 key->offset);
155 return -EUCLEAN; 158 return -EUCLEAN;
@@ -163,7 +166,7 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
163 /* Uncompressed inline extent size must match item size */ 166 /* Uncompressed inline extent size must match item size */
164 if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + 167 if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
165 btrfs_file_extent_ram_bytes(leaf, fi)) { 168 btrfs_file_extent_ram_bytes(leaf, fi)) {
166 file_extent_err(fs_info, leaf, slot, 169 file_extent_err(leaf, slot,
167 "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", 170 "invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
168 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START + 171 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
169 btrfs_file_extent_ram_bytes(leaf, fi)); 172 btrfs_file_extent_ram_bytes(leaf, fi));
@@ -174,41 +177,41 @@ static int check_extent_data_item(struct btrfs_fs_info *fs_info,
174 177
175 /* Regular or preallocated extent has fixed item size */ 178 /* Regular or preallocated extent has fixed item size */
176 if (item_size != sizeof(*fi)) { 179 if (item_size != sizeof(*fi)) {
177 file_extent_err(fs_info, leaf, slot, 180 file_extent_err(leaf, slot,
178 "invalid item size for reg/prealloc file extent, have %u expect %zu", 181 "invalid item size for reg/prealloc file extent, have %u expect %zu",
179 item_size, sizeof(*fi)); 182 item_size, sizeof(*fi));
180 return -EUCLEAN; 183 return -EUCLEAN;
181 } 184 }
182 if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) || 185 if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
183 CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) || 186 CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
184 CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) || 187 CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
185 CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) || 188 CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
186 CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize)) 189 CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
187 return -EUCLEAN; 190 return -EUCLEAN;
188 return 0; 191 return 0;
189} 192}
190 193
191static int check_csum_item(struct btrfs_fs_info *fs_info, 194static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
192 struct extent_buffer *leaf, struct btrfs_key *key,
193 int slot) 195 int slot)
194{ 196{
197 struct btrfs_fs_info *fs_info = leaf->fs_info;
195 u32 sectorsize = fs_info->sectorsize; 198 u32 sectorsize = fs_info->sectorsize;
196 u32 csumsize = btrfs_super_csum_size(fs_info->super_copy); 199 u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
197 200
198 if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { 201 if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
199 generic_err(fs_info, leaf, slot, 202 generic_err(leaf, slot,
200 "invalid key objectid for csum item, have %llu expect %llu", 203 "invalid key objectid for csum item, have %llu expect %llu",
201 key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); 204 key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
202 return -EUCLEAN; 205 return -EUCLEAN;
203 } 206 }
204 if (!IS_ALIGNED(key->offset, sectorsize)) { 207 if (!IS_ALIGNED(key->offset, sectorsize)) {
205 generic_err(fs_info, leaf, slot, 208 generic_err(leaf, slot,
206 "unaligned key offset for csum item, have %llu should be aligned to %u", 209 "unaligned key offset for csum item, have %llu should be aligned to %u",
207 key->offset, sectorsize); 210 key->offset, sectorsize);
208 return -EUCLEAN; 211 return -EUCLEAN;
209 } 212 }
210 if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { 213 if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
211 generic_err(fs_info, leaf, slot, 214 generic_err(leaf, slot,
212 "unaligned item size for csum item, have %u should be aligned to %u", 215 "unaligned item size for csum item, have %u should be aligned to %u",
213 btrfs_item_size_nr(leaf, slot), csumsize); 216 btrfs_item_size_nr(leaf, slot), csumsize);
214 return -EUCLEAN; 217 return -EUCLEAN;
@@ -220,12 +223,12 @@ static int check_csum_item(struct btrfs_fs_info *fs_info,
220 * Customized reported for dir_item, only important new info is key->objectid, 223 * Customized reported for dir_item, only important new info is key->objectid,
221 * which represents inode number 224 * which represents inode number
222 */ 225 */
223__printf(4, 5) 226__printf(3, 4)
224__cold 227__cold
225static void dir_item_err(const struct btrfs_fs_info *fs_info, 228static void dir_item_err(const struct extent_buffer *eb, int slot,
226 const struct extent_buffer *eb, int slot,
227 const char *fmt, ...) 229 const char *fmt, ...)
228{ 230{
231 const struct btrfs_fs_info *fs_info = eb->fs_info;
229 struct btrfs_key key; 232 struct btrfs_key key;
230 struct va_format vaf; 233 struct va_format vaf;
231 va_list args; 234 va_list args;
@@ -244,10 +247,10 @@ static void dir_item_err(const struct btrfs_fs_info *fs_info,
244 va_end(args); 247 va_end(args);
245} 248}
246 249
247static int check_dir_item(struct btrfs_fs_info *fs_info, 250static int check_dir_item(struct extent_buffer *leaf,
248 struct extent_buffer *leaf,
249 struct btrfs_key *key, int slot) 251 struct btrfs_key *key, int slot)
250{ 252{
253 struct btrfs_fs_info *fs_info = leaf->fs_info;
251 struct btrfs_dir_item *di; 254 struct btrfs_dir_item *di;
252 u32 item_size = btrfs_item_size_nr(leaf, slot); 255 u32 item_size = btrfs_item_size_nr(leaf, slot);
253 u32 cur = 0; 256 u32 cur = 0;
@@ -263,7 +266,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
263 266
264 /* header itself should not cross item boundary */ 267 /* header itself should not cross item boundary */
265 if (cur + sizeof(*di) > item_size) { 268 if (cur + sizeof(*di) > item_size) {
266 dir_item_err(fs_info, leaf, slot, 269 dir_item_err(leaf, slot,
267 "dir item header crosses item boundary, have %zu boundary %u", 270 "dir item header crosses item boundary, have %zu boundary %u",
268 cur + sizeof(*di), item_size); 271 cur + sizeof(*di), item_size);
269 return -EUCLEAN; 272 return -EUCLEAN;
@@ -272,7 +275,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
272 /* dir type check */ 275 /* dir type check */
273 dir_type = btrfs_dir_type(leaf, di); 276 dir_type = btrfs_dir_type(leaf, di);
274 if (dir_type >= BTRFS_FT_MAX) { 277 if (dir_type >= BTRFS_FT_MAX) {
275 dir_item_err(fs_info, leaf, slot, 278 dir_item_err(leaf, slot,
276 "invalid dir item type, have %u expect [0, %u)", 279 "invalid dir item type, have %u expect [0, %u)",
277 dir_type, BTRFS_FT_MAX); 280 dir_type, BTRFS_FT_MAX);
278 return -EUCLEAN; 281 return -EUCLEAN;
@@ -280,14 +283,14 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
280 283
281 if (key->type == BTRFS_XATTR_ITEM_KEY && 284 if (key->type == BTRFS_XATTR_ITEM_KEY &&
282 dir_type != BTRFS_FT_XATTR) { 285 dir_type != BTRFS_FT_XATTR) {
283 dir_item_err(fs_info, leaf, slot, 286 dir_item_err(leaf, slot,
284 "invalid dir item type for XATTR key, have %u expect %u", 287 "invalid dir item type for XATTR key, have %u expect %u",
285 dir_type, BTRFS_FT_XATTR); 288 dir_type, BTRFS_FT_XATTR);
286 return -EUCLEAN; 289 return -EUCLEAN;
287 } 290 }
288 if (dir_type == BTRFS_FT_XATTR && 291 if (dir_type == BTRFS_FT_XATTR &&
289 key->type != BTRFS_XATTR_ITEM_KEY) { 292 key->type != BTRFS_XATTR_ITEM_KEY) {
290 dir_item_err(fs_info, leaf, slot, 293 dir_item_err(leaf, slot,
291 "xattr dir type found for non-XATTR key"); 294 "xattr dir type found for non-XATTR key");
292 return -EUCLEAN; 295 return -EUCLEAN;
293 } 296 }
@@ -300,13 +303,13 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
300 name_len = btrfs_dir_name_len(leaf, di); 303 name_len = btrfs_dir_name_len(leaf, di);
301 data_len = btrfs_dir_data_len(leaf, di); 304 data_len = btrfs_dir_data_len(leaf, di);
302 if (name_len > max_name_len) { 305 if (name_len > max_name_len) {
303 dir_item_err(fs_info, leaf, slot, 306 dir_item_err(leaf, slot,
304 "dir item name len too long, have %u max %u", 307 "dir item name len too long, have %u max %u",
305 name_len, max_name_len); 308 name_len, max_name_len);
306 return -EUCLEAN; 309 return -EUCLEAN;
307 } 310 }
308 if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) { 311 if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
309 dir_item_err(fs_info, leaf, slot, 312 dir_item_err(leaf, slot,
310 "dir item name and data len too long, have %u max %u", 313 "dir item name and data len too long, have %u max %u",
311 name_len + data_len, 314 name_len + data_len,
312 BTRFS_MAX_XATTR_SIZE(fs_info)); 315 BTRFS_MAX_XATTR_SIZE(fs_info));
@@ -314,7 +317,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
314 } 317 }
315 318
316 if (data_len && dir_type != BTRFS_FT_XATTR) { 319 if (data_len && dir_type != BTRFS_FT_XATTR) {
317 dir_item_err(fs_info, leaf, slot, 320 dir_item_err(leaf, slot,
318 "dir item with invalid data len, have %u expect 0", 321 "dir item with invalid data len, have %u expect 0",
319 data_len); 322 data_len);
320 return -EUCLEAN; 323 return -EUCLEAN;
@@ -324,7 +327,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
324 327
325 /* header and name/data should not cross item boundary */ 328 /* header and name/data should not cross item boundary */
326 if (cur + total_size > item_size) { 329 if (cur + total_size > item_size) {
327 dir_item_err(fs_info, leaf, slot, 330 dir_item_err(leaf, slot,
328 "dir item data crosses item boundary, have %u boundary %u", 331 "dir item data crosses item boundary, have %u boundary %u",
329 cur + total_size, item_size); 332 cur + total_size, item_size);
330 return -EUCLEAN; 333 return -EUCLEAN;
@@ -342,7 +345,7 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
342 (unsigned long)(di + 1), name_len); 345 (unsigned long)(di + 1), name_len);
343 name_hash = btrfs_name_hash(namebuf, name_len); 346 name_hash = btrfs_name_hash(namebuf, name_len);
344 if (key->offset != name_hash) { 347 if (key->offset != name_hash) {
345 dir_item_err(fs_info, leaf, slot, 348 dir_item_err(leaf, slot,
346 "name hash mismatch with key, have 0x%016x expect 0x%016llx", 349 "name hash mismatch with key, have 0x%016x expect 0x%016llx",
347 name_hash, key->offset); 350 name_hash, key->offset);
348 return -EUCLEAN; 351 return -EUCLEAN;
@@ -354,12 +357,12 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
354 return 0; 357 return 0;
355} 358}
356 359
357__printf(4, 5) 360__printf(3, 4)
358__cold 361__cold
359static void block_group_err(const struct btrfs_fs_info *fs_info, 362static void block_group_err(const struct extent_buffer *eb, int slot,
360 const struct extent_buffer *eb, int slot,
361 const char *fmt, ...) 363 const char *fmt, ...)
362{ 364{
365 const struct btrfs_fs_info *fs_info = eb->fs_info;
363 struct btrfs_key key; 366 struct btrfs_key key;
364 struct va_format vaf; 367 struct va_format vaf;
365 va_list args; 368 va_list args;
@@ -378,8 +381,7 @@ static void block_group_err(const struct btrfs_fs_info *fs_info,
378 va_end(args); 381 va_end(args);
379} 382}
380 383
381static int check_block_group_item(struct btrfs_fs_info *fs_info, 384static int check_block_group_item(struct extent_buffer *leaf,
382 struct extent_buffer *leaf,
383 struct btrfs_key *key, int slot) 385 struct btrfs_key *key, int slot)
384{ 386{
385 struct btrfs_block_group_item bgi; 387 struct btrfs_block_group_item bgi;
@@ -392,13 +394,13 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
392 * handle it. We care more about the size. 394 * handle it. We care more about the size.
393 */ 395 */
394 if (key->offset == 0) { 396 if (key->offset == 0) {
395 block_group_err(fs_info, leaf, slot, 397 block_group_err(leaf, slot,
396 "invalid block group size 0"); 398 "invalid block group size 0");
397 return -EUCLEAN; 399 return -EUCLEAN;
398 } 400 }
399 401
400 if (item_size != sizeof(bgi)) { 402 if (item_size != sizeof(bgi)) {
401 block_group_err(fs_info, leaf, slot, 403 block_group_err(leaf, slot,
402 "invalid item size, have %u expect %zu", 404 "invalid item size, have %u expect %zu",
403 item_size, sizeof(bgi)); 405 item_size, sizeof(bgi));
404 return -EUCLEAN; 406 return -EUCLEAN;
@@ -408,7 +410,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
408 sizeof(bgi)); 410 sizeof(bgi));
409 if (btrfs_block_group_chunk_objectid(&bgi) != 411 if (btrfs_block_group_chunk_objectid(&bgi) !=
410 BTRFS_FIRST_CHUNK_TREE_OBJECTID) { 412 BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
411 block_group_err(fs_info, leaf, slot, 413 block_group_err(leaf, slot,
412 "invalid block group chunk objectid, have %llu expect %llu", 414 "invalid block group chunk objectid, have %llu expect %llu",
413 btrfs_block_group_chunk_objectid(&bgi), 415 btrfs_block_group_chunk_objectid(&bgi),
414 BTRFS_FIRST_CHUNK_TREE_OBJECTID); 416 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
@@ -416,7 +418,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
416 } 418 }
417 419
418 if (btrfs_block_group_used(&bgi) > key->offset) { 420 if (btrfs_block_group_used(&bgi) > key->offset) {
419 block_group_err(fs_info, leaf, slot, 421 block_group_err(leaf, slot,
420 "invalid block group used, have %llu expect [0, %llu)", 422 "invalid block group used, have %llu expect [0, %llu)",
421 btrfs_block_group_used(&bgi), key->offset); 423 btrfs_block_group_used(&bgi), key->offset);
422 return -EUCLEAN; 424 return -EUCLEAN;
@@ -424,7 +426,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
424 426
425 flags = btrfs_block_group_flags(&bgi); 427 flags = btrfs_block_group_flags(&bgi);
426 if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) { 428 if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
427 block_group_err(fs_info, leaf, slot, 429 block_group_err(leaf, slot,
428"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", 430"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
429 flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, 431 flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
430 hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); 432 hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
@@ -437,7 +439,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
437 type != BTRFS_BLOCK_GROUP_SYSTEM && 439 type != BTRFS_BLOCK_GROUP_SYSTEM &&
438 type != (BTRFS_BLOCK_GROUP_METADATA | 440 type != (BTRFS_BLOCK_GROUP_METADATA |
439 BTRFS_BLOCK_GROUP_DATA)) { 441 BTRFS_BLOCK_GROUP_DATA)) {
440 block_group_err(fs_info, leaf, slot, 442 block_group_err(leaf, slot,
441"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx", 443"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
442 type, hweight64(type), 444 type, hweight64(type),
443 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, 445 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
@@ -448,37 +450,367 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
448 return 0; 450 return 0;
449} 451}
450 452
453__printf(4, 5)
454__cold
455static void chunk_err(const struct extent_buffer *leaf,
456 const struct btrfs_chunk *chunk, u64 logical,
457 const char *fmt, ...)
458{
459 const struct btrfs_fs_info *fs_info = leaf->fs_info;
460 bool is_sb;
461 struct va_format vaf;
462 va_list args;
463 int i;
464 int slot = -1;
465
466 /* Only superblock eb is able to have such small offset */
467 is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
468
469 if (!is_sb) {
470 /*
471 * Get the slot number by iterating through all slots, this
472 * would provide better readability.
473 */
474 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
475 if (btrfs_item_ptr_offset(leaf, i) ==
476 (unsigned long)chunk) {
477 slot = i;
478 break;
479 }
480 }
481 }
482 va_start(args, fmt);
483 vaf.fmt = fmt;
484 vaf.va = &args;
485
486 if (is_sb)
487 btrfs_crit(fs_info,
488 "corrupt superblock syschunk array: chunk_start=%llu, %pV",
489 logical, &vaf);
490 else
491 btrfs_crit(fs_info,
492 "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
493 BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
494 logical, &vaf);
495 va_end(args);
496}
497
498/*
499 * The common chunk check which could also work on super block sys chunk array.
500 *
501 * Return -EUCLEAN if anything is corrupted.
502 * Return 0 if everything is OK.
503 */
504int btrfs_check_chunk_valid(struct extent_buffer *leaf,
505 struct btrfs_chunk *chunk, u64 logical)
506{
507 struct btrfs_fs_info *fs_info = leaf->fs_info;
508 u64 length;
509 u64 stripe_len;
510 u16 num_stripes;
511 u16 sub_stripes;
512 u64 type;
513 u64 features;
514 bool mixed = false;
515
516 length = btrfs_chunk_length(leaf, chunk);
517 stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
518 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
519 sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
520 type = btrfs_chunk_type(leaf, chunk);
521
522 if (!num_stripes) {
523 chunk_err(leaf, chunk, logical,
524 "invalid chunk num_stripes, have %u", num_stripes);
525 return -EUCLEAN;
526 }
527 if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
528 chunk_err(leaf, chunk, logical,
529 "invalid chunk logical, have %llu should aligned to %u",
530 logical, fs_info->sectorsize);
531 return -EUCLEAN;
532 }
533 if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
534 chunk_err(leaf, chunk, logical,
535 "invalid chunk sectorsize, have %u expect %u",
536 btrfs_chunk_sector_size(leaf, chunk),
537 fs_info->sectorsize);
538 return -EUCLEAN;
539 }
540 if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
541 chunk_err(leaf, chunk, logical,
542 "invalid chunk length, have %llu", length);
543 return -EUCLEAN;
544 }
545 if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
546 chunk_err(leaf, chunk, logical,
547 "invalid chunk stripe length: %llu",
548 stripe_len);
549 return -EUCLEAN;
550 }
551 if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
552 type) {
553 chunk_err(leaf, chunk, logical,
554 "unrecognized chunk type: 0x%llx",
555 ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
556 BTRFS_BLOCK_GROUP_PROFILE_MASK) &
557 btrfs_chunk_type(leaf, chunk));
558 return -EUCLEAN;
559 }
560
561 if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
562 (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
563 chunk_err(leaf, chunk, logical,
564 "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
565 type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
566 return -EUCLEAN;
567 }
568 if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
569 chunk_err(leaf, chunk, logical,
570 "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
571 type, BTRFS_BLOCK_GROUP_TYPE_MASK);
572 return -EUCLEAN;
573 }
574
575 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
576 (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
577 chunk_err(leaf, chunk, logical,
578 "system chunk with data or metadata type: 0x%llx",
579 type);
580 return -EUCLEAN;
581 }
582
583 features = btrfs_super_incompat_flags(fs_info->super_copy);
584 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
585 mixed = true;
586
587 if (!mixed) {
588 if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
589 (type & BTRFS_BLOCK_GROUP_DATA)) {
590 chunk_err(leaf, chunk, logical,
591 "mixed chunk type in non-mixed mode: 0x%llx", type);
592 return -EUCLEAN;
593 }
594 }
595
596 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
597 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
598 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
599 (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
600 (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
601 ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
602 chunk_err(leaf, chunk, logical,
603 "invalid num_stripes:sub_stripes %u:%u for profile %llu",
604 num_stripes, sub_stripes,
605 type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
606 return -EUCLEAN;
607 }
608
609 return 0;
610}
611
612__printf(3, 4)
613__cold
614static void dev_item_err(const struct extent_buffer *eb, int slot,
615 const char *fmt, ...)
616{
617 struct btrfs_key key;
618 struct va_format vaf;
619 va_list args;
620
621 btrfs_item_key_to_cpu(eb, &key, slot);
622 va_start(args, fmt);
623
624 vaf.fmt = fmt;
625 vaf.va = &args;
626
627 btrfs_crit(eb->fs_info,
628 "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
629 btrfs_header_level(eb) == 0 ? "leaf" : "node",
630 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
631 key.objectid, &vaf);
632 va_end(args);
633}
634
635static int check_dev_item(struct extent_buffer *leaf,
636 struct btrfs_key *key, int slot)
637{
638 struct btrfs_fs_info *fs_info = leaf->fs_info;
639 struct btrfs_dev_item *ditem;
640 u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
641
642 if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
643 dev_item_err(leaf, slot,
644 "invalid objectid: has=%llu expect=%llu",
645 key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
646 return -EUCLEAN;
647 }
648 if (key->offset > max_devid) {
649 dev_item_err(leaf, slot,
650 "invalid devid: has=%llu expect=[0, %llu]",
651 key->offset, max_devid);
652 return -EUCLEAN;
653 }
654 ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
655 if (btrfs_device_id(leaf, ditem) != key->offset) {
656 dev_item_err(leaf, slot,
657 "devid mismatch: key has=%llu item has=%llu",
658 key->offset, btrfs_device_id(leaf, ditem));
659 return -EUCLEAN;
660 }
661
662 /*
663 * For device total_bytes, we don't have reliable way to check it, as
664 * it can be 0 for device removal. Device size check can only be done
665 * by dev extents check.
666 */
667 if (btrfs_device_bytes_used(leaf, ditem) >
668 btrfs_device_total_bytes(leaf, ditem)) {
669 dev_item_err(leaf, slot,
670 "invalid bytes used: have %llu expect [0, %llu]",
671 btrfs_device_bytes_used(leaf, ditem),
672 btrfs_device_total_bytes(leaf, ditem));
673 return -EUCLEAN;
674 }
675 /*
676 * Remaining members like io_align/type/gen/dev_group aren't really
677 * utilized. Skip them to make later usage of them easier.
678 */
679 return 0;
680}
681
/*
 * Inode item error output has the same format as dir_item_err().
 * The fs_info argument is accepted for call-site symmetry but unused,
 * since dir_item_err() derives fs_info from the extent buffer itself.
 */
#define inode_item_err(fs_info, eb, slot, fmt, ...) \
	dir_item_err(eb, slot, fmt, __VA_ARGS__)
685
686static int check_inode_item(struct extent_buffer *leaf,
687 struct btrfs_key *key, int slot)
688{
689 struct btrfs_fs_info *fs_info = leaf->fs_info;
690 struct btrfs_inode_item *iitem;
691 u64 super_gen = btrfs_super_generation(fs_info->super_copy);
692 u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
693 u32 mode;
694
695 if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
696 key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
697 key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
698 key->objectid != BTRFS_FREE_INO_OBJECTID) {
699 generic_err(leaf, slot,
700 "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
701 key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
702 BTRFS_FIRST_FREE_OBJECTID,
703 BTRFS_LAST_FREE_OBJECTID,
704 BTRFS_FREE_INO_OBJECTID);
705 return -EUCLEAN;
706 }
707 if (key->offset != 0) {
708 inode_item_err(fs_info, leaf, slot,
709 "invalid key offset: has %llu expect 0",
710 key->offset);
711 return -EUCLEAN;
712 }
713 iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
714
715 /* Here we use super block generation + 1 to handle log tree */
716 if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
717 inode_item_err(fs_info, leaf, slot,
718 "invalid inode generation: has %llu expect (0, %llu]",
719 btrfs_inode_generation(leaf, iitem),
720 super_gen + 1);
721 return -EUCLEAN;
722 }
723 /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
724 if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
725 inode_item_err(fs_info, leaf, slot,
726 "invalid inode generation: has %llu expect [0, %llu]",
727 btrfs_inode_transid(leaf, iitem), super_gen + 1);
728 return -EUCLEAN;
729 }
730
731 /*
732 * For size and nbytes it's better not to be too strict, as for dir
733 * item its size/nbytes can easily get wrong, but doesn't affect
734 * anything in the fs. So here we skip the check.
735 */
736 mode = btrfs_inode_mode(leaf, iitem);
737 if (mode & ~valid_mask) {
738 inode_item_err(fs_info, leaf, slot,
739 "unknown mode bit detected: 0x%x",
740 mode & ~valid_mask);
741 return -EUCLEAN;
742 }
743
744 /*
745 * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
746 * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
747 * Only needs to check BLK, LNK and SOCKS
748 */
749 if (!is_power_of_2(mode & S_IFMT)) {
750 if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
751 inode_item_err(fs_info, leaf, slot,
752 "invalid mode: has 0%o expect valid S_IF* bit(s)",
753 mode & S_IFMT);
754 return -EUCLEAN;
755 }
756 }
757 if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
758 inode_item_err(fs_info, leaf, slot,
759 "invalid nlink: has %u expect no more than 1 for dir",
760 btrfs_inode_nlink(leaf, iitem));
761 return -EUCLEAN;
762 }
763 if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
764 inode_item_err(fs_info, leaf, slot,
765 "unknown flags detected: 0x%llx",
766 btrfs_inode_flags(leaf, iitem) &
767 ~BTRFS_INODE_FLAG_MASK);
768 return -EUCLEAN;
769 }
770 return 0;
771}
772
451/* 773/*
452 * Common point to switch the item-specific validation. 774 * Common point to switch the item-specific validation.
453 */ 775 */
454static int check_leaf_item(struct btrfs_fs_info *fs_info, 776static int check_leaf_item(struct extent_buffer *leaf,
455 struct extent_buffer *leaf,
456 struct btrfs_key *key, int slot) 777 struct btrfs_key *key, int slot)
457{ 778{
458 int ret = 0; 779 int ret = 0;
780 struct btrfs_chunk *chunk;
459 781
460 switch (key->type) { 782 switch (key->type) {
461 case BTRFS_EXTENT_DATA_KEY: 783 case BTRFS_EXTENT_DATA_KEY:
462 ret = check_extent_data_item(fs_info, leaf, key, slot); 784 ret = check_extent_data_item(leaf, key, slot);
463 break; 785 break;
464 case BTRFS_EXTENT_CSUM_KEY: 786 case BTRFS_EXTENT_CSUM_KEY:
465 ret = check_csum_item(fs_info, leaf, key, slot); 787 ret = check_csum_item(leaf, key, slot);
466 break; 788 break;
467 case BTRFS_DIR_ITEM_KEY: 789 case BTRFS_DIR_ITEM_KEY:
468 case BTRFS_DIR_INDEX_KEY: 790 case BTRFS_DIR_INDEX_KEY:
469 case BTRFS_XATTR_ITEM_KEY: 791 case BTRFS_XATTR_ITEM_KEY:
470 ret = check_dir_item(fs_info, leaf, key, slot); 792 ret = check_dir_item(leaf, key, slot);
471 break; 793 break;
472 case BTRFS_BLOCK_GROUP_ITEM_KEY: 794 case BTRFS_BLOCK_GROUP_ITEM_KEY:
473 ret = check_block_group_item(fs_info, leaf, key, slot); 795 ret = check_block_group_item(leaf, key, slot);
796 break;
797 case BTRFS_CHUNK_ITEM_KEY:
798 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
799 ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
800 break;
801 case BTRFS_DEV_ITEM_KEY:
802 ret = check_dev_item(leaf, key, slot);
803 break;
804 case BTRFS_INODE_ITEM_KEY:
805 ret = check_inode_item(leaf, key, slot);
474 break; 806 break;
475 } 807 }
476 return ret; 808 return ret;
477} 809}
478 810
479static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf, 811static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
480 bool check_item_data)
481{ 812{
813 struct btrfs_fs_info *fs_info = leaf->fs_info;
482 /* No valid key type is 0, so all key should be larger than this key */ 814 /* No valid key type is 0, so all key should be larger than this key */
483 struct btrfs_key prev_key = {0, 0, 0}; 815 struct btrfs_key prev_key = {0, 0, 0};
484 struct btrfs_key key; 816 struct btrfs_key key;
@@ -486,7 +818,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
486 int slot; 818 int slot;
487 819
488 if (btrfs_header_level(leaf) != 0) { 820 if (btrfs_header_level(leaf) != 0) {
489 generic_err(fs_info, leaf, 0, 821 generic_err(leaf, 0,
490 "invalid level for leaf, have %d expect 0", 822 "invalid level for leaf, have %d expect 0",
491 btrfs_header_level(leaf)); 823 btrfs_header_level(leaf));
492 return -EUCLEAN; 824 return -EUCLEAN;
@@ -502,7 +834,6 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
502 */ 834 */
503 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { 835 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
504 u64 owner = btrfs_header_owner(leaf); 836 u64 owner = btrfs_header_owner(leaf);
505 struct btrfs_root *check_root;
506 837
507 /* These trees must never be empty */ 838 /* These trees must never be empty */
508 if (owner == BTRFS_ROOT_TREE_OBJECTID || 839 if (owner == BTRFS_ROOT_TREE_OBJECTID ||
@@ -511,34 +842,11 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
511 owner == BTRFS_DEV_TREE_OBJECTID || 842 owner == BTRFS_DEV_TREE_OBJECTID ||
512 owner == BTRFS_FS_TREE_OBJECTID || 843 owner == BTRFS_FS_TREE_OBJECTID ||
513 owner == BTRFS_DATA_RELOC_TREE_OBJECTID) { 844 owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
514 generic_err(fs_info, leaf, 0, 845 generic_err(leaf, 0,
515 "invalid root, root %llu must never be empty", 846 "invalid root, root %llu must never be empty",
516 owner); 847 owner);
517 return -EUCLEAN; 848 return -EUCLEAN;
518 } 849 }
519 key.objectid = owner;
520 key.type = BTRFS_ROOT_ITEM_KEY;
521 key.offset = (u64)-1;
522
523 check_root = btrfs_get_fs_root(fs_info, &key, false);
524 /*
525 * The only reason we also check NULL here is that during
526 * open_ctree() some roots has not yet been set up.
527 */
528 if (!IS_ERR_OR_NULL(check_root)) {
529 struct extent_buffer *eb;
530
531 eb = btrfs_root_node(check_root);
532 /* if leaf is the root, then it's fine */
533 if (leaf != eb) {
534 generic_err(fs_info, leaf, 0,
535 "invalid nritems, have %u should not be 0 for non-root leaf",
536 nritems);
537 free_extent_buffer(eb);
538 return -EUCLEAN;
539 }
540 free_extent_buffer(eb);
541 }
542 return 0; 850 return 0;
543 } 851 }
544 852
@@ -564,7 +872,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
564 872
565 /* Make sure the keys are in the right order */ 873 /* Make sure the keys are in the right order */
566 if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { 874 if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
567 generic_err(fs_info, leaf, slot, 875 generic_err(leaf, slot,
568 "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", 876 "bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
569 prev_key.objectid, prev_key.type, 877 prev_key.objectid, prev_key.type,
570 prev_key.offset, key.objectid, key.type, 878 prev_key.offset, key.objectid, key.type,
@@ -583,7 +891,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
583 item_end_expected = btrfs_item_offset_nr(leaf, 891 item_end_expected = btrfs_item_offset_nr(leaf,
584 slot - 1); 892 slot - 1);
585 if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { 893 if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
586 generic_err(fs_info, leaf, slot, 894 generic_err(leaf, slot,
587 "unexpected item end, have %u expect %u", 895 "unexpected item end, have %u expect %u",
588 btrfs_item_end_nr(leaf, slot), 896 btrfs_item_end_nr(leaf, slot),
589 item_end_expected); 897 item_end_expected);
@@ -597,7 +905,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
597 */ 905 */
598 if (btrfs_item_end_nr(leaf, slot) > 906 if (btrfs_item_end_nr(leaf, slot) >
599 BTRFS_LEAF_DATA_SIZE(fs_info)) { 907 BTRFS_LEAF_DATA_SIZE(fs_info)) {
600 generic_err(fs_info, leaf, slot, 908 generic_err(leaf, slot,
601 "slot end outside of leaf, have %u expect range [0, %u]", 909 "slot end outside of leaf, have %u expect range [0, %u]",
602 btrfs_item_end_nr(leaf, slot), 910 btrfs_item_end_nr(leaf, slot),
603 BTRFS_LEAF_DATA_SIZE(fs_info)); 911 BTRFS_LEAF_DATA_SIZE(fs_info));
@@ -607,7 +915,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
607 /* Also check if the item pointer overlaps with btrfs item. */ 915 /* Also check if the item pointer overlaps with btrfs item. */
608 if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > 916 if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
609 btrfs_item_ptr_offset(leaf, slot)) { 917 btrfs_item_ptr_offset(leaf, slot)) {
610 generic_err(fs_info, leaf, slot, 918 generic_err(leaf, slot,
611 "slot overlaps with its data, item end %lu data start %lu", 919 "slot overlaps with its data, item end %lu data start %lu",
612 btrfs_item_nr_offset(slot) + 920 btrfs_item_nr_offset(slot) +
613 sizeof(struct btrfs_item), 921 sizeof(struct btrfs_item),
@@ -620,7 +928,7 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
620 * Check if the item size and content meet other 928 * Check if the item size and content meet other
621 * criteria 929 * criteria
622 */ 930 */
623 ret = check_leaf_item(fs_info, leaf, &key, slot); 931 ret = check_leaf_item(leaf, &key, slot);
624 if (ret < 0) 932 if (ret < 0)
625 return ret; 933 return ret;
626 } 934 }
@@ -633,20 +941,20 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
633 return 0; 941 return 0;
634} 942}
635 943
636int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, 944int btrfs_check_leaf_full(struct extent_buffer *leaf)
637 struct extent_buffer *leaf)
638{ 945{
639 return check_leaf(fs_info, leaf, true); 946 return check_leaf(leaf, true);
640} 947}
948ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
641 949
642int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, 950int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
643 struct extent_buffer *leaf)
644{ 951{
645 return check_leaf(fs_info, leaf, false); 952 return check_leaf(leaf, false);
646} 953}
647 954
648int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node) 955int btrfs_check_node(struct extent_buffer *node)
649{ 956{
957 struct btrfs_fs_info *fs_info = node->fs_info;
650 unsigned long nr = btrfs_header_nritems(node); 958 unsigned long nr = btrfs_header_nritems(node);
651 struct btrfs_key key, next_key; 959 struct btrfs_key key, next_key;
652 int slot; 960 int slot;
@@ -655,7 +963,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
655 int ret = 0; 963 int ret = 0;
656 964
657 if (level <= 0 || level >= BTRFS_MAX_LEVEL) { 965 if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
658 generic_err(fs_info, node, 0, 966 generic_err(node, 0,
659 "invalid level for node, have %d expect [1, %d]", 967 "invalid level for node, have %d expect [1, %d]",
660 level, BTRFS_MAX_LEVEL - 1); 968 level, BTRFS_MAX_LEVEL - 1);
661 return -EUCLEAN; 969 return -EUCLEAN;
@@ -675,13 +983,13 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
675 btrfs_node_key_to_cpu(node, &next_key, slot + 1); 983 btrfs_node_key_to_cpu(node, &next_key, slot + 1);
676 984
677 if (!bytenr) { 985 if (!bytenr) {
678 generic_err(fs_info, node, slot, 986 generic_err(node, slot,
679 "invalid NULL node pointer"); 987 "invalid NULL node pointer");
680 ret = -EUCLEAN; 988 ret = -EUCLEAN;
681 goto out; 989 goto out;
682 } 990 }
683 if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) { 991 if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
684 generic_err(fs_info, node, slot, 992 generic_err(node, slot,
685 "unaligned pointer, have %llu should be aligned to %u", 993 "unaligned pointer, have %llu should be aligned to %u",
686 bytenr, fs_info->sectorsize); 994 bytenr, fs_info->sectorsize);
687 ret = -EUCLEAN; 995 ret = -EUCLEAN;
@@ -689,7 +997,7 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
689 } 997 }
690 998
691 if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { 999 if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
692 generic_err(fs_info, node, slot, 1000 generic_err(node, slot,
693 "bad key order, current (%llu %u %llu) next (%llu %u %llu)", 1001 "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
694 key.objectid, key.type, key.offset, 1002 key.objectid, key.type, key.offset,
695 next_key.objectid, next_key.type, 1003 next_key.objectid, next_key.type,
@@ -701,3 +1009,4 @@ int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
701out: 1009out:
702 return ret; 1010 return ret;
703} 1011}
1012ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index ff043275b784..32fecc9dc1dd 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -14,15 +14,16 @@
14 * Will check not only the item pointers, but also every possible member 14 * Will check not only the item pointers, but also every possible member
15 * in item data. 15 * in item data.
16 */ 16 */
17int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info, 17int btrfs_check_leaf_full(struct extent_buffer *leaf);
18 struct extent_buffer *leaf);
19 18
20/* 19/*
21 * Less strict leaf checker. 20 * Less strict leaf checker.
22 * Will only check item pointers, not reading item data. 21 * Will only check item pointers, not reading item data.
23 */ 22 */
24int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, 23int btrfs_check_leaf_relaxed(struct extent_buffer *leaf);
25 struct extent_buffer *leaf); 24int btrfs_check_node(struct extent_buffer *node);
26int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); 25
26int btrfs_check_chunk_valid(struct extent_buffer *leaf,
27 struct btrfs_chunk *chunk, u64 logical);
27 28
28#endif 29#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 561884f60d35..6adcd8a2c5c7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -139,7 +139,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
139 mutex_lock(&root->log_mutex); 139 mutex_lock(&root->log_mutex);
140 140
141 if (root->log_root) { 141 if (root->log_root) {
142 if (btrfs_need_log_full_commit(fs_info, trans)) { 142 if (btrfs_need_log_full_commit(trans)) {
143 ret = -EAGAIN; 143 ret = -EAGAIN;
144 goto out; 144 goto out;
145 } 145 }
@@ -225,6 +225,17 @@ void btrfs_end_log_trans(struct btrfs_root *root)
225 } 225 }
226} 226}
227 227
228static int btrfs_write_tree_block(struct extent_buffer *buf)
229{
230 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
231 buf->start + buf->len - 1);
232}
233
234static void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
235{
236 filemap_fdatawait_range(buf->pages[0]->mapping,
237 buf->start, buf->start + buf->len - 1);
238}
228 239
229/* 240/*
230 * the walk control struct is used to pass state down the chain when 241 * the walk control struct is used to pass state down the chain when
@@ -304,7 +315,7 @@ static int process_one_buffer(struct btrfs_root *log,
304 315
305 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { 316 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
306 if (wc->pin && btrfs_header_level(eb) == 0) 317 if (wc->pin && btrfs_header_level(eb) == 0)
307 ret = btrfs_exclude_logged_extents(fs_info, eb); 318 ret = btrfs_exclude_logged_extents(eb);
308 if (wc->write) 319 if (wc->write)
309 btrfs_write_tree_block(eb); 320 btrfs_write_tree_block(eb);
310 if (wc->wait) 321 if (wc->wait)
@@ -333,7 +344,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
333 struct extent_buffer *eb, int slot, 344 struct extent_buffer *eb, int slot,
334 struct btrfs_key *key) 345 struct btrfs_key *key)
335{ 346{
336 struct btrfs_fs_info *fs_info = root->fs_info;
337 int ret; 347 int ret;
338 u32 item_size; 348 u32 item_size;
339 u64 saved_i_size = 0; 349 u64 saved_i_size = 0;
@@ -454,10 +464,9 @@ insert:
454 found_size = btrfs_item_size_nr(path->nodes[0], 464 found_size = btrfs_item_size_nr(path->nodes[0],
455 path->slots[0]); 465 path->slots[0]);
456 if (found_size > item_size) 466 if (found_size > item_size)
457 btrfs_truncate_item(fs_info, path, item_size, 1); 467 btrfs_truncate_item(path, item_size, 1);
458 else if (found_size < item_size) 468 else if (found_size < item_size)
459 btrfs_extend_item(fs_info, path, 469 btrfs_extend_item(path, item_size - found_size);
460 item_size - found_size);
461 } else if (ret) { 470 } else if (ret) {
462 return ret; 471 return ret;
463 } 472 }
@@ -694,9 +703,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
694 goto out; 703 goto out;
695 704
696 if (ins.objectid > 0) { 705 if (ins.objectid > 0) {
706 struct btrfs_ref ref = { 0 };
697 u64 csum_start; 707 u64 csum_start;
698 u64 csum_end; 708 u64 csum_end;
699 LIST_HEAD(ordered_sums); 709 LIST_HEAD(ordered_sums);
710
700 /* 711 /*
701 * is this extent already allocated in the extent 712 * is this extent already allocated in the extent
702 * allocation tree? If so, just add a reference 713 * allocation tree? If so, just add a reference
@@ -704,10 +715,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
704 ret = btrfs_lookup_data_extent(fs_info, ins.objectid, 715 ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
705 ins.offset); 716 ins.offset);
706 if (ret == 0) { 717 if (ret == 0) {
707 ret = btrfs_inc_extent_ref(trans, root, 718 btrfs_init_generic_ref(&ref,
708 ins.objectid, ins.offset, 719 BTRFS_ADD_DELAYED_REF,
709 0, root->root_key.objectid, 720 ins.objectid, ins.offset, 0);
721 btrfs_init_data_ref(&ref,
722 root->root_key.objectid,
710 key->objectid, offset); 723 key->objectid, offset);
724 ret = btrfs_inc_extent_ref(trans, &ref);
711 if (ret) 725 if (ret)
712 goto out; 726 goto out;
713 } else { 727 } else {
@@ -2725,7 +2739,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2725 if (trans) { 2739 if (trans) {
2726 btrfs_tree_lock(next); 2740 btrfs_tree_lock(next);
2727 btrfs_set_lock_blocking_write(next); 2741 btrfs_set_lock_blocking_write(next);
2728 clean_tree_block(fs_info, next); 2742 btrfs_clean_tree_block(next);
2729 btrfs_wait_tree_block_writeback(next); 2743 btrfs_wait_tree_block_writeback(next);
2730 btrfs_tree_unlock(next); 2744 btrfs_tree_unlock(next);
2731 } else { 2745 } else {
@@ -2809,7 +2823,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2809 if (trans) { 2823 if (trans) {
2810 btrfs_tree_lock(next); 2824 btrfs_tree_lock(next);
2811 btrfs_set_lock_blocking_write(next); 2825 btrfs_set_lock_blocking_write(next);
2812 clean_tree_block(fs_info, next); 2826 btrfs_clean_tree_block(next);
2813 btrfs_wait_tree_block_writeback(next); 2827 btrfs_wait_tree_block_writeback(next);
2814 btrfs_tree_unlock(next); 2828 btrfs_tree_unlock(next);
2815 } else { 2829 } else {
@@ -2891,7 +2905,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2891 if (trans) { 2905 if (trans) {
2892 btrfs_tree_lock(next); 2906 btrfs_tree_lock(next);
2893 btrfs_set_lock_blocking_write(next); 2907 btrfs_set_lock_blocking_write(next);
2894 clean_tree_block(fs_info, next); 2908 btrfs_clean_tree_block(next);
2895 btrfs_wait_tree_block_writeback(next); 2909 btrfs_wait_tree_block_writeback(next);
2896 btrfs_tree_unlock(next); 2910 btrfs_tree_unlock(next);
2897 } else { 2911 } else {
@@ -3066,7 +3080,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3066 } 3080 }
3067 3081
3068 /* bail out if we need to do a full commit */ 3082 /* bail out if we need to do a full commit */
3069 if (btrfs_need_log_full_commit(fs_info, trans)) { 3083 if (btrfs_need_log_full_commit(trans)) {
3070 ret = -EAGAIN; 3084 ret = -EAGAIN;
3071 mutex_unlock(&root->log_mutex); 3085 mutex_unlock(&root->log_mutex);
3072 goto out; 3086 goto out;
@@ -3085,7 +3099,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3085 if (ret) { 3099 if (ret) {
3086 blk_finish_plug(&plug); 3100 blk_finish_plug(&plug);
3087 btrfs_abort_transaction(trans, ret); 3101 btrfs_abort_transaction(trans, ret);
3088 btrfs_set_log_full_commit(fs_info, trans); 3102 btrfs_set_log_full_commit(trans);
3089 mutex_unlock(&root->log_mutex); 3103 mutex_unlock(&root->log_mutex);
3090 goto out; 3104 goto out;
3091 } 3105 }
@@ -3127,7 +3141,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3127 list_del_init(&root_log_ctx.list); 3141 list_del_init(&root_log_ctx.list);
3128 3142
3129 blk_finish_plug(&plug); 3143 blk_finish_plug(&plug);
3130 btrfs_set_log_full_commit(fs_info, trans); 3144 btrfs_set_log_full_commit(trans);
3131 3145
3132 if (ret != -ENOSPC) { 3146 if (ret != -ENOSPC) {
3133 btrfs_abort_transaction(trans, ret); 3147 btrfs_abort_transaction(trans, ret);
@@ -3173,7 +3187,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3173 * now that we've moved on to the tree of log tree roots, 3187 * now that we've moved on to the tree of log tree roots,
3174 * check the full commit flag again 3188 * check the full commit flag again
3175 */ 3189 */
3176 if (btrfs_need_log_full_commit(fs_info, trans)) { 3190 if (btrfs_need_log_full_commit(trans)) {
3177 blk_finish_plug(&plug); 3191 blk_finish_plug(&plug);
3178 btrfs_wait_tree_log_extents(log, mark); 3192 btrfs_wait_tree_log_extents(log, mark);
3179 mutex_unlock(&log_root_tree->log_mutex); 3193 mutex_unlock(&log_root_tree->log_mutex);
@@ -3186,7 +3200,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3186 EXTENT_DIRTY | EXTENT_NEW); 3200 EXTENT_DIRTY | EXTENT_NEW);
3187 blk_finish_plug(&plug); 3201 blk_finish_plug(&plug);
3188 if (ret) { 3202 if (ret) {
3189 btrfs_set_log_full_commit(fs_info, trans); 3203 btrfs_set_log_full_commit(trans);
3190 btrfs_abort_transaction(trans, ret); 3204 btrfs_abort_transaction(trans, ret);
3191 mutex_unlock(&log_root_tree->log_mutex); 3205 mutex_unlock(&log_root_tree->log_mutex);
3192 goto out_wake_log_root; 3206 goto out_wake_log_root;
@@ -3196,7 +3210,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3196 ret = btrfs_wait_tree_log_extents(log_root_tree, 3210 ret = btrfs_wait_tree_log_extents(log_root_tree,
3197 EXTENT_NEW | EXTENT_DIRTY); 3211 EXTENT_NEW | EXTENT_DIRTY);
3198 if (ret) { 3212 if (ret) {
3199 btrfs_set_log_full_commit(fs_info, trans); 3213 btrfs_set_log_full_commit(trans);
3200 mutex_unlock(&log_root_tree->log_mutex); 3214 mutex_unlock(&log_root_tree->log_mutex);
3201 goto out_wake_log_root; 3215 goto out_wake_log_root;
3202 } 3216 }
@@ -3218,7 +3232,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3218 */ 3232 */
3219 ret = write_all_supers(fs_info, 1); 3233 ret = write_all_supers(fs_info, 1);
3220 if (ret) { 3234 if (ret) {
3221 btrfs_set_log_full_commit(fs_info, trans); 3235 btrfs_set_log_full_commit(trans);
3222 btrfs_abort_transaction(trans, ret); 3236 btrfs_abort_transaction(trans, ret);
3223 goto out_wake_log_root; 3237 goto out_wake_log_root;
3224 } 3238 }
@@ -3422,7 +3436,7 @@ fail:
3422out_unlock: 3436out_unlock:
3423 mutex_unlock(&dir->log_mutex); 3437 mutex_unlock(&dir->log_mutex);
3424 if (ret == -ENOSPC) { 3438 if (ret == -ENOSPC) {
3425 btrfs_set_log_full_commit(root->fs_info, trans); 3439 btrfs_set_log_full_commit(trans);
3426 ret = 0; 3440 ret = 0;
3427 } else if (ret < 0) 3441 } else if (ret < 0)
3428 btrfs_abort_transaction(trans, ret); 3442 btrfs_abort_transaction(trans, ret);
@@ -3438,7 +3452,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
3438 const char *name, int name_len, 3452 const char *name, int name_len,
3439 struct btrfs_inode *inode, u64 dirid) 3453 struct btrfs_inode *inode, u64 dirid)
3440{ 3454{
3441 struct btrfs_fs_info *fs_info = root->fs_info;
3442 struct btrfs_root *log; 3455 struct btrfs_root *log;
3443 u64 index; 3456 u64 index;
3444 int ret; 3457 int ret;
@@ -3456,7 +3469,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
3456 dirid, &index); 3469 dirid, &index);
3457 mutex_unlock(&inode->log_mutex); 3470 mutex_unlock(&inode->log_mutex);
3458 if (ret == -ENOSPC) { 3471 if (ret == -ENOSPC) {
3459 btrfs_set_log_full_commit(fs_info, trans); 3472 btrfs_set_log_full_commit(trans);
3460 ret = 0; 3473 ret = 0;
3461 } else if (ret < 0 && ret != -ENOENT) 3474 } else if (ret < 0 && ret != -ENOENT)
3462 btrfs_abort_transaction(trans, ret); 3475 btrfs_abort_transaction(trans, ret);
@@ -5442,7 +5455,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
5442 * Make sure any commits to the log are forced to be full 5455 * Make sure any commits to the log are forced to be full
5443 * commits. 5456 * commits.
5444 */ 5457 */
5445 btrfs_set_log_full_commit(fs_info, trans); 5458 btrfs_set_log_full_commit(trans);
5446 ret = true; 5459 ret = true;
5447 } 5460 }
5448 mutex_unlock(&inode->log_mutex); 5461 mutex_unlock(&inode->log_mutex);
@@ -5819,6 +5832,190 @@ out:
5819 return ret; 5832 return ret;
5820} 5833}
5821 5834
5835static int log_new_ancestors(struct btrfs_trans_handle *trans,
5836 struct btrfs_root *root,
5837 struct btrfs_path *path,
5838 struct btrfs_log_ctx *ctx)
5839{
5840 struct btrfs_key found_key;
5841
5842 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
5843
5844 while (true) {
5845 struct btrfs_fs_info *fs_info = root->fs_info;
5846 const u64 last_committed = fs_info->last_trans_committed;
5847 struct extent_buffer *leaf = path->nodes[0];
5848 int slot = path->slots[0];
5849 struct btrfs_key search_key;
5850 struct inode *inode;
5851 int ret = 0;
5852
5853 btrfs_release_path(path);
5854
5855 search_key.objectid = found_key.offset;
5856 search_key.type = BTRFS_INODE_ITEM_KEY;
5857 search_key.offset = 0;
5858 inode = btrfs_iget(fs_info->sb, &search_key, root, NULL);
5859 if (IS_ERR(inode))
5860 return PTR_ERR(inode);
5861
5862 if (BTRFS_I(inode)->generation > last_committed)
5863 ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
5864 LOG_INODE_EXISTS,
5865 0, LLONG_MAX, ctx);
5866 iput(inode);
5867 if (ret)
5868 return ret;
5869
5870 if (search_key.objectid == BTRFS_FIRST_FREE_OBJECTID)
5871 break;
5872
5873 search_key.type = BTRFS_INODE_REF_KEY;
5874 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
5875 if (ret < 0)
5876 return ret;
5877
5878 leaf = path->nodes[0];
5879 slot = path->slots[0];
5880 if (slot >= btrfs_header_nritems(leaf)) {
5881 ret = btrfs_next_leaf(root, path);
5882 if (ret < 0)
5883 return ret;
5884 else if (ret > 0)
5885 return -ENOENT;
5886 leaf = path->nodes[0];
5887 slot = path->slots[0];
5888 }
5889
5890 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5891 if (found_key.objectid != search_key.objectid ||
5892 found_key.type != BTRFS_INODE_REF_KEY)
5893 return -ENOENT;
5894 }
5895 return 0;
5896}
5897
5898static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
5899 struct btrfs_inode *inode,
5900 struct dentry *parent,
5901 struct btrfs_log_ctx *ctx)
5902{
5903 struct btrfs_root *root = inode->root;
5904 struct btrfs_fs_info *fs_info = root->fs_info;
5905 struct dentry *old_parent = NULL;
5906 struct super_block *sb = inode->vfs_inode.i_sb;
5907 int ret = 0;
5908
5909 while (true) {
5910 if (!parent || d_really_is_negative(parent) ||
5911 sb != parent->d_sb)
5912 break;
5913
5914 inode = BTRFS_I(d_inode(parent));
5915 if (root != inode->root)
5916 break;
5917
5918 if (inode->generation > fs_info->last_trans_committed) {
5919 ret = btrfs_log_inode(trans, root, inode,
5920 LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
5921 if (ret)
5922 break;
5923 }
5924 if (IS_ROOT(parent))
5925 break;
5926
5927 parent = dget_parent(parent);
5928 dput(old_parent);
5929 old_parent = parent;
5930 }
5931 dput(old_parent);
5932
5933 return ret;
5934}
5935
5936static int log_all_new_ancestors(struct btrfs_trans_handle *trans,
5937 struct btrfs_inode *inode,
5938 struct dentry *parent,
5939 struct btrfs_log_ctx *ctx)
5940{
5941 struct btrfs_root *root = inode->root;
5942 const u64 ino = btrfs_ino(inode);
5943 struct btrfs_path *path;
5944 struct btrfs_key search_key;
5945 int ret;
5946
5947 /*
5948 * For a single hard link case, go through a fast path that does not
5949 * need to iterate the fs/subvolume tree.
5950 */
5951 if (inode->vfs_inode.i_nlink < 2)
5952 return log_new_ancestors_fast(trans, inode, parent, ctx);
5953
5954 path = btrfs_alloc_path();
5955 if (!path)
5956 return -ENOMEM;
5957
5958 search_key.objectid = ino;
5959 search_key.type = BTRFS_INODE_REF_KEY;
5960 search_key.offset = 0;
5961again:
5962 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
5963 if (ret < 0)
5964 goto out;
5965 if (ret == 0)
5966 path->slots[0]++;
5967
5968 while (true) {
5969 struct extent_buffer *leaf = path->nodes[0];
5970 int slot = path->slots[0];
5971 struct btrfs_key found_key;
5972
5973 if (slot >= btrfs_header_nritems(leaf)) {
5974 ret = btrfs_next_leaf(root, path);
5975 if (ret < 0)
5976 goto out;
5977 else if (ret > 0)
5978 break;
5979 continue;
5980 }
5981
5982 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5983 if (found_key.objectid != ino ||
5984 found_key.type > BTRFS_INODE_EXTREF_KEY)
5985 break;
5986
5987 /*
5988 * Don't deal with extended references because they are rare
5989 * cases and too complex to deal with (we would need to keep
5990 * track of which subitem we are processing for each item in
5991 * this loop, etc). So just return some error to fallback to
5992 * a transaction commit.
5993 */
5994 if (found_key.type == BTRFS_INODE_EXTREF_KEY) {
5995 ret = -EMLINK;
5996 goto out;
5997 }
5998
5999 /*
6000 * Logging ancestors needs to do more searches on the fs/subvol
6001 * tree, so it releases the path as needed to avoid deadlocks.
6002 * Keep track of the last inode ref key and resume from that key
6003 * after logging all new ancestors for the current hard link.
6004 */
6005 memcpy(&search_key, &found_key, sizeof(search_key));
6006
6007 ret = log_new_ancestors(trans, root, path, ctx);
6008 if (ret)
6009 goto out;
6010 btrfs_release_path(path);
6011 goto again;
6012 }
6013 ret = 0;
6014out:
6015 btrfs_free_path(path);
6016 return ret;
6017}
6018
5822/* 6019/*
5823 * helper function around btrfs_log_inode to make sure newly created 6020 * helper function around btrfs_log_inode to make sure newly created
5824 * parent directories also end up in the log. A minimal inode and backref 6021 * parent directories also end up in the log. A minimal inode and backref
@@ -5836,11 +6033,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
5836 struct btrfs_root *root = inode->root; 6033 struct btrfs_root *root = inode->root;
5837 struct btrfs_fs_info *fs_info = root->fs_info; 6034 struct btrfs_fs_info *fs_info = root->fs_info;
5838 struct super_block *sb; 6035 struct super_block *sb;
5839 struct dentry *old_parent = NULL;
5840 int ret = 0; 6036 int ret = 0;
5841 u64 last_committed = fs_info->last_trans_committed; 6037 u64 last_committed = fs_info->last_trans_committed;
5842 bool log_dentries = false; 6038 bool log_dentries = false;
5843 struct btrfs_inode *orig_inode = inode;
5844 6039
5845 sb = inode->vfs_inode.i_sb; 6040 sb = inode->vfs_inode.i_sb;
5846 6041
@@ -5946,56 +6141,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
5946 * and has a link count of 2. 6141 * and has a link count of 2.
5947 */ 6142 */
5948 if (inode->last_unlink_trans > last_committed) { 6143 if (inode->last_unlink_trans > last_committed) {
5949 ret = btrfs_log_all_parents(trans, orig_inode, ctx); 6144 ret = btrfs_log_all_parents(trans, inode, ctx);
5950 if (ret) 6145 if (ret)
5951 goto end_trans; 6146 goto end_trans;
5952 } 6147 }
5953 6148
5954 /* 6149 ret = log_all_new_ancestors(trans, inode, parent, ctx);
5955 * If a new hard link was added to the inode in the current transaction 6150 if (ret)
5956 * and its link count is now greater than 1, we need to fallback to a
5957 * transaction commit, otherwise we can end up not logging all its new
5958 * parents for all the hard links. Here just from the dentry used to
5959 * fsync, we can not visit the ancestor inodes for all the other hard
5960 * links to figure out if any is new, so we fallback to a transaction
5961 * commit (instead of adding a lot of complexity of scanning a btree,
5962 * since this scenario is not a common use case).
5963 */
5964 if (inode->vfs_inode.i_nlink > 1 &&
5965 inode->last_link_trans > last_committed) {
5966 ret = -EMLINK;
5967 goto end_trans; 6151 goto end_trans;
5968 }
5969 6152
5970 while (1) {
5971 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
5972 break;
5973
5974 inode = BTRFS_I(d_inode(parent));
5975 if (root != inode->root)
5976 break;
5977
5978 if (inode->generation > last_committed) {
5979 ret = btrfs_log_inode(trans, root, inode,
5980 LOG_INODE_EXISTS, 0, LLONG_MAX, ctx);
5981 if (ret)
5982 goto end_trans;
5983 }
5984 if (IS_ROOT(parent))
5985 break;
5986
5987 parent = dget_parent(parent);
5988 dput(old_parent);
5989 old_parent = parent;
5990 }
5991 if (log_dentries) 6153 if (log_dentries)
5992 ret = log_new_dir_dentries(trans, root, orig_inode, ctx); 6154 ret = log_new_dir_dentries(trans, root, inode, ctx);
5993 else 6155 else
5994 ret = 0; 6156 ret = 0;
5995end_trans: 6157end_trans:
5996 dput(old_parent);
5997 if (ret < 0) { 6158 if (ret < 0) {
5998 btrfs_set_log_full_commit(fs_info, trans); 6159 btrfs_set_log_full_commit(trans);
5999 ret = 1; 6160 ret = 1;
6000 } 6161 }
6001 6162
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 0fab84a8f670..132e43d29034 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,16 +30,14 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
30 INIT_LIST_HEAD(&ctx->list); 30 INIT_LIST_HEAD(&ctx->list);
31} 31}
32 32
33static inline void btrfs_set_log_full_commit(struct btrfs_fs_info *fs_info, 33static inline void btrfs_set_log_full_commit(struct btrfs_trans_handle *trans)
34 struct btrfs_trans_handle *trans)
35{ 34{
36 WRITE_ONCE(fs_info->last_trans_log_full_commit, trans->transid); 35 WRITE_ONCE(trans->fs_info->last_trans_log_full_commit, trans->transid);
37} 36}
38 37
39static inline int btrfs_need_log_full_commit(struct btrfs_fs_info *fs_info, 38static inline int btrfs_need_log_full_commit(struct btrfs_trans_handle *trans)
40 struct btrfs_trans_handle *trans)
41{ 39{
42 return READ_ONCE(fs_info->last_trans_log_full_commit) == 40 return READ_ONCE(trans->fs_info->last_trans_log_full_commit) ==
43 trans->transid; 41 trans->transid;
44} 42}
45 43
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 3b2ae342e649..91caab63bdf5 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -121,12 +121,12 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
121 * An item with that type already exists. 121 * An item with that type already exists.
122 * Extend the item and store the new subid at the end. 122 * Extend the item and store the new subid at the end.
123 */ 123 */
124 btrfs_extend_item(fs_info, path, sizeof(subid_le)); 124 btrfs_extend_item(path, sizeof(subid_le));
125 eb = path->nodes[0]; 125 eb = path->nodes[0];
126 slot = path->slots[0]; 126 slot = path->slots[0];
127 offset = btrfs_item_ptr_offset(eb, slot); 127 offset = btrfs_item_ptr_offset(eb, slot);
128 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); 128 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
129 } else if (ret < 0) { 129 } else {
130 btrfs_warn(fs_info, 130 btrfs_warn(fs_info,
131 "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!", 131 "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
132 ret, (unsigned long long)key.objectid, 132 ret, (unsigned long long)key.objectid,
@@ -219,7 +219,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
219 move_src = offset + sizeof(subid); 219 move_src = offset + sizeof(subid);
220 move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); 220 move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
221 memmove_extent_buffer(eb, move_dst, move_src, move_len); 221 memmove_extent_buffer(eb, move_dst, move_src, move_len);
222 btrfs_truncate_item(fs_info, path, item_size - sizeof(subid), 1); 222 btrfs_truncate_item(path, item_size - sizeof(subid), 1);
223 223
224out: 224out:
225 btrfs_free_path(path); 225 btrfs_free_path(path);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index db934ceae9c1..1c2a6e4b39da 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@
27#include "math.h" 27#include "math.h"
28#include "dev-replace.h" 28#include "dev-replace.h"
29#include "sysfs.h" 29#include "sysfs.h"
30#include "tree-checker.h"
30 31
31const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 32const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
32 [BTRFS_RAID_RAID10] = { 33 [BTRFS_RAID_RAID10] = {
@@ -184,8 +185,7 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
184out_overflow:; 185out_overflow:;
185} 186}
186 187
187static int init_first_rw_device(struct btrfs_trans_handle *trans, 188static int init_first_rw_device(struct btrfs_trans_handle *trans);
188 struct btrfs_fs_info *fs_info);
189static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); 189static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
190static void __btrfs_reset_dev_stats(struct btrfs_device *dev); 190static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
191static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); 191static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
@@ -318,7 +318,6 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
318 mutex_init(&fs_devs->device_list_mutex); 318 mutex_init(&fs_devs->device_list_mutex);
319 319
320 INIT_LIST_HEAD(&fs_devs->devices); 320 INIT_LIST_HEAD(&fs_devs->devices);
321 INIT_LIST_HEAD(&fs_devs->resized_devices);
322 INIT_LIST_HEAD(&fs_devs->alloc_list); 321 INIT_LIST_HEAD(&fs_devs->alloc_list);
323 INIT_LIST_HEAD(&fs_devs->fs_list); 322 INIT_LIST_HEAD(&fs_devs->fs_list);
324 if (fsid) 323 if (fsid)
@@ -334,7 +333,9 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
334 333
335void btrfs_free_device(struct btrfs_device *device) 334void btrfs_free_device(struct btrfs_device *device)
336{ 335{
336 WARN_ON(!list_empty(&device->post_commit_list));
337 rcu_string_free(device->name); 337 rcu_string_free(device->name);
338 extent_io_tree_release(&device->alloc_state);
338 bio_put(device->flush_bio); 339 bio_put(device->flush_bio);
339 kfree(device); 340 kfree(device);
340} 341}
@@ -402,7 +403,7 @@ static struct btrfs_device *__alloc_device(void)
402 403
403 INIT_LIST_HEAD(&dev->dev_list); 404 INIT_LIST_HEAD(&dev->dev_list);
404 INIT_LIST_HEAD(&dev->dev_alloc_list); 405 INIT_LIST_HEAD(&dev->dev_alloc_list);
405 INIT_LIST_HEAD(&dev->resized_list); 406 INIT_LIST_HEAD(&dev->post_commit_list);
406 407
407 spin_lock_init(&dev->io_lock); 408 spin_lock_init(&dev->io_lock);
408 409
@@ -411,6 +412,7 @@ static struct btrfs_device *__alloc_device(void)
411 btrfs_device_data_ordered_init(dev); 412 btrfs_device_data_ordered_init(dev);
412 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); 413 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
413 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); 414 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
415 extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
414 416
415 return dev; 417 return dev;
416} 418}
@@ -1230,14 +1232,6 @@ again:
1230 mutex_unlock(&uuid_mutex); 1232 mutex_unlock(&uuid_mutex);
1231} 1233}
1232 1234
1233static void free_device_rcu(struct rcu_head *head)
1234{
1235 struct btrfs_device *device;
1236
1237 device = container_of(head, struct btrfs_device, rcu);
1238 btrfs_free_device(device);
1239}
1240
1241static void btrfs_close_bdev(struct btrfs_device *device) 1235static void btrfs_close_bdev(struct btrfs_device *device)
1242{ 1236{
1243 if (!device->bdev) 1237 if (!device->bdev)
@@ -1285,7 +1279,8 @@ static void btrfs_close_one_device(struct btrfs_device *device)
1285 list_replace_rcu(&device->dev_list, &new_device->dev_list); 1279 list_replace_rcu(&device->dev_list, &new_device->dev_list);
1286 new_device->fs_devices = device->fs_devices; 1280 new_device->fs_devices = device->fs_devices;
1287 1281
1288 call_rcu(&device->rcu, free_device_rcu); 1282 synchronize_rcu();
1283 btrfs_free_device(device);
1289} 1284}
1290 1285
1291static int close_fs_devices(struct btrfs_fs_devices *fs_devices) 1286static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -1505,58 +1500,29 @@ error_bdev_put:
1505 return device; 1500 return device;
1506} 1501}
1507 1502
1508static int contains_pending_extent(struct btrfs_transaction *transaction, 1503/*
1509 struct btrfs_device *device, 1504 * Try to find a chunk that intersects [start, start + len] range and when one
1510 u64 *start, u64 len) 1505 * such is found, record the end of it in *start
1506 */
1507static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
1508 u64 len)
1511{ 1509{
1512 struct btrfs_fs_info *fs_info = device->fs_info; 1510 u64 physical_start, physical_end;
1513 struct extent_map *em;
1514 struct list_head *search_list = &fs_info->pinned_chunks;
1515 int ret = 0;
1516 u64 physical_start = *start;
1517 1511
1518 if (transaction) 1512 lockdep_assert_held(&device->fs_info->chunk_mutex);
1519 search_list = &transaction->pending_chunks;
1520again:
1521 list_for_each_entry(em, search_list, list) {
1522 struct map_lookup *map;
1523 int i;
1524 1513
1525 map = em->map_lookup; 1514 if (!find_first_extent_bit(&device->alloc_state, *start,
1526 for (i = 0; i < map->num_stripes; i++) { 1515 &physical_start, &physical_end,
1527 u64 end; 1516 CHUNK_ALLOCATED, NULL)) {
1528 1517
1529 if (map->stripes[i].dev != device) 1518 if (in_range(physical_start, *start, len) ||
1530 continue; 1519 in_range(*start, physical_start,
1531 if (map->stripes[i].physical >= physical_start + len || 1520 physical_end - physical_start)) {
1532 map->stripes[i].physical + em->orig_block_len <= 1521 *start = physical_end + 1;
1533 physical_start) 1522 return true;
1534 continue;
1535 /*
1536 * Make sure that while processing the pinned list we do
1537 * not override our *start with a lower value, because
1538 * we can have pinned chunks that fall within this
1539 * device hole and that have lower physical addresses
1540 * than the pending chunks we processed before. If we
1541 * do not take this special care we can end up getting
1542 * 2 pending chunks that start at the same physical
1543 * device offsets because the end offset of a pinned
1544 * chunk can be equal to the start offset of some
1545 * pending chunk.
1546 */
1547 end = map->stripes[i].physical + em->orig_block_len;
1548 if (end > *start) {
1549 *start = end;
1550 ret = 1;
1551 }
1552 } 1523 }
1553 } 1524 }
1554 if (search_list != &fs_info->pinned_chunks) { 1525 return false;
1555 search_list = &fs_info->pinned_chunks;
1556 goto again;
1557 }
1558
1559 return ret;
1560} 1526}
1561 1527
1562 1528
@@ -1581,8 +1547,7 @@ again:
1581 * But if we don't find suitable free space, it is used to store the size of 1547 * But if we don't find suitable free space, it is used to store the size of
1582 * the max free space. 1548 * the max free space.
1583 */ 1549 */
1584int find_free_dev_extent_start(struct btrfs_transaction *transaction, 1550int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
1585 struct btrfs_device *device, u64 num_bytes,
1586 u64 search_start, u64 *start, u64 *len) 1551 u64 search_start, u64 *start, u64 *len)
1587{ 1552{
1588 struct btrfs_fs_info *fs_info = device->fs_info; 1553 struct btrfs_fs_info *fs_info = device->fs_info;
@@ -1667,15 +1632,12 @@ again:
1667 * Have to check before we set max_hole_start, otherwise 1632 * Have to check before we set max_hole_start, otherwise
1668 * we could end up sending back this offset anyway. 1633 * we could end up sending back this offset anyway.
1669 */ 1634 */
1670 if (contains_pending_extent(transaction, device, 1635 if (contains_pending_extent(device, &search_start,
1671 &search_start,
1672 hole_size)) { 1636 hole_size)) {
1673 if (key.offset >= search_start) { 1637 if (key.offset >= search_start)
1674 hole_size = key.offset - search_start; 1638 hole_size = key.offset - search_start;
1675 } else { 1639 else
1676 WARN_ON_ONCE(1);
1677 hole_size = 0; 1640 hole_size = 0;
1678 }
1679 } 1641 }
1680 1642
1681 if (hole_size > max_hole_size) { 1643 if (hole_size > max_hole_size) {
@@ -1716,8 +1678,7 @@ next:
1716 if (search_end > search_start) { 1678 if (search_end > search_start) {
1717 hole_size = search_end - search_start; 1679 hole_size = search_end - search_start;
1718 1680
1719 if (contains_pending_extent(transaction, device, &search_start, 1681 if (contains_pending_extent(device, &search_start, hole_size)) {
1720 hole_size)) {
1721 btrfs_release_path(path); 1682 btrfs_release_path(path);
1722 goto again; 1683 goto again;
1723 } 1684 }
@@ -1742,13 +1703,11 @@ out:
1742 return ret; 1703 return ret;
1743} 1704}
1744 1705
1745int find_free_dev_extent(struct btrfs_trans_handle *trans, 1706int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
1746 struct btrfs_device *device, u64 num_bytes,
1747 u64 *start, u64 *len) 1707 u64 *start, u64 *len)
1748{ 1708{
1749 /* FIXME use last free of some kind */ 1709 /* FIXME use last free of some kind */
1750 return find_free_dev_extent_start(trans->transaction, device, 1710 return find_free_dev_extent_start(device, num_bytes, 0, start, len);
1751 num_bytes, 0, start, len);
1752} 1711}
1753 1712
1754static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, 1713static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
@@ -1982,10 +1941,9 @@ static void update_dev_time(const char *path_name)
1982 filp_close(filp, NULL); 1941 filp_close(filp, NULL);
1983} 1942}
1984 1943
1985static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info, 1944static int btrfs_rm_dev_item(struct btrfs_device *device)
1986 struct btrfs_device *device)
1987{ 1945{
1988 struct btrfs_root *root = fs_info->chunk_root; 1946 struct btrfs_root *root = device->fs_info->chunk_root;
1989 int ret; 1947 int ret;
1990 struct btrfs_path *path; 1948 struct btrfs_path *path;
1991 struct btrfs_key key; 1949 struct btrfs_key key;
@@ -2186,12 +2144,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
2186 * counter although write_all_supers() is not locked out. This 2144 * counter although write_all_supers() is not locked out. This
2187 * could give a filesystem state which requires a degraded mount. 2145 * could give a filesystem state which requires a degraded mount.
2188 */ 2146 */
2189 ret = btrfs_rm_dev_item(fs_info, device); 2147 ret = btrfs_rm_dev_item(device);
2190 if (ret) 2148 if (ret)
2191 goto error_undo; 2149 goto error_undo;
2192 2150
2193 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); 2151 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2194 btrfs_scrub_cancel_dev(fs_info, device); 2152 btrfs_scrub_cancel_dev(device);
2195 2153
2196 /* 2154 /*
2197 * the device list mutex makes sure that we don't change 2155 * the device list mutex makes sure that we don't change
@@ -2242,7 +2200,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
2242 btrfs_scratch_superblocks(device->bdev, device->name->str); 2200 btrfs_scratch_superblocks(device->bdev, device->name->str);
2243 2201
2244 btrfs_close_bdev(device); 2202 btrfs_close_bdev(device);
2245 call_rcu(&device->rcu, free_device_rcu); 2203 synchronize_rcu();
2204 btrfs_free_device(device);
2246 2205
2247 if (cur_devices->open_devices == 0) { 2206 if (cur_devices->open_devices == 0) {
2248 while (fs_devices) { 2207 while (fs_devices) {
@@ -2299,9 +2258,9 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
2299 fs_devices->open_devices--; 2258 fs_devices->open_devices--;
2300} 2259}
2301 2260
2302void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, 2261void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
2303 struct btrfs_device *srcdev)
2304{ 2262{
2263 struct btrfs_fs_info *fs_info = srcdev->fs_info;
2305 struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; 2264 struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
2306 2265
2307 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { 2266 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
@@ -2310,7 +2269,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2310 } 2269 }
2311 2270
2312 btrfs_close_bdev(srcdev); 2271 btrfs_close_bdev(srcdev);
2313 call_rcu(&srcdev->rcu, free_device_rcu); 2272 synchronize_rcu();
2273 btrfs_free_device(srcdev);
2314 2274
2315 /* if this is no devs we rather delete the fs_devices */ 2275 /* if this is no devs we rather delete the fs_devices */
2316 if (!fs_devices->num_devices) { 2276 if (!fs_devices->num_devices) {
@@ -2368,7 +2328,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
2368 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); 2328 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
2369 2329
2370 btrfs_close_bdev(tgtdev); 2330 btrfs_close_bdev(tgtdev);
2371 call_rcu(&tgtdev->rcu, free_device_rcu); 2331 synchronize_rcu();
2332 btrfs_free_device(tgtdev);
2372} 2333}
2373 2334
2374static struct btrfs_device *btrfs_find_device_by_path( 2335static struct btrfs_device *btrfs_find_device_by_path(
@@ -2503,9 +2464,9 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2503/* 2464/*
2504 * Store the expected generation for seed devices in device items. 2465 * Store the expected generation for seed devices in device items.
2505 */ 2466 */
2506static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, 2467static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
2507 struct btrfs_fs_info *fs_info)
2508{ 2468{
2469 struct btrfs_fs_info *fs_info = trans->fs_info;
2509 struct btrfs_root *root = fs_info->chunk_root; 2470 struct btrfs_root *root = fs_info->chunk_root;
2510 struct btrfs_path *path; 2471 struct btrfs_path *path;
2511 struct extent_buffer *leaf; 2472 struct extent_buffer *leaf;
@@ -2705,7 +2666,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2705 2666
2706 if (seeding_dev) { 2667 if (seeding_dev) {
2707 mutex_lock(&fs_info->chunk_mutex); 2668 mutex_lock(&fs_info->chunk_mutex);
2708 ret = init_first_rw_device(trans, fs_info); 2669 ret = init_first_rw_device(trans);
2709 mutex_unlock(&fs_info->chunk_mutex); 2670 mutex_unlock(&fs_info->chunk_mutex);
2710 if (ret) { 2671 if (ret) {
2711 btrfs_abort_transaction(trans, ret); 2672 btrfs_abort_transaction(trans, ret);
@@ -2722,7 +2683,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2722 if (seeding_dev) { 2683 if (seeding_dev) {
2723 char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; 2684 char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
2724 2685
2725 ret = btrfs_finish_sprout(trans, fs_info); 2686 ret = btrfs_finish_sprout(trans);
2726 if (ret) { 2687 if (ret) {
2727 btrfs_abort_transaction(trans, ret); 2688 btrfs_abort_transaction(trans, ret);
2728 goto error_sysfs; 2689 goto error_sysfs;
@@ -2852,7 +2813,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2852{ 2813{
2853 struct btrfs_fs_info *fs_info = device->fs_info; 2814 struct btrfs_fs_info *fs_info = device->fs_info;
2854 struct btrfs_super_block *super_copy = fs_info->super_copy; 2815 struct btrfs_super_block *super_copy = fs_info->super_copy;
2855 struct btrfs_fs_devices *fs_devices;
2856 u64 old_total; 2816 u64 old_total;
2857 u64 diff; 2817 u64 diff;
2858 2818
@@ -2871,8 +2831,6 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2871 return -EINVAL; 2831 return -EINVAL;
2872 } 2832 }
2873 2833
2874 fs_devices = fs_info->fs_devices;
2875
2876 btrfs_set_super_total_bytes(super_copy, 2834 btrfs_set_super_total_bytes(super_copy,
2877 round_down(old_total + diff, fs_info->sectorsize)); 2835 round_down(old_total + diff, fs_info->sectorsize));
2878 device->fs_devices->total_rw_bytes += diff; 2836 device->fs_devices->total_rw_bytes += diff;
@@ -2880,9 +2838,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2880 btrfs_device_set_total_bytes(device, new_size); 2838 btrfs_device_set_total_bytes(device, new_size);
2881 btrfs_device_set_disk_total_bytes(device, new_size); 2839 btrfs_device_set_disk_total_bytes(device, new_size);
2882 btrfs_clear_space_info_full(device->fs_info); 2840 btrfs_clear_space_info_full(device->fs_info);
2883 if (list_empty(&device->resized_list)) 2841 if (list_empty(&device->post_commit_list))
2884 list_add_tail(&device->resized_list, 2842 list_add_tail(&device->post_commit_list,
2885 &fs_devices->resized_devices); 2843 &trans->transaction->dev_update_list);
2886 mutex_unlock(&fs_info->chunk_mutex); 2844 mutex_unlock(&fs_info->chunk_mutex);
2887 2845
2888 return btrfs_update_device(trans, device); 2846 return btrfs_update_device(trans, device);
@@ -3601,10 +3559,10 @@ static int chunk_soft_convert_filter(u64 chunk_type,
3601 return 0; 3559 return 0;
3602} 3560}
3603 3561
3604static int should_balance_chunk(struct btrfs_fs_info *fs_info, 3562static int should_balance_chunk(struct extent_buffer *leaf,
3605 struct extent_buffer *leaf,
3606 struct btrfs_chunk *chunk, u64 chunk_offset) 3563 struct btrfs_chunk *chunk, u64 chunk_offset)
3607{ 3564{
3565 struct btrfs_fs_info *fs_info = leaf->fs_info;
3608 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 3566 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3609 struct btrfs_balance_args *bargs = NULL; 3567 struct btrfs_balance_args *bargs = NULL;
3610 u64 chunk_type = btrfs_chunk_type(leaf, chunk); 3568 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
@@ -3784,8 +3742,7 @@ again:
3784 spin_unlock(&fs_info->balance_lock); 3742 spin_unlock(&fs_info->balance_lock);
3785 } 3743 }
3786 3744
3787 ret = should_balance_chunk(fs_info, leaf, chunk, 3745 ret = should_balance_chunk(leaf, chunk, found_key.offset);
3788 found_key.offset);
3789 3746
3790 btrfs_release_path(path); 3747 btrfs_release_path(path);
3791 if (!ret) { 3748 if (!ret) {
@@ -4661,8 +4618,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4661 if (IS_ERR(trans)) 4618 if (IS_ERR(trans))
4662 return PTR_ERR(trans); 4619 return PTR_ERR(trans);
4663 4620
4664 uuid_root = btrfs_create_tree(trans, fs_info, 4621 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4665 BTRFS_UUID_TREE_OBJECTID);
4666 if (IS_ERR(uuid_root)) { 4622 if (IS_ERR(uuid_root)) {
4667 ret = PTR_ERR(uuid_root); 4623 ret = PTR_ERR(uuid_root);
4668 btrfs_abort_transaction(trans, ret); 4624 btrfs_abort_transaction(trans, ret);
@@ -4722,15 +4678,16 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4722 int slot; 4678 int slot;
4723 int failed = 0; 4679 int failed = 0;
4724 bool retried = false; 4680 bool retried = false;
4725 bool checked_pending_chunks = false;
4726 struct extent_buffer *l; 4681 struct extent_buffer *l;
4727 struct btrfs_key key; 4682 struct btrfs_key key;
4728 struct btrfs_super_block *super_copy = fs_info->super_copy; 4683 struct btrfs_super_block *super_copy = fs_info->super_copy;
4729 u64 old_total = btrfs_super_total_bytes(super_copy); 4684 u64 old_total = btrfs_super_total_bytes(super_copy);
4730 u64 old_size = btrfs_device_get_total_bytes(device); 4685 u64 old_size = btrfs_device_get_total_bytes(device);
4731 u64 diff; 4686 u64 diff;
4687 u64 start;
4732 4688
4733 new_size = round_down(new_size, fs_info->sectorsize); 4689 new_size = round_down(new_size, fs_info->sectorsize);
4690 start = new_size;
4734 diff = round_down(old_size - new_size, fs_info->sectorsize); 4691 diff = round_down(old_size - new_size, fs_info->sectorsize);
4735 4692
4736 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) 4693 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
@@ -4742,6 +4699,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4742 4699
4743 path->reada = READA_BACK; 4700 path->reada = READA_BACK;
4744 4701
4702 trans = btrfs_start_transaction(root, 0);
4703 if (IS_ERR(trans)) {
4704 btrfs_free_path(path);
4705 return PTR_ERR(trans);
4706 }
4707
4745 mutex_lock(&fs_info->chunk_mutex); 4708 mutex_lock(&fs_info->chunk_mutex);
4746 4709
4747 btrfs_device_set_total_bytes(device, new_size); 4710 btrfs_device_set_total_bytes(device, new_size);
@@ -4749,7 +4712,21 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4749 device->fs_devices->total_rw_bytes -= diff; 4712 device->fs_devices->total_rw_bytes -= diff;
4750 atomic64_sub(diff, &fs_info->free_chunk_space); 4713 atomic64_sub(diff, &fs_info->free_chunk_space);
4751 } 4714 }
4752 mutex_unlock(&fs_info->chunk_mutex); 4715
4716 /*
4717 * Once the device's size has been set to the new size, ensure all
4718 * in-memory chunks are synced to disk so that the loop below sees them
4719 * and relocates them accordingly.
4720 */
4721 if (contains_pending_extent(device, &start, diff)) {
4722 mutex_unlock(&fs_info->chunk_mutex);
4723 ret = btrfs_commit_transaction(trans);
4724 if (ret)
4725 goto done;
4726 } else {
4727 mutex_unlock(&fs_info->chunk_mutex);
4728 btrfs_end_transaction(trans);
4729 }
4753 4730
4754again: 4731again:
4755 key.objectid = device->devid; 4732 key.objectid = device->devid;
@@ -4840,40 +4817,10 @@ again:
4840 } 4817 }
4841 4818
4842 mutex_lock(&fs_info->chunk_mutex); 4819 mutex_lock(&fs_info->chunk_mutex);
4843
4844 /*
4845 * We checked in the above loop all device extents that were already in
4846 * the device tree. However before we have updated the device's
4847 * total_bytes to the new size, we might have had chunk allocations that
4848 * have not complete yet (new block groups attached to transaction
4849 * handles), and therefore their device extents were not yet in the
4850 * device tree and we missed them in the loop above. So if we have any
4851 * pending chunk using a device extent that overlaps the device range
4852 * that we can not use anymore, commit the current transaction and
4853 * repeat the search on the device tree - this way we guarantee we will
4854 * not have chunks using device extents that end beyond 'new_size'.
4855 */
4856 if (!checked_pending_chunks) {
4857 u64 start = new_size;
4858 u64 len = old_size - new_size;
4859
4860 if (contains_pending_extent(trans->transaction, device,
4861 &start, len)) {
4862 mutex_unlock(&fs_info->chunk_mutex);
4863 checked_pending_chunks = true;
4864 failed = 0;
4865 retried = false;
4866 ret = btrfs_commit_transaction(trans);
4867 if (ret)
4868 goto done;
4869 goto again;
4870 }
4871 }
4872
4873 btrfs_device_set_disk_total_bytes(device, new_size); 4820 btrfs_device_set_disk_total_bytes(device, new_size);
4874 if (list_empty(&device->resized_list)) 4821 if (list_empty(&device->post_commit_list))
4875 list_add_tail(&device->resized_list, 4822 list_add_tail(&device->post_commit_list,
4876 &fs_info->fs_devices->resized_devices); 4823 &trans->transaction->dev_update_list);
4877 4824
4878 WARN_ON(diff > old_total); 4825 WARN_ON(diff > old_total);
4879 btrfs_set_super_total_bytes(super_copy, 4826 btrfs_set_super_total_bytes(super_copy,
@@ -4957,15 +4904,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4957 btrfs_set_fs_incompat(info, RAID56); 4904 btrfs_set_fs_incompat(info, RAID56);
4958} 4905}
4959 4906
4960#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
4961 - sizeof(struct btrfs_chunk)) \
4962 / sizeof(struct btrfs_stripe) + 1)
4963
4964#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
4965 - 2 * sizeof(struct btrfs_disk_key) \
4966 - 2 * sizeof(struct btrfs_chunk)) \
4967 / sizeof(struct btrfs_stripe) + 1)
4968
4969static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 4907static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4970 u64 start, u64 type) 4908 u64 start, u64 type)
4971{ 4909{
@@ -5038,7 +4976,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5038 } else { 4976 } else {
5039 btrfs_err(info, "invalid chunk type 0x%llx requested", 4977 btrfs_err(info, "invalid chunk type 0x%llx requested",
5040 type); 4978 type);
5041 BUG_ON(1); 4979 BUG();
5042 } 4980 }
5043 4981
5044 /* We don't want a chunk larger than 10% of writable space */ 4982 /* We don't want a chunk larger than 10% of writable space */
@@ -5079,7 +5017,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5079 if (total_avail == 0) 5017 if (total_avail == 0)
5080 continue; 5018 continue;
5081 5019
5082 ret = find_free_dev_extent(trans, device, 5020 ret = find_free_dev_extent(device,
5083 max_stripe_size * dev_stripes, 5021 max_stripe_size * dev_stripes,
5084 &dev_offset, &max_avail); 5022 &dev_offset, &max_avail);
5085 if (ret && ret != -ENOSPC) 5023 if (ret && ret != -ENOSPC)
@@ -5213,18 +5151,20 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5213 free_extent_map(em); 5151 free_extent_map(em);
5214 goto error; 5152 goto error;
5215 } 5153 }
5216
5217 list_add_tail(&em->list, &trans->transaction->pending_chunks);
5218 refcount_inc(&em->refs);
5219 write_unlock(&em_tree->lock); 5154 write_unlock(&em_tree->lock);
5220 5155
5221 ret = btrfs_make_block_group(trans, 0, type, start, chunk_size); 5156 ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
5222 if (ret) 5157 if (ret)
5223 goto error_del_extent; 5158 goto error_del_extent;
5224 5159
5225 for (i = 0; i < map->num_stripes; i++) 5160 for (i = 0; i < map->num_stripes; i++) {
5226 btrfs_device_set_bytes_used(map->stripes[i].dev, 5161 struct btrfs_device *dev = map->stripes[i].dev;
5227 map->stripes[i].dev->bytes_used + stripe_size); 5162
5163 btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
5164 if (list_empty(&dev->post_commit_list))
5165 list_add_tail(&dev->post_commit_list,
5166 &trans->transaction->dev_update_list);
5167 }
5228 5168
5229 atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space); 5169 atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
5230 5170
@@ -5243,8 +5183,6 @@ error_del_extent:
5243 free_extent_map(em); 5183 free_extent_map(em);
5244 /* One for the tree reference */ 5184 /* One for the tree reference */
5245 free_extent_map(em); 5185 free_extent_map(em);
5246 /* One for the pending_chunks list reference */
5247 free_extent_map(em);
5248error: 5186error:
5249 kfree(devices_info); 5187 kfree(devices_info);
5250 return ret; 5188 return ret;
@@ -5364,9 +5302,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5364 return __btrfs_alloc_chunk(trans, chunk_offset, type); 5302 return __btrfs_alloc_chunk(trans, chunk_offset, type);
5365} 5303}
5366 5304
5367static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, 5305static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5368 struct btrfs_fs_info *fs_info)
5369{ 5306{
5307 struct btrfs_fs_info *fs_info = trans->fs_info;
5370 u64 chunk_offset; 5308 u64 chunk_offset;
5371 u64 sys_chunk_offset; 5309 u64 sys_chunk_offset;
5372 u64 alloc_profile; 5310 u64 alloc_profile;
@@ -6714,99 +6652,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6714 return dev; 6652 return dev;
6715} 6653}
6716 6654
6717/* Return -EIO if any error, otherwise return 0. */
6718static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
6719 struct extent_buffer *leaf,
6720 struct btrfs_chunk *chunk, u64 logical)
6721{
6722 u64 length;
6723 u64 stripe_len;
6724 u16 num_stripes;
6725 u16 sub_stripes;
6726 u64 type;
6727 u64 features;
6728 bool mixed = false;
6729
6730 length = btrfs_chunk_length(leaf, chunk);
6731 stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6732 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6733 sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6734 type = btrfs_chunk_type(leaf, chunk);
6735
6736 if (!num_stripes) {
6737 btrfs_err(fs_info, "invalid chunk num_stripes: %u",
6738 num_stripes);
6739 return -EIO;
6740 }
6741 if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
6742 btrfs_err(fs_info, "invalid chunk logical %llu", logical);
6743 return -EIO;
6744 }
6745 if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
6746 btrfs_err(fs_info, "invalid chunk sectorsize %u",
6747 btrfs_chunk_sector_size(leaf, chunk));
6748 return -EIO;
6749 }
6750 if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
6751 btrfs_err(fs_info, "invalid chunk length %llu", length);
6752 return -EIO;
6753 }
6754 if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
6755 btrfs_err(fs_info, "invalid chunk stripe length: %llu",
6756 stripe_len);
6757 return -EIO;
6758 }
6759 if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6760 type) {
6761 btrfs_err(fs_info, "unrecognized chunk type: %llu",
6762 ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
6763 BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6764 btrfs_chunk_type(leaf, chunk));
6765 return -EIO;
6766 }
6767
6768 if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
6769 btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
6770 return -EIO;
6771 }
6772
6773 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
6774 (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
6775 btrfs_err(fs_info,
6776 "system chunk with data or metadata type: 0x%llx", type);
6777 return -EIO;
6778 }
6779
6780 features = btrfs_super_incompat_flags(fs_info->super_copy);
6781 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
6782 mixed = true;
6783
6784 if (!mixed) {
6785 if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
6786 (type & BTRFS_BLOCK_GROUP_DATA)) {
6787 btrfs_err(fs_info,
6788 "mixed chunk type in non-mixed mode: 0x%llx", type);
6789 return -EIO;
6790 }
6791 }
6792
6793 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
6794 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
6795 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
6796 (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
6797 (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
6798 ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
6799 num_stripes != 1)) {
6800 btrfs_err(fs_info,
6801 "invalid num_stripes:sub_stripes %u:%u for profile %llu",
6802 num_stripes, sub_stripes,
6803 type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
6804 return -EIO;
6805 }
6806
6807 return 0;
6808}
6809
6810static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, 6655static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6811 u64 devid, u8 *uuid, bool error) 6656 u64 devid, u8 *uuid, bool error)
6812{ 6657{
@@ -6818,10 +6663,30 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6818 devid, uuid); 6663 devid, uuid);
6819} 6664}
6820 6665
6821static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, 6666static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6822 struct extent_buffer *leaf, 6667{
6668 int index = btrfs_bg_flags_to_raid_index(type);
6669 int ncopies = btrfs_raid_array[index].ncopies;
6670 int data_stripes;
6671
6672 switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6673 case BTRFS_BLOCK_GROUP_RAID5:
6674 data_stripes = num_stripes - 1;
6675 break;
6676 case BTRFS_BLOCK_GROUP_RAID6:
6677 data_stripes = num_stripes - 2;
6678 break;
6679 default:
6680 data_stripes = num_stripes / ncopies;
6681 break;
6682 }
6683 return div_u64(chunk_len, data_stripes);
6684}
6685
6686static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6823 struct btrfs_chunk *chunk) 6687 struct btrfs_chunk *chunk)
6824{ 6688{
6689 struct btrfs_fs_info *fs_info = leaf->fs_info;
6825 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 6690 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
6826 struct map_lookup *map; 6691 struct map_lookup *map;
6827 struct extent_map *em; 6692 struct extent_map *em;
@@ -6837,9 +6702,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
6837 length = btrfs_chunk_length(leaf, chunk); 6702 length = btrfs_chunk_length(leaf, chunk);
6838 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 6703 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6839 6704
6840 ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); 6705 /*
6841 if (ret) 6706 * Only need to verify chunk item if we're reading from sys chunk array,
6842 return ret; 6707 * as chunk item in tree block is already verified by tree-checker.
6708 */
6709 if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
6710 ret = btrfs_check_chunk_valid(leaf, chunk, logical);
6711 if (ret)
6712 return ret;
6713 }
6843 6714
6844 read_lock(&map_tree->map_tree.lock); 6715 read_lock(&map_tree->map_tree.lock);
6845 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); 6716 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6877,6 +6748,8 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
6877 map->type = btrfs_chunk_type(leaf, chunk); 6748 map->type = btrfs_chunk_type(leaf, chunk);
6878 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 6749 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6879 map->verified_stripes = 0; 6750 map->verified_stripes = 0;
6751 em->orig_block_len = calc_stripe_length(map->type, em->len,
6752 map->num_stripes);
6880 for (i = 0; i < num_stripes; i++) { 6753 for (i = 0; i < num_stripes; i++) {
6881 map->stripes[i].physical = 6754 map->stripes[i].physical =
6882 btrfs_stripe_offset_nr(leaf, chunk, i); 6755 btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -7001,10 +6874,10 @@ out:
7001 return fs_devices; 6874 return fs_devices;
7002} 6875}
7003 6876
7004static int read_one_dev(struct btrfs_fs_info *fs_info, 6877static int read_one_dev(struct extent_buffer *leaf,
7005 struct extent_buffer *leaf,
7006 struct btrfs_dev_item *dev_item) 6878 struct btrfs_dev_item *dev_item)
7007{ 6879{
6880 struct btrfs_fs_info *fs_info = leaf->fs_info;
7008 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 6881 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7009 struct btrfs_device *device; 6882 struct btrfs_device *device;
7010 u64 devid; 6883 u64 devid;
@@ -7193,7 +7066,7 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
7193 if (cur_offset + len > array_size) 7066 if (cur_offset + len > array_size)
7194 goto out_short_read; 7067 goto out_short_read;
7195 7068
7196 ret = read_one_chunk(fs_info, &key, sb, chunk); 7069 ret = read_one_chunk(&key, sb, chunk);
7197 if (ret) 7070 if (ret)
7198 break; 7071 break;
7199 } else { 7072 } else {
@@ -7334,14 +7207,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
7334 struct btrfs_dev_item *dev_item; 7207 struct btrfs_dev_item *dev_item;
7335 dev_item = btrfs_item_ptr(leaf, slot, 7208 dev_item = btrfs_item_ptr(leaf, slot,
7336 struct btrfs_dev_item); 7209 struct btrfs_dev_item);
7337 ret = read_one_dev(fs_info, leaf, dev_item); 7210 ret = read_one_dev(leaf, dev_item);
7338 if (ret) 7211 if (ret)
7339 goto error; 7212 goto error;
7340 total_dev++; 7213 total_dev++;
7341 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { 7214 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
7342 struct btrfs_chunk *chunk; 7215 struct btrfs_chunk *chunk;
7343 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 7216 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7344 ret = read_one_chunk(fs_info, &found_key, leaf, chunk); 7217 ret = read_one_chunk(&found_key, leaf, chunk);
7345 if (ret) 7218 if (ret)
7346 goto error; 7219 goto error;
7347 } 7220 }
@@ -7530,9 +7403,9 @@ out:
7530/* 7403/*
7531 * called from commit_transaction. Writes all changed device stats to disk. 7404 * called from commit_transaction. Writes all changed device stats to disk.
7532 */ 7405 */
7533int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, 7406int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
7534 struct btrfs_fs_info *fs_info)
7535{ 7407{
7408 struct btrfs_fs_info *fs_info = trans->fs_info;
7536 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 7409 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7537 struct btrfs_device *device; 7410 struct btrfs_device *device;
7538 int stats_cnt; 7411 int stats_cnt;
@@ -7674,51 +7547,34 @@ void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat
7674} 7547}
7675 7548
7676/* 7549/*
7677 * Update the size of all devices, which is used for writing out the 7550 * Update the size and bytes used for each device where it changed. This is
7678 * super blocks. 7551 * delayed since we would otherwise get errors while writing out the
7552 * superblocks.
7553 *
7554 * Must be invoked during transaction commit.
7679 */ 7555 */
7680void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) 7556void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
7681{ 7557{
7682 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7683 struct btrfs_device *curr, *next; 7558 struct btrfs_device *curr, *next;
7684 7559
7685 if (list_empty(&fs_devices->resized_devices)) 7560 ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
7686 return;
7687
7688 mutex_lock(&fs_devices->device_list_mutex);
7689 mutex_lock(&fs_info->chunk_mutex);
7690 list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
7691 resized_list) {
7692 list_del_init(&curr->resized_list);
7693 curr->commit_total_bytes = curr->disk_total_bytes;
7694 }
7695 mutex_unlock(&fs_info->chunk_mutex);
7696 mutex_unlock(&fs_devices->device_list_mutex);
7697}
7698 7561
7699/* Must be invoked during the transaction commit */ 7562 if (list_empty(&trans->dev_update_list))
7700void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
7701{
7702 struct btrfs_fs_info *fs_info = trans->fs_info;
7703 struct extent_map *em;
7704 struct map_lookup *map;
7705 struct btrfs_device *dev;
7706 int i;
7707
7708 if (list_empty(&trans->pending_chunks))
7709 return; 7563 return;
7710 7564
7711 /* In order to kick the device replace finish process */ 7565 /*
7712 mutex_lock(&fs_info->chunk_mutex); 7566 * We don't need the device_list_mutex here. This list is owned by the
7713 list_for_each_entry(em, &trans->pending_chunks, list) { 7567 * transaction and the transaction must complete before the device is
7714 map = em->map_lookup; 7568 * released.
7715 7569 */
7716 for (i = 0; i < map->num_stripes; i++) { 7570 mutex_lock(&trans->fs_info->chunk_mutex);
7717 dev = map->stripes[i].dev; 7571 list_for_each_entry_safe(curr, next, &trans->dev_update_list,
7718 dev->commit_bytes_used = dev->bytes_used; 7572 post_commit_list) {
7719 } 7573 list_del_init(&curr->post_commit_list);
7574 curr->commit_total_bytes = curr->disk_total_bytes;
7575 curr->commit_bytes_used = curr->bytes_used;
7720 } 7576 }
7721 mutex_unlock(&fs_info->chunk_mutex); 7577 mutex_unlock(&trans->fs_info->chunk_mutex);
7722} 7578}
7723 7579
7724void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info) 7580void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
@@ -7751,25 +7607,6 @@ int btrfs_bg_type_to_factor(u64 flags)
7751} 7607}
7752 7608
7753 7609
7754static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
7755{
7756 int index = btrfs_bg_flags_to_raid_index(type);
7757 int ncopies = btrfs_raid_array[index].ncopies;
7758 int data_stripes;
7759
7760 switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
7761 case BTRFS_BLOCK_GROUP_RAID5:
7762 data_stripes = num_stripes - 1;
7763 break;
7764 case BTRFS_BLOCK_GROUP_RAID6:
7765 data_stripes = num_stripes - 2;
7766 break;
7767 default:
7768 data_stripes = num_stripes / ncopies;
7769 break;
7770 }
7771 return div_u64(chunk_len, data_stripes);
7772}
7773 7610
7774static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, 7611static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7775 u64 chunk_offset, u64 devid, 7612 u64 chunk_offset, u64 devid,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3ad9d58d1b66..b8a0e8d0672d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -45,6 +45,7 @@ struct btrfs_pending_bios {
45struct btrfs_device { 45struct btrfs_device {
46 struct list_head dev_list; 46 struct list_head dev_list;
47 struct list_head dev_alloc_list; 47 struct list_head dev_alloc_list;
48 struct list_head post_commit_list; /* chunk mutex */
48 struct btrfs_fs_devices *fs_devices; 49 struct btrfs_fs_devices *fs_devices;
49 struct btrfs_fs_info *fs_info; 50 struct btrfs_fs_info *fs_info;
50 51
@@ -102,18 +103,12 @@ struct btrfs_device {
102 * size of the device on the current transaction 103 * size of the device on the current transaction
103 * 104 *
104 * This variant is update when committing the transaction, 105 * This variant is update when committing the transaction,
105 * and protected by device_list_mutex 106 * and protected by chunk mutex
106 */ 107 */
107 u64 commit_total_bytes; 108 u64 commit_total_bytes;
108 109
109 /* bytes used on the current transaction */ 110 /* bytes used on the current transaction */
110 u64 commit_bytes_used; 111 u64 commit_bytes_used;
111 /*
112 * used to manage the device which is resized
113 *
114 * It is protected by chunk_lock.
115 */
116 struct list_head resized_list;
117 112
118 /* for sending down flush barriers */ 113 /* for sending down flush barriers */
119 struct bio *flush_bio; 114 struct bio *flush_bio;
@@ -123,7 +118,6 @@ struct btrfs_device {
123 struct scrub_ctx *scrub_ctx; 118 struct scrub_ctx *scrub_ctx;
124 119
125 struct btrfs_work work; 120 struct btrfs_work work;
126 struct rcu_head rcu;
127 121
128 /* readahead state */ 122 /* readahead state */
129 atomic_t reada_in_flight; 123 atomic_t reada_in_flight;
@@ -139,6 +133,8 @@ struct btrfs_device {
139 /* Counter to record the change of device stats */ 133 /* Counter to record the change of device stats */
140 atomic_t dev_stats_ccnt; 134 atomic_t dev_stats_ccnt;
141 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; 135 atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
136
137 struct extent_io_tree alloc_state;
142}; 138};
143 139
144/* 140/*
@@ -235,7 +231,6 @@ struct btrfs_fs_devices {
235 struct mutex device_list_mutex; 231 struct mutex device_list_mutex;
236 struct list_head devices; 232 struct list_head devices;
237 233
238 struct list_head resized_devices;
239 /* devices not currently being allocated */ 234 /* devices not currently being allocated */
240 struct list_head alloc_list; 235 struct list_head alloc_list;
241 236
@@ -258,6 +253,15 @@ struct btrfs_fs_devices {
258 253
259#define BTRFS_BIO_INLINE_CSUM_SIZE 64 254#define BTRFS_BIO_INLINE_CSUM_SIZE 64
260 255
256#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
257 - sizeof(struct btrfs_chunk)) \
258 / sizeof(struct btrfs_stripe) + 1)
259
260#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
261 - 2 * sizeof(struct btrfs_disk_key) \
262 - 2 * sizeof(struct btrfs_chunk)) \
263 / sizeof(struct btrfs_stripe) + 1)
264
261/* 265/*
262 * we need the mirror number and stripe index to be passed around 266 * we need the mirror number and stripe index to be passed around
263 * the call chain while we are processing end_io (especially errors). 267 * the call chain while we are processing end_io (especially errors).
@@ -449,22 +453,18 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
449int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); 453int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
450int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); 454int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
451int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset); 455int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
452int find_free_dev_extent_start(struct btrfs_transaction *transaction, 456int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
453 struct btrfs_device *device, u64 num_bytes, 457 u64 search_start, u64 *start, u64 *max_avail);
454 u64 search_start, u64 *start, u64 *max_avail); 458int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
455int find_free_dev_extent(struct btrfs_trans_handle *trans,
456 struct btrfs_device *device, u64 num_bytes,
457 u64 *start, u64 *max_avail); 459 u64 *start, u64 *max_avail);
458void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 460void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
459int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, 461int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
460 struct btrfs_ioctl_get_dev_stats *stats); 462 struct btrfs_ioctl_get_dev_stats *stats);
461void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); 463void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
462int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); 464int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
463int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, 465int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
464 struct btrfs_fs_info *fs_info);
465void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); 466void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
466void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, 467void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
467 struct btrfs_device *srcdev);
468void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev); 468void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
469void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); 469void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
470int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 470int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
@@ -558,8 +558,7 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
558 558
559const char *get_raid_name(enum btrfs_raid_types type); 559const char *get_raid_name(enum btrfs_raid_types type);
560 560
561void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); 561void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
562void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
563 562
564struct list_head *btrfs_get_fs_uuids(void); 563struct list_head *btrfs_get_fs_uuids(void);
565void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); 564void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f141b45ce349..78b6ba2029e8 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -76,9 +76,8 @@ out:
76 return ret; 76 return ret;
77} 77}
78 78
79static int do_setxattr(struct btrfs_trans_handle *trans, 79int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
80 struct inode *inode, const char *name, 80 const char *name, const void *value, size_t size, int flags)
81 const void *value, size_t size, int flags)
82{ 81{
83 struct btrfs_dir_item *di = NULL; 82 struct btrfs_dir_item *di = NULL;
84 struct btrfs_root *root = BTRFS_I(inode)->root; 83 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -87,6 +86,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
87 size_t name_len = strlen(name); 86 size_t name_len = strlen(name);
88 int ret = 0; 87 int ret = 0;
89 88
89 ASSERT(trans);
90
90 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root->fs_info)) 91 if (name_len + size > BTRFS_MAX_XATTR_SIZE(root->fs_info))
91 return -ENOSPC; 92 return -ENOSPC;
92 93
@@ -174,7 +175,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
174 char *ptr; 175 char *ptr;
175 176
176 if (size > old_data_len) { 177 if (size > old_data_len) {
177 if (btrfs_leaf_free_space(fs_info, leaf) < 178 if (btrfs_leaf_free_space(leaf) <
178 (size - old_data_len)) { 179 (size - old_data_len)) {
179 ret = -ENOSPC; 180 ret = -ENOSPC;
180 goto out; 181 goto out;
@@ -184,17 +185,15 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
184 if (old_data_len + name_len + sizeof(*di) == item_size) { 185 if (old_data_len + name_len + sizeof(*di) == item_size) {
185 /* No other xattrs packed in the same leaf item. */ 186 /* No other xattrs packed in the same leaf item. */
186 if (size > old_data_len) 187 if (size > old_data_len)
187 btrfs_extend_item(fs_info, path, 188 btrfs_extend_item(path, size - old_data_len);
188 size - old_data_len);
189 else if (size < old_data_len) 189 else if (size < old_data_len)
190 btrfs_truncate_item(fs_info, path, 190 btrfs_truncate_item(path, data_size, 1);
191 data_size, 1);
192 } else { 191 } else {
193 /* There are other xattrs packed in the same item. */ 192 /* There are other xattrs packed in the same item. */
194 ret = btrfs_delete_one_dir_name(trans, root, path, di); 193 ret = btrfs_delete_one_dir_name(trans, root, path, di);
195 if (ret) 194 if (ret)
196 goto out; 195 goto out;
197 btrfs_extend_item(fs_info, path, data_size); 196 btrfs_extend_item(path, data_size);
198 } 197 }
199 198
200 item = btrfs_item_nr(slot); 199 item = btrfs_item_nr(slot);
@@ -220,24 +219,18 @@ out:
220/* 219/*
221 * @value: "" makes the attribute to empty, NULL removes it 220 * @value: "" makes the attribute to empty, NULL removes it
222 */ 221 */
223int btrfs_setxattr(struct btrfs_trans_handle *trans, 222int btrfs_setxattr_trans(struct inode *inode, const char *name,
224 struct inode *inode, const char *name, 223 const void *value, size_t size, int flags)
225 const void *value, size_t size, int flags)
226{ 224{
227 struct btrfs_root *root = BTRFS_I(inode)->root; 225 struct btrfs_root *root = BTRFS_I(inode)->root;
226 struct btrfs_trans_handle *trans;
228 int ret; 227 int ret;
229 228
230 if (btrfs_root_readonly(root))
231 return -EROFS;
232
233 if (trans)
234 return do_setxattr(trans, inode, name, value, size, flags);
235
236 trans = btrfs_start_transaction(root, 2); 229 trans = btrfs_start_transaction(root, 2);
237 if (IS_ERR(trans)) 230 if (IS_ERR(trans))
238 return PTR_ERR(trans); 231 return PTR_ERR(trans);
239 232
240 ret = do_setxattr(trans, inode, name, value, size, flags); 233 ret = btrfs_setxattr(trans, inode, name, value, size, flags);
241 if (ret) 234 if (ret)
242 goto out; 235 goto out;
243 236
@@ -370,7 +363,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
370 size_t size, int flags) 363 size_t size, int flags)
371{ 364{
372 name = xattr_full_name(handler, name); 365 name = xattr_full_name(handler, name);
373 return btrfs_setxattr(NULL, inode, name, buffer, size, flags); 366 return btrfs_setxattr_trans(inode, name, buffer, size, flags);
374} 367}
375 368
376static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, 369static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
@@ -378,8 +371,32 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
378 const char *name, const void *value, 371 const char *name, const void *value,
379 size_t size, int flags) 372 size_t size, int flags)
380{ 373{
374 int ret;
375 struct btrfs_trans_handle *trans;
376 struct btrfs_root *root = BTRFS_I(inode)->root;
377
381 name = xattr_full_name(handler, name); 378 name = xattr_full_name(handler, name);
382 return btrfs_set_prop(inode, name, value, size, flags); 379 ret = btrfs_validate_prop(name, value, size);
380 if (ret)
381 return ret;
382
383 trans = btrfs_start_transaction(root, 2);
384 if (IS_ERR(trans))
385 return PTR_ERR(trans);
386
387 ret = btrfs_set_prop(trans, inode, name, value, size, flags);
388 if (!ret) {
389 inode_inc_iversion(inode);
390 inode->i_ctime = current_time(inode);
391 set_bit(BTRFS_INODE_COPY_EVERYTHING,
392 &BTRFS_I(inode)->runtime_flags);
393 ret = btrfs_update_inode(trans, root, inode);
394 BUG_ON(ret);
395 }
396
397 btrfs_end_transaction(trans);
398
399 return ret;
383} 400}
384 401
385static const struct xattr_handler btrfs_security_xattr_handler = { 402static const struct xattr_handler btrfs_security_xattr_handler = {
@@ -419,10 +436,10 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
419}; 436};
420 437
421static int btrfs_initxattrs(struct inode *inode, 438static int btrfs_initxattrs(struct inode *inode,
422 const struct xattr *xattr_array, void *fs_info) 439 const struct xattr *xattr_array, void *fs_private)
423{ 440{
441 struct btrfs_trans_handle *trans = fs_private;
424 const struct xattr *xattr; 442 const struct xattr *xattr;
425 struct btrfs_trans_handle *trans = fs_info;
426 unsigned int nofs_flag; 443 unsigned int nofs_flag;
427 char *name; 444 char *name;
428 int err = 0; 445 int err = 0;
@@ -442,7 +459,7 @@ static int btrfs_initxattrs(struct inode *inode,
442 strcpy(name, XATTR_SECURITY_PREFIX); 459 strcpy(name, XATTR_SECURITY_PREFIX);
443 strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); 460 strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
444 err = btrfs_setxattr(trans, inode, name, xattr->value, 461 err = btrfs_setxattr(trans, inode, name, xattr->value,
445 xattr->value_len, 0); 462 xattr->value_len, 0);
446 kfree(name); 463 kfree(name);
447 if (err < 0) 464 if (err < 0)
448 break; 465 break;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 471fcac6ff55..1cd3fc0a8f17 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -12,9 +12,10 @@ extern const struct xattr_handler *btrfs_xattr_handlers[];
12 12
13int btrfs_getxattr(struct inode *inode, const char *name, 13int btrfs_getxattr(struct inode *inode, const char *name,
14 void *buffer, size_t size); 14 void *buffer, size_t size);
15int btrfs_setxattr(struct btrfs_trans_handle *trans, 15int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
16 struct inode *inode, const char *name, 16 const char *name, const void *value, size_t size, int flags);
17 const void *value, size_t size, int flags); 17int btrfs_setxattr_trans(struct inode *inode, const char *name,
18 const void *value, size_t size, int flags);
18ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 19ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
19 20
20int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 21int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 6b9e29d050f3..a6ff07cf11d5 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -90,6 +90,9 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
90 return container_of(list, struct workspace, list); 90 return container_of(list, struct workspace, list);
91} 91}
92 92
93static void zstd_free_workspace(struct list_head *ws);
94static struct list_head *zstd_alloc_workspace(unsigned int level);
95
93/* 96/*
94 * zstd_reclaim_timer_fn - reclaim timer 97 * zstd_reclaim_timer_fn - reclaim timer
95 * @t: timer 98 * @t: timer
@@ -124,7 +127,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
124 level = victim->level; 127 level = victim->level;
125 list_del(&victim->lru_list); 128 list_del(&victim->lru_list);
126 list_del(&victim->list); 129 list_del(&victim->list);
127 wsm.ops->free_workspace(&victim->list); 130 zstd_free_workspace(&victim->list);
128 131
129 if (list_empty(&wsm.idle_ws[level - 1])) 132 if (list_empty(&wsm.idle_ws[level - 1]))
130 clear_bit(level - 1, &wsm.active_map); 133 clear_bit(level - 1, &wsm.active_map);
@@ -180,7 +183,7 @@ static void zstd_init_workspace_manager(void)
180 for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) 183 for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
181 INIT_LIST_HEAD(&wsm.idle_ws[i]); 184 INIT_LIST_HEAD(&wsm.idle_ws[i]);
182 185
183 ws = wsm.ops->alloc_workspace(ZSTD_BTRFS_MAX_LEVEL); 186 ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
184 if (IS_ERR(ws)) { 187 if (IS_ERR(ws)) {
185 pr_warn( 188 pr_warn(
186 "BTRFS: cannot preallocate zstd compression workspace\n"); 189 "BTRFS: cannot preallocate zstd compression workspace\n");
@@ -202,7 +205,7 @@ static void zstd_cleanup_workspace_manager(void)
202 struct workspace, list); 205 struct workspace, list);
203 list_del(&workspace->list); 206 list_del(&workspace->list);
204 list_del(&workspace->lru_list); 207 list_del(&workspace->lru_list);
205 wsm.ops->free_workspace(&workspace->list); 208 zstd_free_workspace(&workspace->list);
206 } 209 }
207 } 210 }
208 spin_unlock(&wsm.lock); 211 spin_unlock(&wsm.lock);
@@ -272,7 +275,7 @@ again:
272 return ws; 275 return ws;
273 276
274 nofs_flag = memalloc_nofs_save(); 277 nofs_flag = memalloc_nofs_save();
275 ws = wsm.ops->alloc_workspace(level); 278 ws = zstd_alloc_workspace(level);
276 memalloc_nofs_restore(nofs_flag); 279 memalloc_nofs_restore(nofs_flag);
277 280
278 if (IS_ERR(ws)) { 281 if (IS_ERR(ws)) {