aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 00:58:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 00:58:53 -0400
commita1a4f841ec4585185c0e75bfae43a18b282dd316 (patch)
tree5dd4fbe1f841be3fdfb5df011e46f570a2f9bc76
parent575b94386bd539a7d803aee9fd4a8d275844c40f (diff)
parent39379faaad79e3cf403a6904a08676b7850043ae (diff)
Merge tag 'for-4.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Mostly fixes and cleanups, nothing big, though the notable thing is the inserted/deleted lines delta -1124. User visible changes: - allow defrag on opened read-only files that have rw permissions; similar to what dedupe will allow on such files Core changes: - tree checker improvements, reported by fuzzing: * more checks for: block group items, essential trees * chunk type validation * mount time cross-checks that physical and logical chunks match * switch more error codes to EUCLEAN aka EFSCORRUPTED Fixes: - fsync corner case fixes - fix send failure when root has deleted files still open - send, fix incorrect file layout after hole punching beyond eof - fix races between mount and device scan ioctl, found by fuzzing - fix deadlock when delayed iput is called from writeback on the same inode; rare but has been observed in practice, also removes code - fix pinned byte accounting, using the right percpu helpers; this should avoid some write IO inefficiency during low space conditions - don't remove block group that still has pinned bytes - reset on-disk device stats value after replace, otherwise this would report stale values for the new device Cleanups: - time64_t/timespec64 cleanups - remove remaining dead code in scrub handling NOCOW extents after disabling it in previous cycle - simplify fsync regarding ordered extents logic and remove all the related code - remove redundant arguments in order to reduce stack space consumption - remove support for V0 type of extents, not in use since 2.6.30 - remove several unused structure members - fewer indirect function calls by inlining some callbacks - qgroup rescan timing fixes - vfs: iget cleanups" * tag 'for-4.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (182 commits) btrfs: revert fs_devices state on error of btrfs_init_new_device btrfs: Exit gracefully when chunk map cannot be inserted to the tree btrfs: Introduce mount time chunk <-> dev extent mapping 
check btrfs: Verify that every chunk has corresponding block group at mount time btrfs: Check that each block group has corresponding chunk at mount time Btrfs: send, fix incorrect file layout after hole punching beyond eof btrfs: Use wrapper macro for rcu string to remove duplicate code btrfs: simplify btrfs_iget btrfs: lift make_bad_inode into btrfs_iget btrfs: simplify IS_ERR/PTR_ERR checks btrfs: btrfs_iget never returns an is_bad_inode inode btrfs: replace: Reset on-disk dev stats value after replace btrfs: extent-tree: Remove unused __btrfs_free_block_rsv btrfs: backref: Use ERR_CAST to return error code btrfs: Remove redundant btrfs_release_path from btrfs_unlink_subvol btrfs: Remove root parameter from btrfs_unlink_subvol btrfs: Remove fs_info from btrfs_add_root_ref btrfs: Remove fs_info from btrfs_del_root_ref btrfs: Remove fs_info from btrfs_del_root btrfs: Remove fs_info from btrfs_delete_delayed_dir_index ...
-rw-r--r--fs/btrfs/acl.c13
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/check-integrity.c9
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/ctree.c53
-rw-r--r--fs/btrfs/ctree.h89
-rw-r--r--fs/btrfs/delayed-inode.c14
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c43
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dev-replace.c29
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/disk-io.c113
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c883
-rw-r--r--fs/btrfs/extent_io.c156
-rw-r--r--fs/btrfs/extent_io.h16
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c128
-rw-r--r--fs/btrfs/free-space-cache.c19
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode-map.c12
-rw-r--r--fs/btrfs/inode.c161
-rw-r--r--fs/btrfs/ioctl.c69
-rw-r--r--fs/btrfs/ordered-data.c138
-rw-r--r--fs/btrfs/ordered-data.h23
-rw-r--r--fs/btrfs/print-tree.c39
-rw-r--r--fs/btrfs/qgroup.c270
-rw-r--r--fs/btrfs/qgroup.h46
-rw-r--r--fs/btrfs/raid56.c109
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/relocation.c216
-rw-r--r--fs/btrfs/root-tree.c22
-rw-r--r--fs/btrfs/scrub.c679
-rw-r--r--fs/btrfs/send.c172
-rw-r--r--fs/btrfs/struct-funcs.c1
-rw-r--r--fs/btrfs/super.c115
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c24
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-checker.c115
-rw-r--r--fs/btrfs/tree-log.c270
-rw-r--r--fs/btrfs/volumes.c611
-rw-r--r--fs/btrfs/volumes.h31
-rw-r--r--include/trace/events/btrfs.h3
47 files changed, 1817 insertions, 2941 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15e1dfef56a5..3b66c957ea6f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -30,23 +30,22 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
30 name = XATTR_NAME_POSIX_ACL_DEFAULT; 30 name = XATTR_NAME_POSIX_ACL_DEFAULT;
31 break; 31 break;
32 default: 32 default:
33 BUG(); 33 return ERR_PTR(-EINVAL);
34 } 34 }
35 35
36 size = btrfs_getxattr(inode, name, "", 0); 36 size = btrfs_getxattr(inode, name, NULL, 0);
37 if (size > 0) { 37 if (size > 0) {
38 value = kzalloc(size, GFP_KERNEL); 38 value = kzalloc(size, GFP_KERNEL);
39 if (!value) 39 if (!value)
40 return ERR_PTR(-ENOMEM); 40 return ERR_PTR(-ENOMEM);
41 size = btrfs_getxattr(inode, name, value, size); 41 size = btrfs_getxattr(inode, name, value, size);
42 } 42 }
43 if (size > 0) { 43 if (size > 0)
44 acl = posix_acl_from_xattr(&init_user_ns, value, size); 44 acl = posix_acl_from_xattr(&init_user_ns, value, size);
45 } else if (size == -ERANGE || size == -ENODATA || size == 0) { 45 else if (size == -ENODATA || size == 0)
46 acl = NULL; 46 acl = NULL;
47 } else { 47 else
48 acl = ERR_PTR(-EIO); 48 acl = ERR_PTR(size);
49 }
50 kfree(value); 49 kfree(value);
51 50
52 return acl; 51 return acl;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0a8e2e29a66b..ae750b1574a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -925,7 +925,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
925 type = btrfs_get_extent_inline_ref_type(leaf, iref, 925 type = btrfs_get_extent_inline_ref_type(leaf, iref,
926 BTRFS_REF_TYPE_ANY); 926 BTRFS_REF_TYPE_ANY);
927 if (type == BTRFS_REF_TYPE_INVALID) 927 if (type == BTRFS_REF_TYPE_INVALID)
928 return -EINVAL; 928 return -EUCLEAN;
929 929
930 offset = btrfs_extent_inline_ref_offset(leaf, iref); 930 offset = btrfs_extent_inline_ref_offset(leaf, iref);
931 931
@@ -1793,7 +1793,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
1793 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 1793 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
1794 BTRFS_REF_TYPE_ANY); 1794 BTRFS_REF_TYPE_ANY);
1795 if (*out_type == BTRFS_REF_TYPE_INVALID) 1795 if (*out_type == BTRFS_REF_TYPE_INVALID)
1796 return -EINVAL; 1796 return -EUCLEAN;
1797 1797
1798 *ptr += btrfs_extent_inline_ref_size(*out_type); 1798 *ptr += btrfs_extent_inline_ref_size(*out_type);
1799 WARN_ON(*ptr > end); 1799 WARN_ON(*ptr > end);
@@ -2225,7 +2225,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
2225 2225
2226 fspath = init_data_container(total_bytes); 2226 fspath = init_data_container(total_bytes);
2227 if (IS_ERR(fspath)) 2227 if (IS_ERR(fspath))
2228 return (void *)fspath; 2228 return ERR_CAST(fspath);
2229 2229
2230 ifp = kmalloc(sizeof(*ifp), GFP_KERNEL); 2230 ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
2231 if (!ifp) { 2231 if (!ifp) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7e075343daa5..1343ac57b438 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -178,7 +178,7 @@ struct btrfs_inode {
178 struct btrfs_delayed_node *delayed_node; 178 struct btrfs_delayed_node *delayed_node;
179 179
180 /* File creation time. */ 180 /* File creation time. */
181 struct timespec i_otime; 181 struct timespec64 i_otime;
182 182
183 /* Hook into fs_info->delayed_iputs */ 183 /* Hook into fs_info->delayed_iputs */
184 struct list_head delayed_iput; 184 struct list_head delayed_iput;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index a3fdb4fe967d..833cf3c35b4d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1539,7 +1539,12 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1539 } 1539 }
1540 1540
1541 device = multi->stripes[0].dev; 1541 device = multi->stripes[0].dev;
1542 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev); 1542 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
1543 !device->bdev || !device->name)
1544 block_ctx_out->dev = NULL;
1545 else
1546 block_ctx_out->dev = btrfsic_dev_state_lookup(
1547 device->bdev->bd_dev);
1543 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1548 block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1544 block_ctx_out->start = bytenr; 1549 block_ctx_out->start = bytenr;
1545 block_ctx_out->len = len; 1550 block_ctx_out->len = len;
@@ -1624,7 +1629,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1624 bio = btrfs_io_bio_alloc(num_pages - i); 1629 bio = btrfs_io_bio_alloc(num_pages - i);
1625 bio_set_dev(bio, block_ctx->dev->bdev); 1630 bio_set_dev(bio, block_ctx->dev->bdev);
1626 bio->bi_iter.bi_sector = dev_bytenr >> 9; 1631 bio->bi_iter.bi_sector = dev_bytenr >> 9;
1627 bio_set_op_attrs(bio, REQ_OP_READ, 0); 1632 bio->bi_opf = REQ_OP_READ;
1628 1633
1629 for (j = i; j < num_pages; j++) { 1634 for (j = i; j < num_pages; j++) {
1630 ret = bio_add_page(bio, block_ctx->pagev[j], 1635 ret = bio_add_page(bio, block_ctx->pagev[j],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3e447b45bf7..9bfa66592aa7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -5,7 +5,6 @@
5 5
6#include <linux/kernel.h> 6#include <linux/kernel.h>
7#include <linux/bio.h> 7#include <linux/bio.h>
8#include <linux/buffer_head.h>
9#include <linux/file.h> 8#include <linux/file.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/pagemap.h> 10#include <linux/pagemap.h>
@@ -14,10 +13,7 @@
14#include <linux/init.h> 13#include <linux/init.h>
15#include <linux/string.h> 14#include <linux/string.h>
16#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
17#include <linux/mpage.h>
18#include <linux/swap.h>
19#include <linux/writeback.h> 16#include <linux/writeback.h>
20#include <linux/bit_spinlock.h>
21#include <linux/slab.h> 17#include <linux/slab.h>
22#include <linux/sched/mm.h> 18#include <linux/sched/mm.h>
23#include <linux/log2.h> 19#include <linux/log2.h>
@@ -303,7 +299,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
303 struct bio *bio = NULL; 299 struct bio *bio = NULL;
304 struct compressed_bio *cb; 300 struct compressed_bio *cb;
305 unsigned long bytes_left; 301 unsigned long bytes_left;
306 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
307 int pg_index = 0; 302 int pg_index = 0;
308 struct page *page; 303 struct page *page;
309 u64 first_byte = disk_start; 304 u64 first_byte = disk_start;
@@ -342,9 +337,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
342 page = compressed_pages[pg_index]; 337 page = compressed_pages[pg_index];
343 page->mapping = inode->i_mapping; 338 page->mapping = inode->i_mapping;
344 if (bio->bi_iter.bi_size) 339 if (bio->bi_iter.bi_size)
345 submit = io_tree->ops->merge_bio_hook(page, 0, 340 submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0);
346 PAGE_SIZE,
347 bio, 0);
348 341
349 page->mapping = NULL; 342 page->mapping = NULL;
350 if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < 343 if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
@@ -613,7 +606,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
613 cb->len = bio->bi_iter.bi_size; 606 cb->len = bio->bi_iter.bi_size;
614 607
615 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 608 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
616 bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); 609 comp_bio->bi_opf = REQ_OP_READ;
617 comp_bio->bi_private = cb; 610 comp_bio->bi_private = cb;
618 comp_bio->bi_end_io = end_compressed_bio_read; 611 comp_bio->bi_end_io = end_compressed_bio_read;
619 refcount_set(&cb->pending_bios, 1); 612 refcount_set(&cb->pending_bios, 1);
@@ -626,9 +619,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
626 page->index = em_start >> PAGE_SHIFT; 619 page->index = em_start >> PAGE_SHIFT;
627 620
628 if (comp_bio->bi_iter.bi_size) 621 if (comp_bio->bi_iter.bi_size)
629 submit = tree->ops->merge_bio_hook(page, 0, 622 submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE,
630 PAGE_SIZE, 623 comp_bio, 0);
631 comp_bio, 0);
632 624
633 page->mapping = NULL; 625 page->mapping = NULL;
634 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < 626 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
@@ -660,7 +652,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
660 } 652 }
661 653
662 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 654 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
663 bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); 655 comp_bio->bi_opf = REQ_OP_READ;
664 comp_bio->bi_private = cb; 656 comp_bio->bi_private = cb;
665 comp_bio->bi_end_io = end_compressed_bio_read; 657 comp_bio->bi_end_io = end_compressed_bio_read;
666 658
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bc326df472e..d436fb4c002e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -888,11 +888,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
888 btrfs_root_last_snapshot(&root->root_item) || 888 btrfs_root_last_snapshot(&root->root_item) ||
889 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 889 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
890 return 1; 890 return 1;
891#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 891
892 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
893 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
894 return 1;
895#endif
896 return 0; 892 return 0;
897} 893}
898 894
@@ -3128,8 +3124,7 @@ again:
3128 * higher levels 3124 * higher levels
3129 * 3125 *
3130 */ 3126 */
3131static void fixup_low_keys(struct btrfs_fs_info *fs_info, 3127static void fixup_low_keys(struct btrfs_path *path,
3132 struct btrfs_path *path,
3133 struct btrfs_disk_key *key, int level) 3128 struct btrfs_disk_key *key, int level)
3134{ 3129{
3135 int i; 3130 int i;
@@ -3181,7 +3176,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3181 btrfs_set_item_key(eb, &disk_key, slot); 3176 btrfs_set_item_key(eb, &disk_key, slot);
3182 btrfs_mark_buffer_dirty(eb); 3177 btrfs_mark_buffer_dirty(eb);
3183 if (slot == 0) 3178 if (slot == 0)
3184 fixup_low_keys(fs_info, path, &disk_key, 1); 3179 fixup_low_keys(path, &disk_key, 1);
3185} 3180}
3186 3181
3187/* 3182/*
@@ -3359,17 +3354,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3359 3354
3360 root_add_used(root, fs_info->nodesize); 3355 root_add_used(root, fs_info->nodesize);
3361 3356
3362 memzero_extent_buffer(c, 0, sizeof(struct btrfs_header));
3363 btrfs_set_header_nritems(c, 1); 3357 btrfs_set_header_nritems(c, 1);
3364 btrfs_set_header_level(c, level);
3365 btrfs_set_header_bytenr(c, c->start);
3366 btrfs_set_header_generation(c, trans->transid);
3367 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
3368 btrfs_set_header_owner(c, root->root_key.objectid);
3369
3370 write_extent_buffer_fsid(c, fs_info->fsid);
3371 write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid);
3372
3373 btrfs_set_node_key(c, &lower_key, 0); 3358 btrfs_set_node_key(c, &lower_key, 0);
3374 btrfs_set_node_blockptr(c, 0, lower->start); 3359 btrfs_set_node_blockptr(c, 0, lower->start);
3375 lower_gen = btrfs_header_generation(lower); 3360 lower_gen = btrfs_header_generation(lower);
@@ -3498,15 +3483,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3498 return PTR_ERR(split); 3483 return PTR_ERR(split);
3499 3484
3500 root_add_used(root, fs_info->nodesize); 3485 root_add_used(root, fs_info->nodesize);
3501 3486 ASSERT(btrfs_header_level(c) == level);
3502 memzero_extent_buffer(split, 0, sizeof(struct btrfs_header));
3503 btrfs_set_header_level(split, btrfs_header_level(c));
3504 btrfs_set_header_bytenr(split, split->start);
3505 btrfs_set_header_generation(split, trans->transid);
3506 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
3507 btrfs_set_header_owner(split, root->root_key.objectid);
3508 write_extent_buffer_fsid(split, fs_info->fsid);
3509 write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid);
3510 3487
3511 ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid); 3488 ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
3512 if (ret) { 3489 if (ret) {
@@ -3945,7 +3922,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
3945 clean_tree_block(fs_info, right); 3922 clean_tree_block(fs_info, right);
3946 3923
3947 btrfs_item_key(right, &disk_key, 0); 3924 btrfs_item_key(right, &disk_key, 0);
3948 fixup_low_keys(fs_info, path, &disk_key, 1); 3925 fixup_low_keys(path, &disk_key, 1);
3949 3926
3950 /* then fixup the leaf pointer in the path */ 3927 /* then fixup the leaf pointer in the path */
3951 if (path->slots[0] < push_items) { 3928 if (path->slots[0] < push_items) {
@@ -4292,15 +4269,6 @@ again:
4292 4269
4293 root_add_used(root, fs_info->nodesize); 4270 root_add_used(root, fs_info->nodesize);
4294 4271
4295 memzero_extent_buffer(right, 0, sizeof(struct btrfs_header));
4296 btrfs_set_header_bytenr(right, right->start);
4297 btrfs_set_header_generation(right, trans->transid);
4298 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
4299 btrfs_set_header_owner(right, root->root_key.objectid);
4300 btrfs_set_header_level(right, 0);
4301 write_extent_buffer_fsid(right, fs_info->fsid);
4302 write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid);
4303
4304 if (split == 0) { 4272 if (split == 0) {
4305 if (mid <= slot) { 4273 if (mid <= slot) {
4306 btrfs_set_header_nritems(right, 0); 4274 btrfs_set_header_nritems(right, 0);
@@ -4320,7 +4288,7 @@ again:
4320 path->nodes[0] = right; 4288 path->nodes[0] = right;
4321 path->slots[0] = 0; 4289 path->slots[0] = 0;
4322 if (path->slots[1] == 0) 4290 if (path->slots[1] == 0)
4323 fixup_low_keys(fs_info, path, &disk_key, 1); 4291 fixup_low_keys(path, &disk_key, 1);
4324 } 4292 }
4325 /* 4293 /*
4326 * We create a new leaf 'right' for the required ins_len and 4294 * We create a new leaf 'right' for the required ins_len and
@@ -4642,7 +4610,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
4642 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4610 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4643 btrfs_set_item_key(leaf, &disk_key, slot); 4611 btrfs_set_item_key(leaf, &disk_key, slot);
4644 if (slot == 0) 4612 if (slot == 0)
4645 fixup_low_keys(fs_info, path, &disk_key, 1); 4613 fixup_low_keys(path, &disk_key, 1);
4646 } 4614 }
4647 4615
4648 item = btrfs_item_nr(slot); 4616 item = btrfs_item_nr(slot);
@@ -4744,7 +4712,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4744 4712
4745 if (path->slots[0] == 0) { 4713 if (path->slots[0] == 0) {
4746 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4714 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4747 fixup_low_keys(fs_info, path, &disk_key, 1); 4715 fixup_low_keys(path, &disk_key, 1);
4748 } 4716 }
4749 btrfs_unlock_up_safe(path, 1); 4717 btrfs_unlock_up_safe(path, 1);
4750 4718
@@ -4886,7 +4854,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4886static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 4854static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4887 int level, int slot) 4855 int level, int slot)
4888{ 4856{
4889 struct btrfs_fs_info *fs_info = root->fs_info;
4890 struct extent_buffer *parent = path->nodes[level]; 4857 struct extent_buffer *parent = path->nodes[level];
4891 u32 nritems; 4858 u32 nritems;
4892 int ret; 4859 int ret;
@@ -4919,7 +4886,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4919 struct btrfs_disk_key disk_key; 4886 struct btrfs_disk_key disk_key;
4920 4887
4921 btrfs_node_key(parent, &disk_key, 0); 4888 btrfs_node_key(parent, &disk_key, 0);
4922 fixup_low_keys(fs_info, path, &disk_key, level + 1); 4889 fixup_low_keys(path, &disk_key, level + 1);
4923 } 4890 }
4924 btrfs_mark_buffer_dirty(parent); 4891 btrfs_mark_buffer_dirty(parent);
4925} 4892}
@@ -5022,7 +4989,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5022 struct btrfs_disk_key disk_key; 4989 struct btrfs_disk_key disk_key;
5023 4990
5024 btrfs_item_key(leaf, &disk_key, 0); 4991 btrfs_item_key(leaf, &disk_key, 0);
5025 fixup_low_keys(fs_info, path, &disk_key, 1); 4992 fixup_low_keys(path, &disk_key, 1);
5026 } 4993 }
5027 4994
5028 /* delete the leaf if it is mostly empty */ 4995 /* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..318be7864072 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -55,8 +55,6 @@ struct btrfs_ordered_sum;
55 55
56#define BTRFS_OLDEST_GENERATION 0ULL 56#define BTRFS_OLDEST_GENERATION 0ULL
57 57
58#define BTRFS_COMPAT_EXTENT_TREE_V0
59
60/* 58/*
61 * the max metadata block size. This limit is somewhat artificial, 59 * the max metadata block size. This limit is somewhat artificial,
62 * but the memmove costs go through the roof for larger blocks. 60 * but the memmove costs go through the roof for larger blocks.
@@ -86,6 +84,14 @@ static const int btrfs_csum_sizes[] = { 4 };
86 84
87#define BTRFS_DIRTY_METADATA_THRESH SZ_32M 85#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
88 86
87/*
88 * Use large batch size to reduce overhead of metadata updates. On the reader
89 * side, we only read it when we are close to ENOSPC and the read overhead is
90 * mostly related to the number of CPUs, so it is OK to use arbitrary large
91 * value here.
92 */
93#define BTRFS_TOTAL_BYTES_PINNED_BATCH SZ_128M
94
89#define BTRFS_MAX_EXTENT_SIZE SZ_128M 95#define BTRFS_MAX_EXTENT_SIZE SZ_128M
90 96
91 97
@@ -342,8 +348,8 @@ struct btrfs_path {
342 sizeof(struct btrfs_item)) 348 sizeof(struct btrfs_item))
343struct btrfs_dev_replace { 349struct btrfs_dev_replace {
344 u64 replace_state; /* see #define above */ 350 u64 replace_state; /* see #define above */
345 u64 time_started; /* seconds since 1-Jan-1970 */ 351 time64_t time_started; /* seconds since 1-Jan-1970 */
346 u64 time_stopped; /* seconds since 1-Jan-1970 */ 352 time64_t time_stopped; /* seconds since 1-Jan-1970 */
347 atomic64_t num_write_errors; 353 atomic64_t num_write_errors;
348 atomic64_t num_uncorrectable_read_errors; 354 atomic64_t num_uncorrectable_read_errors;
349 355
@@ -359,8 +365,6 @@ struct btrfs_dev_replace {
359 struct btrfs_device *srcdev; 365 struct btrfs_device *srcdev;
360 struct btrfs_device *tgtdev; 366 struct btrfs_device *tgtdev;
361 367
362 pid_t lock_owner;
363 atomic_t nesting_level;
364 struct mutex lock_finishing_cancel_unmount; 368 struct mutex lock_finishing_cancel_unmount;
365 rwlock_t lock; 369 rwlock_t lock;
366 atomic_t read_locks; 370 atomic_t read_locks;
@@ -1213,7 +1217,6 @@ struct btrfs_root {
1213 u64 defrag_trans_start; 1217 u64 defrag_trans_start;
1214 struct btrfs_key defrag_progress; 1218 struct btrfs_key defrag_progress;
1215 struct btrfs_key defrag_max; 1219 struct btrfs_key defrag_max;
1216 char *name;
1217 1220
1218 /* the dirty list is only used by non-reference counted roots */ 1221 /* the dirty list is only used by non-reference counted roots */
1219 struct list_head dirty_list; 1222 struct list_head dirty_list;
@@ -2428,32 +2431,6 @@ static inline u32 btrfs_file_extent_inline_item_len(
2428 return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; 2431 return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
2429} 2432}
2430 2433
2431/* this returns the number of file bytes represented by the inline item.
2432 * If an item is compressed, this is the uncompressed size
2433 */
2434static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
2435 int slot,
2436 const struct btrfs_file_extent_item *fi)
2437{
2438 struct btrfs_map_token token;
2439
2440 btrfs_init_map_token(&token);
2441 /*
2442 * return the space used on disk if this item isn't
2443 * compressed or encoded
2444 */
2445 if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
2446 btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
2447 btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
2448 return btrfs_file_extent_inline_item_len(eb,
2449 btrfs_item_nr(slot));
2450 }
2451
2452 /* otherwise use the ram bytes field */
2453 return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
2454}
2455
2456
2457/* btrfs_dev_stats_item */ 2434/* btrfs_dev_stats_item */
2458static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, 2435static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
2459 const struct btrfs_dev_stats_item *ptr, 2436 const struct btrfs_dev_stats_item *ptr,
@@ -2676,7 +2653,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
2676 u64 offset, u64 ram_bytes, 2653 u64 offset, u64 ram_bytes,
2677 struct btrfs_key *ins); 2654 struct btrfs_key *ins);
2678int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 2655int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
2679 struct btrfs_fs_info *fs_info,
2680 u64 root_objectid, u64 owner, u64 offset, 2656 u64 root_objectid, u64 owner, u64 offset,
2681 struct btrfs_key *ins); 2657 struct btrfs_key *ins);
2682int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, 2658int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
@@ -2716,15 +2692,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info);
2716int btrfs_read_block_groups(struct btrfs_fs_info *info); 2692int btrfs_read_block_groups(struct btrfs_fs_info *info);
2717int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); 2693int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
2718int btrfs_make_block_group(struct btrfs_trans_handle *trans, 2694int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2719 struct btrfs_fs_info *fs_info, u64 bytes_used, 2695 u64 bytes_used, u64 type, u64 chunk_offset,
2720 u64 type, u64 chunk_offset, u64 size); 2696 u64 size);
2721void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); 2697void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
2722struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( 2698struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
2723 struct btrfs_fs_info *fs_info, 2699 struct btrfs_fs_info *fs_info,
2724 const u64 chunk_offset); 2700 const u64 chunk_offset);
2725int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 2701int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
2726 struct btrfs_fs_info *fs_info, u64 group_start, 2702 u64 group_start, struct extent_map *em);
2727 struct extent_map *em);
2728void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); 2703void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
2729void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); 2704void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
2730void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); 2705void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
@@ -2786,7 +2761,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
2786 unsigned short type); 2761 unsigned short type);
2787void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, 2762void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
2788 struct btrfs_block_rsv *rsv); 2763 struct btrfs_block_rsv *rsv);
2789void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
2790int btrfs_block_rsv_add(struct btrfs_root *root, 2764int btrfs_block_rsv_add(struct btrfs_root *root,
2791 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 2765 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
2792 enum btrfs_reserve_flush_enum flush); 2766 enum btrfs_reserve_flush_enum flush);
@@ -2803,8 +2777,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
2803void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, 2777void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
2804 struct btrfs_block_rsv *block_rsv, 2778 struct btrfs_block_rsv *block_rsv,
2805 u64 num_bytes); 2779 u64 num_bytes);
2806int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, 2780int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
2807 struct btrfs_block_group_cache *cache);
2808void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); 2781void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
2809void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2782void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2810u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2783u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@ -2812,8 +2785,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
2812 u64 start, u64 end); 2785 u64 start, u64 end);
2813int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, 2786int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
2814 u64 num_bytes, u64 *actual_bytes); 2787 u64 num_bytes, u64 *actual_bytes);
2815int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 2788int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
2816 struct btrfs_fs_info *fs_info, u64 type);
2817int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range); 2789int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
2818 2790
2819int btrfs_init_space_info(struct btrfs_fs_info *fs_info); 2791int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
@@ -2822,10 +2794,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2822int btrfs_start_write_no_snapshotting(struct btrfs_root *root); 2794int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
2823void btrfs_end_write_no_snapshotting(struct btrfs_root *root); 2795void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
2824void btrfs_wait_for_snapshot_creation(struct btrfs_root *root); 2796void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
2825void check_system_chunk(struct btrfs_trans_handle *trans, 2797void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
2826 struct btrfs_fs_info *fs_info, const u64 type);
2827u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 2798u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
2828 u64 start, u64 end); 2799 u64 start, u64 end);
2800void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
2829 2801
2830/* ctree.c */ 2802/* ctree.c */
2831int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, 2803int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3011,16 +2983,14 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3011int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 2983int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3012 2984
3013/* root-item.c */ 2985/* root-item.c */
3014int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 2986int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
3015 struct btrfs_fs_info *fs_info, 2987 u64 ref_id, u64 dirid, u64 sequence, const char *name,
3016 u64 root_id, u64 ref_id, u64 dirid, u64 sequence, 2988 int name_len);
3017 const char *name, int name_len); 2989int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
3018int btrfs_del_root_ref(struct btrfs_trans_handle *trans, 2990 u64 ref_id, u64 dirid, u64 *sequence, const char *name,
3019 struct btrfs_fs_info *fs_info, 2991 int name_len);
3020 u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
3021 const char *name, int name_len);
3022int btrfs_del_root(struct btrfs_trans_handle *trans, 2992int btrfs_del_root(struct btrfs_trans_handle *trans,
3023 struct btrfs_fs_info *fs_info, const struct btrfs_key *key); 2993 const struct btrfs_key *key);
3024int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2994int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3025 const struct btrfs_key *key, 2995 const struct btrfs_key *key,
3026 struct btrfs_root_item *item); 2996 struct btrfs_root_item *item);
@@ -3196,7 +3166,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
3196int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 3166int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
3197 size_t size, struct bio *bio, 3167 size_t size, struct bio *bio,
3198 unsigned long bio_flags); 3168 unsigned long bio_flags);
3199void btrfs_set_range_writeback(void *private_data, u64 start, u64 end); 3169void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
3200vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); 3170vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
3201int btrfs_readpage(struct file *file, struct page *page); 3171int btrfs_readpage(struct file *file, struct page *page);
3202void btrfs_evict_inode(struct inode *inode); 3172void btrfs_evict_inode(struct inode *inode);
@@ -3452,7 +3422,7 @@ do { \
3452#ifdef CONFIG_BTRFS_ASSERT 3422#ifdef CONFIG_BTRFS_ASSERT
3453 3423
3454__cold 3424__cold
3455static inline void assfail(char *expr, char *file, int line) 3425static inline void assfail(const char *expr, const char *file, int line)
3456{ 3426{
3457 pr_err("assertion failed: %s, file: %s, line: %d\n", 3427 pr_err("assertion failed: %s, file: %s, line: %d\n",
3458 expr, file, line); 3428 expr, file, line);
@@ -3465,6 +3435,13 @@ static inline void assfail(char *expr, char *file, int line)
3465#define ASSERT(expr) ((void)0) 3435#define ASSERT(expr) ((void)0)
3466#endif 3436#endif
3467 3437
3438__cold
3439static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
3440{
3441 btrfs_err(fs_info,
3442"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
3443}
3444
3468__printf(5, 6) 3445__printf(5, 6)
3469__cold 3446__cold
3470void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, 3447void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe6caa7e698b..f51b509f2d9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1222,7 +1222,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1222 1222
1223int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode) 1223int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1224{ 1224{
1225 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1225 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1226 struct btrfs_trans_handle *trans; 1226 struct btrfs_trans_handle *trans;
1227 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); 1227 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1228 struct btrfs_path *path; 1228 struct btrfs_path *path;
@@ -1418,7 +1418,6 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1418 1418
1419/* Will return 0 or -ENOMEM */ 1419/* Will return 0 or -ENOMEM */
1420int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 1420int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1421 struct btrfs_fs_info *fs_info,
1422 const char *name, int name_len, 1421 const char *name, int name_len,
1423 struct btrfs_inode *dir, 1422 struct btrfs_inode *dir,
1424 struct btrfs_disk_key *disk_key, u8 type, 1423 struct btrfs_disk_key *disk_key, u8 type,
@@ -1458,11 +1457,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1458 */ 1457 */
1459 BUG_ON(ret); 1458 BUG_ON(ret);
1460 1459
1461
1462 mutex_lock(&delayed_node->mutex); 1460 mutex_lock(&delayed_node->mutex);
1463 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1461 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1464 if (unlikely(ret)) { 1462 if (unlikely(ret)) {
1465 btrfs_err(fs_info, 1463 btrfs_err(trans->fs_info,
1466 "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", 1464 "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1467 name_len, name, delayed_node->root->objectid, 1465 name_len, name, delayed_node->root->objectid,
1468 delayed_node->inode_id, ret); 1466 delayed_node->inode_id, ret);
@@ -1495,7 +1493,6 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1495} 1493}
1496 1494
1497int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, 1495int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1498 struct btrfs_fs_info *fs_info,
1499 struct btrfs_inode *dir, u64 index) 1496 struct btrfs_inode *dir, u64 index)
1500{ 1497{
1501 struct btrfs_delayed_node *node; 1498 struct btrfs_delayed_node *node;
@@ -1511,7 +1508,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1511 item_key.type = BTRFS_DIR_INDEX_KEY; 1508 item_key.type = BTRFS_DIR_INDEX_KEY;
1512 item_key.offset = index; 1509 item_key.offset = index;
1513 1510
1514 ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key); 1511 ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1512 &item_key);
1515 if (!ret) 1513 if (!ret)
1516 goto end; 1514 goto end;
1517 1515
@@ -1533,7 +1531,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1533 mutex_lock(&node->mutex); 1531 mutex_lock(&node->mutex);
1534 ret = __btrfs_add_delayed_deletion_item(node, item); 1532 ret = __btrfs_add_delayed_deletion_item(node, item);
1535 if (unlikely(ret)) { 1533 if (unlikely(ret)) {
1536 btrfs_err(fs_info, 1534 btrfs_err(trans->fs_info,
1537 "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)", 1535 "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1538 index, node->root->objectid, node->inode_id, ret); 1536 index, node->root->objectid, node->inode_id, ret);
1539 BUG(); 1537 BUG();
@@ -1837,7 +1835,7 @@ release_node:
1837 1835
1838int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode) 1836int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1839{ 1837{
1840 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1838 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1841 struct btrfs_delayed_node *delayed_node; 1839 struct btrfs_delayed_node *delayed_node;
1842 1840
1843 /* 1841 /*
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index ca7a97f3ab6b..33536cd681d4 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -86,14 +86,12 @@ static inline void btrfs_init_delayed_root(
86} 86}
87 87
88int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 88int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
89 struct btrfs_fs_info *fs_info,
90 const char *name, int name_len, 89 const char *name, int name_len,
91 struct btrfs_inode *dir, 90 struct btrfs_inode *dir,
92 struct btrfs_disk_key *disk_key, u8 type, 91 struct btrfs_disk_key *disk_key, u8 type,
93 u64 index); 92 u64 index);
94 93
95int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, 94int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
96 struct btrfs_fs_info *fs_info,
97 struct btrfs_inode *dir, u64 index); 95 struct btrfs_inode *dir, u64 index);
98 96
99int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode); 97int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 03dec673d12a..62ff545ba1f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -709,13 +709,13 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
709 * to make sure the delayed ref is eventually processed before this 709 * to make sure the delayed ref is eventually processed before this
710 * transaction commits. 710 * transaction commits.
711 */ 711 */
712int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 712int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
713 struct btrfs_trans_handle *trans,
714 u64 bytenr, u64 num_bytes, u64 parent, 713 u64 bytenr, u64 num_bytes, u64 parent,
715 u64 ref_root, int level, int action, 714 u64 ref_root, int level, int action,
716 struct btrfs_delayed_extent_op *extent_op, 715 struct btrfs_delayed_extent_op *extent_op,
717 int *old_ref_mod, int *new_ref_mod) 716 int *old_ref_mod, int *new_ref_mod)
718{ 717{
718 struct btrfs_fs_info *fs_info = trans->fs_info;
719 struct btrfs_delayed_tree_ref *ref; 719 struct btrfs_delayed_tree_ref *ref;
720 struct btrfs_delayed_ref_head *head_ref; 720 struct btrfs_delayed_ref_head *head_ref;
721 struct btrfs_delayed_ref_root *delayed_refs; 721 struct btrfs_delayed_ref_root *delayed_refs;
@@ -730,27 +730,33 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
730 if (!ref) 730 if (!ref)
731 return -ENOMEM; 731 return -ENOMEM;
732 732
733 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
734 if (!head_ref) {
735 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
736 return -ENOMEM;
737 }
738
739 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
740 is_fstree(ref_root)) {
741 record = kmalloc(sizeof(*record), GFP_NOFS);
742 if (!record) {
743 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
744 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
745 return -ENOMEM;
746 }
747 }
748
733 if (parent) 749 if (parent)
734 ref_type = BTRFS_SHARED_BLOCK_REF_KEY; 750 ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
735 else 751 else
736 ref_type = BTRFS_TREE_BLOCK_REF_KEY; 752 ref_type = BTRFS_TREE_BLOCK_REF_KEY;
753
737 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, 754 init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
738 ref_root, action, ref_type); 755 ref_root, action, ref_type);
739 ref->root = ref_root; 756 ref->root = ref_root;
740 ref->parent = parent; 757 ref->parent = parent;
741 ref->level = level; 758 ref->level = level;
742 759
743 head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
744 if (!head_ref)
745 goto free_ref;
746
747 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
748 is_fstree(ref_root)) {
749 record = kmalloc(sizeof(*record), GFP_NOFS);
750 if (!record)
751 goto free_head_ref;
752 }
753
754 init_delayed_ref_head(head_ref, record, bytenr, num_bytes, 760 init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
755 ref_root, 0, action, false, is_system); 761 ref_root, 0, action, false, is_system);
756 head_ref->extent_op = extent_op; 762 head_ref->extent_op = extent_op;
@@ -779,25 +785,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
779 btrfs_qgroup_trace_extent_post(fs_info, record); 785 btrfs_qgroup_trace_extent_post(fs_info, record);
780 786
781 return 0; 787 return 0;
782
783free_head_ref:
784 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
785free_ref:
786 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
787
788 return -ENOMEM;
789} 788}
790 789
791/* 790/*
792 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. 791 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
793 */ 792 */
794int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 793int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
795 struct btrfs_trans_handle *trans,
796 u64 bytenr, u64 num_bytes, 794 u64 bytenr, u64 num_bytes,
797 u64 parent, u64 ref_root, 795 u64 parent, u64 ref_root,
798 u64 owner, u64 offset, u64 reserved, int action, 796 u64 owner, u64 offset, u64 reserved, int action,
799 int *old_ref_mod, int *new_ref_mod) 797 int *old_ref_mod, int *new_ref_mod)
800{ 798{
799 struct btrfs_fs_info *fs_info = trans->fs_info;
801 struct btrfs_delayed_data_ref *ref; 800 struct btrfs_delayed_data_ref *ref;
802 struct btrfs_delayed_ref_head *head_ref; 801 struct btrfs_delayed_ref_head *head_ref;
803 struct btrfs_delayed_ref_root *delayed_refs; 802 struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ea1aecb6a50d..d9f2a4ebd5db 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -234,14 +234,12 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
234 kmem_cache_free(btrfs_delayed_ref_head_cachep, head); 234 kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
235} 235}
236 236
237int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 237int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
238 struct btrfs_trans_handle *trans,
239 u64 bytenr, u64 num_bytes, u64 parent, 238 u64 bytenr, u64 num_bytes, u64 parent,
240 u64 ref_root, int level, int action, 239 u64 ref_root, int level, int action,
241 struct btrfs_delayed_extent_op *extent_op, 240 struct btrfs_delayed_extent_op *extent_op,
242 int *old_ref_mod, int *new_ref_mod); 241 int *old_ref_mod, int *new_ref_mod);
243int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, 242int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
244 struct btrfs_trans_handle *trans,
245 u64 bytenr, u64 num_bytes, 243 u64 bytenr, u64 num_bytes,
246 u64 parent, u64 ref_root, 244 u64 parent, u64 ref_root,
247 u64 owner, u64 offset, u64 reserved, int action, 245 u64 owner, u64 offset, u64 reserved, int action,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e2ba0419297a..dec01970d8c5 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -6,14 +6,9 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/bio.h> 7#include <linux/bio.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/buffer_head.h>
10#include <linux/blkdev.h> 9#include <linux/blkdev.h>
11#include <linux/random.h>
12#include <linux/iocontext.h>
13#include <linux/capability.h>
14#include <linux/kthread.h> 10#include <linux/kthread.h>
15#include <linux/math64.h> 11#include <linux/math64.h>
16#include <asm/div64.h>
17#include "ctree.h" 12#include "ctree.h"
18#include "extent_map.h" 13#include "extent_map.h"
19#include "disk-io.h" 14#include "disk-io.h"
@@ -465,7 +460,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
465 * go to the tgtdev as well (refer to btrfs_map_block()). 460 * go to the tgtdev as well (refer to btrfs_map_block()).
466 */ 461 */
467 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; 462 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
468 dev_replace->time_started = get_seconds(); 463 dev_replace->time_started = ktime_get_real_seconds();
469 dev_replace->cursor_left = 0; 464 dev_replace->cursor_left = 0;
470 dev_replace->committed_cursor_left = 0; 465 dev_replace->committed_cursor_left = 0;
471 dev_replace->cursor_left_last_write_of_item = 0; 466 dev_replace->cursor_left_last_write_of_item = 0;
@@ -511,7 +506,7 @@ leave:
511 dev_replace->srcdev = NULL; 506 dev_replace->srcdev = NULL;
512 dev_replace->tgtdev = NULL; 507 dev_replace->tgtdev = NULL;
513 btrfs_dev_replace_write_unlock(dev_replace); 508 btrfs_dev_replace_write_unlock(dev_replace);
514 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 509 btrfs_destroy_dev_replace_tgtdev(tgt_device);
515 return ret; 510 return ret;
516} 511}
517 512
@@ -618,7 +613,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
618 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; 613 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
619 dev_replace->tgtdev = NULL; 614 dev_replace->tgtdev = NULL;
620 dev_replace->srcdev = NULL; 615 dev_replace->srcdev = NULL;
621 dev_replace->time_stopped = get_seconds(); 616 dev_replace->time_stopped = ktime_get_real_seconds();
622 dev_replace->item_needs_writeback = 1; 617 dev_replace->item_needs_writeback = 1;
623 618
624 /* replace old device with new one in mapping tree */ 619 /* replace old device with new one in mapping tree */
@@ -637,7 +632,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
637 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 632 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
638 btrfs_rm_dev_replace_blocked(fs_info); 633 btrfs_rm_dev_replace_blocked(fs_info);
639 if (tgt_device) 634 if (tgt_device)
640 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 635 btrfs_destroy_dev_replace_tgtdev(tgt_device);
641 btrfs_rm_dev_replace_unblocked(fs_info); 636 btrfs_rm_dev_replace_unblocked(fs_info);
642 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 637 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
643 638
@@ -663,7 +658,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
663 tgt_device->commit_total_bytes = src_device->commit_total_bytes; 658 tgt_device->commit_total_bytes = src_device->commit_total_bytes;
664 tgt_device->commit_bytes_used = src_device->bytes_used; 659 tgt_device->commit_bytes_used = src_device->bytes_used;
665 660
666 btrfs_assign_next_active_device(fs_info, src_device, tgt_device); 661 btrfs_assign_next_active_device(src_device, tgt_device);
667 662
668 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 663 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
669 fs_info->fs_devices->rw_devices++; 664 fs_info->fs_devices->rw_devices++;
@@ -672,11 +667,17 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
672 667
673 btrfs_rm_dev_replace_blocked(fs_info); 668 btrfs_rm_dev_replace_blocked(fs_info);
674 669
675 btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device); 670 btrfs_rm_dev_replace_remove_srcdev(src_device);
676 671
677 btrfs_rm_dev_replace_unblocked(fs_info); 672 btrfs_rm_dev_replace_unblocked(fs_info);
678 673
679 /* 674 /*
675 * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
676 * update on-disk dev stats value during commit transaction
677 */
678 atomic_inc(&tgt_device->dev_stats_ccnt);
679
680 /*
680 * this is again a consistent state where no dev_replace procedure 681 * this is again a consistent state where no dev_replace procedure
681 * is running, the target device is part of the filesystem, the 682 * is running, the target device is part of the filesystem, the
682 * source device is not part of the filesystem anymore and its 1st 683 * source device is not part of the filesystem anymore and its 1st
@@ -807,7 +808,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
807 break; 808 break;
808 } 809 }
809 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 810 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
810 dev_replace->time_stopped = get_seconds(); 811 dev_replace->time_stopped = ktime_get_real_seconds();
811 dev_replace->item_needs_writeback = 1; 812 dev_replace->item_needs_writeback = 1;
812 btrfs_dev_replace_write_unlock(dev_replace); 813 btrfs_dev_replace_write_unlock(dev_replace);
813 btrfs_scrub_cancel(fs_info); 814 btrfs_scrub_cancel(fs_info);
@@ -826,7 +827,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
826 btrfs_dev_name(tgt_device)); 827 btrfs_dev_name(tgt_device));
827 828
828 if (tgt_device) 829 if (tgt_device)
829 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 830 btrfs_destroy_dev_replace_tgtdev(tgt_device);
830 831
831leave: 832leave:
832 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 833 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@@ -848,7 +849,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
848 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 849 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
849 dev_replace->replace_state = 850 dev_replace->replace_state =
850 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; 851 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
851 dev_replace->time_stopped = get_seconds(); 852 dev_replace->time_stopped = ktime_get_real_seconds();
852 dev_replace->item_needs_writeback = 1; 853 dev_replace->item_needs_writeback = 1;
853 btrfs_info(fs_info, "suspending dev_replace for unmount"); 854 btrfs_info(fs_info, "suspending dev_replace for unmount");
854 break; 855 break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 39e9766d1cbd..a678b07fcf01 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -160,8 +160,8 @@ second_insert:
160 } 160 }
161 btrfs_release_path(path); 161 btrfs_release_path(path);
162 162
163 ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, 163 ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
164 name_len, dir, &disk_key, type, index); 164 &disk_key, type, index);
165out_free: 165out_free:
166 btrfs_free_path(path); 166 btrfs_free_path(path);
167 if (ret) 167 if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092dc9390..5124c15705ce 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -5,8 +5,6 @@
5 5
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/blkdev.h> 7#include <linux/blkdev.h>
8#include <linux/scatterlist.h>
9#include <linux/swap.h>
10#include <linux/radix-tree.h> 8#include <linux/radix-tree.h>
11#include <linux/writeback.h> 9#include <linux/writeback.h>
12#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
@@ -54,7 +52,6 @@
54 52
55static const struct extent_io_ops btree_extent_io_ops; 53static const struct extent_io_ops btree_extent_io_ops;
56static void end_workqueue_fn(struct btrfs_work *work); 54static void end_workqueue_fn(struct btrfs_work *work);
57static void free_fs_root(struct btrfs_root *root);
58static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 55static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
59static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 56static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
60 struct btrfs_fs_info *fs_info); 57 struct btrfs_fs_info *fs_info);
@@ -108,12 +105,9 @@ void __cold btrfs_end_io_wq_exit(void)
108 */ 105 */
109struct async_submit_bio { 106struct async_submit_bio {
110 void *private_data; 107 void *private_data;
111 struct btrfs_fs_info *fs_info;
112 struct bio *bio; 108 struct bio *bio;
113 extent_submit_bio_start_t *submit_bio_start; 109 extent_submit_bio_start_t *submit_bio_start;
114 extent_submit_bio_done_t *submit_bio_done;
115 int mirror_num; 110 int mirror_num;
116 unsigned long bio_flags;
117 /* 111 /*
118 * bio_offset is optional, can be used if the pages in the bio 112 * bio_offset is optional, can be used if the pages in the bio
119 * can't tell us where in the file the bio should go 113 * can't tell us where in the file the bio should go
@@ -212,7 +206,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
212 struct page *page, size_t pg_offset, u64 start, u64 len, 206 struct page *page, size_t pg_offset, u64 start, u64 len,
213 int create) 207 int create)
214{ 208{
215 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 209 struct btrfs_fs_info *fs_info = inode->root->fs_info;
216 struct extent_map_tree *em_tree = &inode->extent_tree; 210 struct extent_map_tree *em_tree = &inode->extent_tree;
217 struct extent_map *em; 211 struct extent_map *em;
218 int ret; 212 int ret;
@@ -615,8 +609,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
615 609
616 found_start = btrfs_header_bytenr(eb); 610 found_start = btrfs_header_bytenr(eb);
617 if (found_start != eb->start) { 611 if (found_start != eb->start) {
618 btrfs_err_rl(fs_info, "bad tree block start %llu %llu", 612 btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
619 found_start, eb->start); 613 eb->start, found_start);
620 ret = -EIO; 614 ret = -EIO;
621 goto err; 615 goto err;
622 } 616 }
@@ -628,8 +622,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
628 } 622 }
629 found_level = btrfs_header_level(eb); 623 found_level = btrfs_header_level(eb);
630 if (found_level >= BTRFS_MAX_LEVEL) { 624 if (found_level >= BTRFS_MAX_LEVEL) {
631 btrfs_err(fs_info, "bad tree block level %d", 625 btrfs_err(fs_info, "bad tree block level %d on %llu",
632 (int)btrfs_header_level(eb)); 626 (int)btrfs_header_level(eb), eb->start);
633 ret = -EIO; 627 ret = -EIO;
634 goto err; 628 goto err;
635 } 629 }
@@ -779,7 +773,7 @@ static void run_one_async_done(struct btrfs_work *work)
779 return; 773 return;
780 } 774 }
781 775
782 async->submit_bio_done(async->private_data, async->bio, async->mirror_num); 776 btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
783} 777}
784 778
785static void run_one_async_free(struct btrfs_work *work) 779static void run_one_async_free(struct btrfs_work *work)
@@ -793,8 +787,7 @@ static void run_one_async_free(struct btrfs_work *work)
793blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 787blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
794 int mirror_num, unsigned long bio_flags, 788 int mirror_num, unsigned long bio_flags,
795 u64 bio_offset, void *private_data, 789 u64 bio_offset, void *private_data,
796 extent_submit_bio_start_t *submit_bio_start, 790 extent_submit_bio_start_t *submit_bio_start)
797 extent_submit_bio_done_t *submit_bio_done)
798{ 791{
799 struct async_submit_bio *async; 792 struct async_submit_bio *async;
800 793
@@ -803,16 +796,13 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
803 return BLK_STS_RESOURCE; 796 return BLK_STS_RESOURCE;
804 797
805 async->private_data = private_data; 798 async->private_data = private_data;
806 async->fs_info = fs_info;
807 async->bio = bio; 799 async->bio = bio;
808 async->mirror_num = mirror_num; 800 async->mirror_num = mirror_num;
809 async->submit_bio_start = submit_bio_start; 801 async->submit_bio_start = submit_bio_start;
810 async->submit_bio_done = submit_bio_done;
811 802
812 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, 803 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
813 run_one_async_done, run_one_async_free); 804 run_one_async_done, run_one_async_free);
814 805
815 async->bio_flags = bio_flags;
816 async->bio_offset = bio_offset; 806 async->bio_offset = bio_offset;
817 807
818 async->status = 0; 808 async->status = 0;
@@ -851,24 +841,6 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
851 return btree_csum_one_bio(bio); 841 return btree_csum_one_bio(bio);
852} 842}
853 843
854static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
855 int mirror_num)
856{
857 struct inode *inode = private_data;
858 blk_status_t ret;
859
860 /*
861 * when we're called for a write, we're already in the async
862 * submission context. Just jump into btrfs_map_bio
863 */
864 ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
865 if (ret) {
866 bio->bi_status = ret;
867 bio_endio(bio);
868 }
869 return ret;
870}
871
872static int check_async_write(struct btrfs_inode *bi) 844static int check_async_write(struct btrfs_inode *bi)
873{ 845{
874 if (atomic_read(&bi->sync_writers)) 846 if (atomic_read(&bi->sync_writers))
@@ -911,8 +883,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
911 */ 883 */
912 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0, 884 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
913 bio_offset, private_data, 885 bio_offset, private_data,
914 btree_submit_bio_start, 886 btree_submit_bio_start);
915 btree_submit_bio_done);
916 } 887 }
917 888
918 if (ret) 889 if (ret)
@@ -961,8 +932,9 @@ static int btree_writepages(struct address_space *mapping,
961 932
962 fs_info = BTRFS_I(mapping->host)->root->fs_info; 933 fs_info = BTRFS_I(mapping->host)->root->fs_info;
963 /* this is a bit racy, but that's ok */ 934 /* this is a bit racy, but that's ok */
964 ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, 935 ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
965 BTRFS_DIRTY_METADATA_THRESH); 936 BTRFS_DIRTY_METADATA_THRESH,
937 fs_info->dirty_metadata_batch);
966 if (ret < 0) 938 if (ret < 0)
967 return 0; 939 return 0;
968 } 940 }
@@ -1181,7 +1153,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
1181 root->highest_objectid = 0; 1153 root->highest_objectid = 0;
1182 root->nr_delalloc_inodes = 0; 1154 root->nr_delalloc_inodes = 0;
1183 root->nr_ordered_extents = 0; 1155 root->nr_ordered_extents = 0;
1184 root->name = NULL;
1185 root->inode_tree = RB_ROOT; 1156 root->inode_tree = RB_ROOT;
1186 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1157 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1187 root->block_rsv = NULL; 1158 root->block_rsv = NULL;
@@ -1292,15 +1263,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1292 goto fail; 1263 goto fail;
1293 } 1264 }
1294 1265
1295 memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
1296 btrfs_set_header_bytenr(leaf, leaf->start);
1297 btrfs_set_header_generation(leaf, trans->transid);
1298 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1299 btrfs_set_header_owner(leaf, objectid);
1300 root->node = leaf; 1266 root->node = leaf;
1301
1302 write_extent_buffer_fsid(leaf, fs_info->fsid);
1303 write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
1304 btrfs_mark_buffer_dirty(leaf); 1267 btrfs_mark_buffer_dirty(leaf);
1305 1268
1306 root->commit_root = btrfs_root_node(root); 1269 root->commit_root = btrfs_root_node(root);
@@ -1374,14 +1337,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1374 return ERR_CAST(leaf); 1337 return ERR_CAST(leaf);
1375 } 1338 }
1376 1339
1377 memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
1378 btrfs_set_header_bytenr(leaf, leaf->start);
1379 btrfs_set_header_generation(leaf, trans->transid);
1380 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1381 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1382 root->node = leaf; 1340 root->node = leaf;
1383 1341
1384 write_extent_buffer_fsid(root->node, fs_info->fsid);
1385 btrfs_mark_buffer_dirty(root->node); 1342 btrfs_mark_buffer_dirty(root->node);
1386 btrfs_tree_unlock(root->node); 1343 btrfs_tree_unlock(root->node);
1387 return root; 1344 return root;
@@ -1546,7 +1503,7 @@ int btrfs_init_fs_root(struct btrfs_root *root)
1546 1503
1547 return 0; 1504 return 0;
1548fail: 1505fail:
1549 /* the caller is responsible to call free_fs_root */ 1506 /* The caller is responsible to call btrfs_free_fs_root */
1550 return ret; 1507 return ret;
1551} 1508}
1552 1509
@@ -1651,14 +1608,14 @@ again:
1651 ret = btrfs_insert_fs_root(fs_info, root); 1608 ret = btrfs_insert_fs_root(fs_info, root);
1652 if (ret) { 1609 if (ret) {
1653 if (ret == -EEXIST) { 1610 if (ret == -EEXIST) {
1654 free_fs_root(root); 1611 btrfs_free_fs_root(root);
1655 goto again; 1612 goto again;
1656 } 1613 }
1657 goto fail; 1614 goto fail;
1658 } 1615 }
1659 return root; 1616 return root;
1660fail: 1617fail:
1661 free_fs_root(root); 1618 btrfs_free_fs_root(root);
1662 return ERR_PTR(ret); 1619 return ERR_PTR(ret);
1663} 1620}
1664 1621
@@ -1803,7 +1760,7 @@ static int transaction_kthread(void *arg)
1803 struct btrfs_trans_handle *trans; 1760 struct btrfs_trans_handle *trans;
1804 struct btrfs_transaction *cur; 1761 struct btrfs_transaction *cur;
1805 u64 transid; 1762 u64 transid;
1806 unsigned long now; 1763 time64_t now;
1807 unsigned long delay; 1764 unsigned long delay;
1808 bool cannot_commit; 1765 bool cannot_commit;
1809 1766
@@ -1819,7 +1776,7 @@ static int transaction_kthread(void *arg)
1819 goto sleep; 1776 goto sleep;
1820 } 1777 }
1821 1778
1822 now = get_seconds(); 1779 now = ktime_get_seconds();
1823 if (cur->state < TRANS_STATE_BLOCKED && 1780 if (cur->state < TRANS_STATE_BLOCKED &&
1824 !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && 1781 !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
1825 (now < cur->start_time || 1782 (now < cur->start_time ||
@@ -2196,8 +2153,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
2196 2153
2197static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) 2154static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
2198{ 2155{
2199 fs_info->dev_replace.lock_owner = 0;
2200 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2201 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); 2156 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2202 rwlock_init(&fs_info->dev_replace.lock); 2157 rwlock_init(&fs_info->dev_replace.lock);
2203 atomic_set(&fs_info->dev_replace.read_locks, 0); 2158 atomic_set(&fs_info->dev_replace.read_locks, 0);
@@ -3075,6 +3030,13 @@ retry_root_backup:
3075 fs_info->generation = generation; 3030 fs_info->generation = generation;
3076 fs_info->last_trans_committed = generation; 3031 fs_info->last_trans_committed = generation;
3077 3032
3033 ret = btrfs_verify_dev_extents(fs_info);
3034 if (ret) {
3035 btrfs_err(fs_info,
3036 "failed to verify dev extents against chunks: %d",
3037 ret);
3038 goto fail_block_groups;
3039 }
3078 ret = btrfs_recover_balance(fs_info); 3040 ret = btrfs_recover_balance(fs_info);
3079 if (ret) { 3041 if (ret) {
3080 btrfs_err(fs_info, "failed to recover balance: %d", ret); 3042 btrfs_err(fs_info, "failed to recover balance: %d", ret);
@@ -3875,10 +3837,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3875 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3837 __btrfs_remove_free_space_cache(root->free_ino_pinned);
3876 if (root->free_ino_ctl) 3838 if (root->free_ino_ctl)
3877 __btrfs_remove_free_space_cache(root->free_ino_ctl); 3839 __btrfs_remove_free_space_cache(root->free_ino_ctl);
3878 free_fs_root(root); 3840 btrfs_free_fs_root(root);
3879} 3841}
3880 3842
3881static void free_fs_root(struct btrfs_root *root) 3843void btrfs_free_fs_root(struct btrfs_root *root)
3882{ 3844{
3883 iput(root->ino_cache_inode); 3845 iput(root->ino_cache_inode);
3884 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 3846 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
@@ -3890,15 +3852,9 @@ static void free_fs_root(struct btrfs_root *root)
3890 free_extent_buffer(root->commit_root); 3852 free_extent_buffer(root->commit_root);
3891 kfree(root->free_ino_ctl); 3853 kfree(root->free_ino_ctl);
3892 kfree(root->free_ino_pinned); 3854 kfree(root->free_ino_pinned);
3893 kfree(root->name);
3894 btrfs_put_fs_root(root); 3855 btrfs_put_fs_root(root);
3895} 3856}
3896 3857
3897void btrfs_free_fs_root(struct btrfs_root *root)
3898{
3899 free_fs_root(root);
3900}
3901
3902int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 3858int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
3903{ 3859{
3904 u64 root_objectid = 0; 3860 u64 root_objectid = 0;
@@ -4104,10 +4060,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
4104#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 4060#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4105 /* 4061 /*
4106 * This is a fast path so only do this check if we have sanity tests 4062 * This is a fast path so only do this check if we have sanity tests
4107 * enabled. Normal people shouldn't be marking dummy buffers as dirty 4063 * enabled. Normal people shouldn't be using umapped buffers as dirty
4108 * outside of the sanity tests. 4064 * outside of the sanity tests.
4109 */ 4065 */
4110 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) 4066 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
4111 return; 4067 return;
4112#endif 4068#endif
4113 root = BTRFS_I(buf->pages[0]->mapping->host)->root; 4069 root = BTRFS_I(buf->pages[0]->mapping->host)->root;
@@ -4150,8 +4106,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
4150 if (flush_delayed) 4106 if (flush_delayed)
4151 btrfs_balance_delayed_items(fs_info); 4107 btrfs_balance_delayed_items(fs_info);
4152 4108
4153 ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, 4109 ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
4154 BTRFS_DIRTY_METADATA_THRESH); 4110 BTRFS_DIRTY_METADATA_THRESH,
4111 fs_info->dirty_metadata_batch);
4155 if (ret > 0) { 4112 if (ret > 0) {
4156 balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); 4113 balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
4157 } 4114 }
@@ -4563,21 +4520,11 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
4563 return 0; 4520 return 0;
4564} 4521}
4565 4522
4566static struct btrfs_fs_info *btree_fs_info(void *private_data)
4567{
4568 struct inode *inode = private_data;
4569 return btrfs_sb(inode->i_sb);
4570}
4571
4572static const struct extent_io_ops btree_extent_io_ops = { 4523static const struct extent_io_ops btree_extent_io_ops = {
4573 /* mandatory callbacks */ 4524 /* mandatory callbacks */
4574 .submit_bio_hook = btree_submit_bio_hook, 4525 .submit_bio_hook = btree_submit_bio_hook,
4575 .readpage_end_io_hook = btree_readpage_end_io_hook, 4526 .readpage_end_io_hook = btree_readpage_end_io_hook,
4576 /* note we're sharing with inode.c for the merge bio hook */
4577 .merge_bio_hook = btrfs_merge_bio_hook,
4578 .readpage_io_failed_hook = btree_io_failed_hook, 4527 .readpage_io_failed_hook = btree_io_failed_hook,
4579 .set_range_writeback = btrfs_set_range_writeback,
4580 .tree_fs_info = btree_fs_info,
4581 4528
4582 /* optional callbacks */ 4529 /* optional callbacks */
4583}; 4530};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 1a3d277b027b..4cccba22640f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -120,8 +120,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
120blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 120blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
121 int mirror_num, unsigned long bio_flags, 121 int mirror_num, unsigned long bio_flags,
122 u64 bio_offset, void *private_data, 122 u64 bio_offset, void *private_data,
123 extent_submit_bio_start_t *submit_bio_start, 123 extent_submit_bio_start_t *submit_bio_start);
124 extent_submit_bio_done_t *submit_bio_done); 124blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
125 int mirror_num);
125int btrfs_write_tree_block(struct extent_buffer *buf); 126int btrfs_write_tree_block(struct extent_buffer *buf);
126void btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 127void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
127int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 128int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d9fe58c0080..de6f75f5547b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,24 +52,21 @@ enum {
52}; 52};
53 53
54static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 54static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
55 struct btrfs_fs_info *fs_info, 55 struct btrfs_delayed_ref_node *node, u64 parent,
56 struct btrfs_delayed_ref_node *node, u64 parent, 56 u64 root_objectid, u64 owner_objectid,
57 u64 root_objectid, u64 owner_objectid, 57 u64 owner_offset, int refs_to_drop,
58 u64 owner_offset, int refs_to_drop, 58 struct btrfs_delayed_extent_op *extra_op);
59 struct btrfs_delayed_extent_op *extra_op);
60static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 59static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
61 struct extent_buffer *leaf, 60 struct extent_buffer *leaf,
62 struct btrfs_extent_item *ei); 61 struct btrfs_extent_item *ei);
63static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 62static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
64 struct btrfs_fs_info *fs_info,
65 u64 parent, u64 root_objectid, 63 u64 parent, u64 root_objectid,
66 u64 flags, u64 owner, u64 offset, 64 u64 flags, u64 owner, u64 offset,
67 struct btrfs_key *ins, int ref_mod); 65 struct btrfs_key *ins, int ref_mod);
68static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 66static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
69 struct btrfs_delayed_ref_node *node, 67 struct btrfs_delayed_ref_node *node,
70 struct btrfs_delayed_extent_op *extent_op); 68 struct btrfs_delayed_extent_op *extent_op);
71static int do_chunk_alloc(struct btrfs_trans_handle *trans, 69static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
72 struct btrfs_fs_info *fs_info, u64 flags,
73 int force); 70 int force);
74static int find_next_key(struct btrfs_path *path, int level, 71static int find_next_key(struct btrfs_path *path, int level,
75 struct btrfs_key *key); 72 struct btrfs_key *key);
@@ -220,9 +217,9 @@ static int add_excluded_extent(struct btrfs_fs_info *fs_info,
220 return 0; 217 return 0;
221} 218}
222 219
223static void free_excluded_extents(struct btrfs_fs_info *fs_info, 220static void free_excluded_extents(struct btrfs_block_group_cache *cache)
224 struct btrfs_block_group_cache *cache)
225{ 221{
222 struct btrfs_fs_info *fs_info = cache->fs_info;
226 u64 start, end; 223 u64 start, end;
227 224
228 start = cache->key.objectid; 225 start = cache->key.objectid;
@@ -234,9 +231,9 @@ static void free_excluded_extents(struct btrfs_fs_info *fs_info,
234 start, end, EXTENT_UPTODATE); 231 start, end, EXTENT_UPTODATE);
235} 232}
236 233
237static int exclude_super_stripes(struct btrfs_fs_info *fs_info, 234static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
238 struct btrfs_block_group_cache *cache)
239{ 235{
236 struct btrfs_fs_info *fs_info = cache->fs_info;
240 u64 bytenr; 237 u64 bytenr;
241 u64 *logical; 238 u64 *logical;
242 int stripe_len; 239 int stripe_len;
@@ -558,7 +555,7 @@ static noinline void caching_thread(struct btrfs_work *work)
558 caching_ctl->progress = (u64)-1; 555 caching_ctl->progress = (u64)-1;
559 556
560 up_read(&fs_info->commit_root_sem); 557 up_read(&fs_info->commit_root_sem);
561 free_excluded_extents(fs_info, block_group); 558 free_excluded_extents(block_group);
562 mutex_unlock(&caching_ctl->mutex); 559 mutex_unlock(&caching_ctl->mutex);
563 560
564 wake_up(&caching_ctl->wait); 561 wake_up(&caching_ctl->wait);
@@ -666,7 +663,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
666 wake_up(&caching_ctl->wait); 663 wake_up(&caching_ctl->wait);
667 if (ret == 1) { 664 if (ret == 1) {
668 put_caching_control(caching_ctl); 665 put_caching_control(caching_ctl);
669 free_excluded_extents(fs_info, cache); 666 free_excluded_extents(cache);
670 return 0; 667 return 0;
671 } 668 }
672 } else { 669 } else {
@@ -758,7 +755,8 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
758 755
759 space_info = __find_space_info(fs_info, flags); 756 space_info = __find_space_info(fs_info, flags);
760 ASSERT(space_info); 757 ASSERT(space_info);
761 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); 758 percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
759 BTRFS_TOTAL_BYTES_PINNED_BATCH);
762} 760}
763 761
764/* 762/*
@@ -870,18 +868,16 @@ search_again:
870 num_refs = btrfs_extent_refs(leaf, ei); 868 num_refs = btrfs_extent_refs(leaf, ei);
871 extent_flags = btrfs_extent_flags(leaf, ei); 869 extent_flags = btrfs_extent_flags(leaf, ei);
872 } else { 870 } else {
873#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 871 ret = -EINVAL;
874 struct btrfs_extent_item_v0 *ei0; 872 btrfs_print_v0_err(fs_info);
875 BUG_ON(item_size != sizeof(*ei0)); 873 if (trans)
876 ei0 = btrfs_item_ptr(leaf, path->slots[0], 874 btrfs_abort_transaction(trans, ret);
877 struct btrfs_extent_item_v0); 875 else
878 num_refs = btrfs_extent_refs_v0(leaf, ei0); 876 btrfs_handle_fs_error(fs_info, ret, NULL);
879 /* FIXME: this isn't correct for data */ 877
880 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 878 goto out_free;
881#else
882 BUG();
883#endif
884 } 879 }
880
885 BUG_ON(num_refs == 0); 881 BUG_ON(num_refs == 0);
886 } else { 882 } else {
887 num_refs = 0; 883 num_refs = 0;
@@ -1039,89 +1035,6 @@ out_free:
1039 * tree block info structure. 1035 * tree block info structure.
1040 */ 1036 */
1041 1037
1042#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1043static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
1044 struct btrfs_fs_info *fs_info,
1045 struct btrfs_path *path,
1046 u64 owner, u32 extra_size)
1047{
1048 struct btrfs_root *root = fs_info->extent_root;
1049 struct btrfs_extent_item *item;
1050 struct btrfs_extent_item_v0 *ei0;
1051 struct btrfs_extent_ref_v0 *ref0;
1052 struct btrfs_tree_block_info *bi;
1053 struct extent_buffer *leaf;
1054 struct btrfs_key key;
1055 struct btrfs_key found_key;
1056 u32 new_size = sizeof(*item);
1057 u64 refs;
1058 int ret;
1059
1060 leaf = path->nodes[0];
1061 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
1062
1063 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1064 ei0 = btrfs_item_ptr(leaf, path->slots[0],
1065 struct btrfs_extent_item_v0);
1066 refs = btrfs_extent_refs_v0(leaf, ei0);
1067
1068 if (owner == (u64)-1) {
1069 while (1) {
1070 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1071 ret = btrfs_next_leaf(root, path);
1072 if (ret < 0)
1073 return ret;
1074 BUG_ON(ret > 0); /* Corruption */
1075 leaf = path->nodes[0];
1076 }
1077 btrfs_item_key_to_cpu(leaf, &found_key,
1078 path->slots[0]);
1079 BUG_ON(key.objectid != found_key.objectid);
1080 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
1081 path->slots[0]++;
1082 continue;
1083 }
1084 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1085 struct btrfs_extent_ref_v0);
1086 owner = btrfs_ref_objectid_v0(leaf, ref0);
1087 break;
1088 }
1089 }
1090 btrfs_release_path(path);
1091
1092 if (owner < BTRFS_FIRST_FREE_OBJECTID)
1093 new_size += sizeof(*bi);
1094
1095 new_size -= sizeof(*ei0);
1096 ret = btrfs_search_slot(trans, root, &key, path,
1097 new_size + extra_size, 1);
1098 if (ret < 0)
1099 return ret;
1100 BUG_ON(ret); /* Corruption */
1101
1102 btrfs_extend_item(fs_info, path, new_size);
1103
1104 leaf = path->nodes[0];
1105 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1106 btrfs_set_extent_refs(leaf, item, refs);
1107 /* FIXME: get real generation */
1108 btrfs_set_extent_generation(leaf, item, 0);
1109 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1110 btrfs_set_extent_flags(leaf, item,
1111 BTRFS_EXTENT_FLAG_TREE_BLOCK |
1112 BTRFS_BLOCK_FLAG_FULL_BACKREF);
1113 bi = (struct btrfs_tree_block_info *)(item + 1);
1114 /* FIXME: get first key of the block */
1115 memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
1116 btrfs_set_tree_block_level(leaf, bi, (int)owner);
1117 } else {
1118 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
1119 }
1120 btrfs_mark_buffer_dirty(leaf);
1121 return 0;
1122}
1123#endif
1124
1125/* 1038/*
1126 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, 1039 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
1127 * is_data == BTRFS_REF_TYPE_DATA, data type is requried, 1040 * is_data == BTRFS_REF_TYPE_DATA, data type is requried,
@@ -1216,13 +1129,12 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
1216} 1129}
1217 1130
1218static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 1131static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1219 struct btrfs_fs_info *fs_info,
1220 struct btrfs_path *path, 1132 struct btrfs_path *path,
1221 u64 bytenr, u64 parent, 1133 u64 bytenr, u64 parent,
1222 u64 root_objectid, 1134 u64 root_objectid,
1223 u64 owner, u64 offset) 1135 u64 owner, u64 offset)
1224{ 1136{
1225 struct btrfs_root *root = fs_info->extent_root; 1137 struct btrfs_root *root = trans->fs_info->extent_root;
1226 struct btrfs_key key; 1138 struct btrfs_key key;
1227 struct btrfs_extent_data_ref *ref; 1139 struct btrfs_extent_data_ref *ref;
1228 struct extent_buffer *leaf; 1140 struct extent_buffer *leaf;
@@ -1251,17 +1163,6 @@ again:
1251 if (parent) { 1163 if (parent) {
1252 if (!ret) 1164 if (!ret)
1253 return 0; 1165 return 0;
1254#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1255 key.type = BTRFS_EXTENT_REF_V0_KEY;
1256 btrfs_release_path(path);
1257 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1258 if (ret < 0) {
1259 err = ret;
1260 goto fail;
1261 }
1262 if (!ret)
1263 return 0;
1264#endif
1265 goto fail; 1166 goto fail;
1266 } 1167 }
1267 1168
@@ -1304,13 +1205,12 @@ fail:
1304} 1205}
1305 1206
1306static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 1207static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1307 struct btrfs_fs_info *fs_info,
1308 struct btrfs_path *path, 1208 struct btrfs_path *path,
1309 u64 bytenr, u64 parent, 1209 u64 bytenr, u64 parent,
1310 u64 root_objectid, u64 owner, 1210 u64 root_objectid, u64 owner,
1311 u64 offset, int refs_to_add) 1211 u64 offset, int refs_to_add)
1312{ 1212{
1313 struct btrfs_root *root = fs_info->extent_root; 1213 struct btrfs_root *root = trans->fs_info->extent_root;
1314 struct btrfs_key key; 1214 struct btrfs_key key;
1315 struct extent_buffer *leaf; 1215 struct extent_buffer *leaf;
1316 u32 size; 1216 u32 size;
@@ -1384,7 +1284,6 @@ fail:
1384} 1284}
1385 1285
1386static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1286static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1387 struct btrfs_fs_info *fs_info,
1388 struct btrfs_path *path, 1287 struct btrfs_path *path,
1389 int refs_to_drop, int *last_ref) 1288 int refs_to_drop, int *last_ref)
1390{ 1289{
@@ -1406,13 +1305,10 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1406 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1305 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1407 struct btrfs_shared_data_ref); 1306 struct btrfs_shared_data_ref);
1408 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1307 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1409#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1308 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
1410 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1309 btrfs_print_v0_err(trans->fs_info);
1411 struct btrfs_extent_ref_v0 *ref0; 1310 btrfs_abort_transaction(trans, -EINVAL);
1412 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1311 return -EINVAL;
1413 struct btrfs_extent_ref_v0);
1414 num_refs = btrfs_ref_count_v0(leaf, ref0);
1415#endif
1416 } else { 1312 } else {
1417 BUG(); 1313 BUG();
1418 } 1314 }
@@ -1421,21 +1317,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1421 num_refs -= refs_to_drop; 1317 num_refs -= refs_to_drop;
1422 1318
1423 if (num_refs == 0) { 1319 if (num_refs == 0) {
1424 ret = btrfs_del_item(trans, fs_info->extent_root, path); 1320 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1425 *last_ref = 1; 1321 *last_ref = 1;
1426 } else { 1322 } else {
1427 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1323 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1428 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1324 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1429 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1325 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1430 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1326 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1431#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1432 else {
1433 struct btrfs_extent_ref_v0 *ref0;
1434 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1435 struct btrfs_extent_ref_v0);
1436 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
1437 }
1438#endif
1439 btrfs_mark_buffer_dirty(leaf); 1327 btrfs_mark_buffer_dirty(leaf);
1440 } 1328 }
1441 return ret; 1329 return ret;
@@ -1453,6 +1341,8 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1453 1341
1454 leaf = path->nodes[0]; 1342 leaf = path->nodes[0];
1455 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1343 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1344
1345 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
1456 if (iref) { 1346 if (iref) {
1457 /* 1347 /*
1458 * If type is invalid, we should have bailed out earlier than 1348 * If type is invalid, we should have bailed out earlier than
@@ -1475,13 +1365,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1475 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1365 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1476 struct btrfs_shared_data_ref); 1366 struct btrfs_shared_data_ref);
1477 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1367 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1478#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1479 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1480 struct btrfs_extent_ref_v0 *ref0;
1481 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1482 struct btrfs_extent_ref_v0);
1483 num_refs = btrfs_ref_count_v0(leaf, ref0);
1484#endif
1485 } else { 1368 } else {
1486 WARN_ON(1); 1369 WARN_ON(1);
1487 } 1370 }
@@ -1489,12 +1372,11 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1489} 1372}
1490 1373
1491static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1374static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1492 struct btrfs_fs_info *fs_info,
1493 struct btrfs_path *path, 1375 struct btrfs_path *path,
1494 u64 bytenr, u64 parent, 1376 u64 bytenr, u64 parent,
1495 u64 root_objectid) 1377 u64 root_objectid)
1496{ 1378{
1497 struct btrfs_root *root = fs_info->extent_root; 1379 struct btrfs_root *root = trans->fs_info->extent_root;
1498 struct btrfs_key key; 1380 struct btrfs_key key;
1499 int ret; 1381 int ret;
1500 1382
@@ -1510,20 +1392,10 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1510 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1392 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1511 if (ret > 0) 1393 if (ret > 0)
1512 ret = -ENOENT; 1394 ret = -ENOENT;
1513#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1514 if (ret == -ENOENT && parent) {
1515 btrfs_release_path(path);
1516 key.type = BTRFS_EXTENT_REF_V0_KEY;
1517 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1518 if (ret > 0)
1519 ret = -ENOENT;
1520 }
1521#endif
1522 return ret; 1395 return ret;
1523} 1396}
1524 1397
1525static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1398static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1526 struct btrfs_fs_info *fs_info,
1527 struct btrfs_path *path, 1399 struct btrfs_path *path,
1528 u64 bytenr, u64 parent, 1400 u64 bytenr, u64 parent,
1529 u64 root_objectid) 1401 u64 root_objectid)
@@ -1540,7 +1412,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1540 key.offset = root_objectid; 1412 key.offset = root_objectid;
1541 } 1413 }
1542 1414
1543 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, 1415 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
1544 path, &key, 0); 1416 path, &key, 0);
1545 btrfs_release_path(path); 1417 btrfs_release_path(path);
1546 return ret; 1418 return ret;
@@ -1599,13 +1471,13 @@ static int find_next_key(struct btrfs_path *path, int level,
1599 */ 1471 */
1600static noinline_for_stack 1472static noinline_for_stack
1601int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1473int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1602 struct btrfs_fs_info *fs_info,
1603 struct btrfs_path *path, 1474 struct btrfs_path *path,
1604 struct btrfs_extent_inline_ref **ref_ret, 1475 struct btrfs_extent_inline_ref **ref_ret,
1605 u64 bytenr, u64 num_bytes, 1476 u64 bytenr, u64 num_bytes,
1606 u64 parent, u64 root_objectid, 1477 u64 parent, u64 root_objectid,
1607 u64 owner, u64 offset, int insert) 1478 u64 owner, u64 offset, int insert)
1608{ 1479{
1480 struct btrfs_fs_info *fs_info = trans->fs_info;
1609 struct btrfs_root *root = fs_info->extent_root; 1481 struct btrfs_root *root = fs_info->extent_root;
1610 struct btrfs_key key; 1482 struct btrfs_key key;
1611 struct extent_buffer *leaf; 1483 struct extent_buffer *leaf;
@@ -1635,8 +1507,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1635 extra_size = -1; 1507 extra_size = -1;
1636 1508
1637 /* 1509 /*
1638 * Owner is our parent level, so we can just add one to get the level 1510 * Owner is our level, so we can just add one to get the level for the
1639 * for the block we are interested in. 1511 * block we are interested in.
1640 */ 1512 */
1641 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) { 1513 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1642 key.type = BTRFS_METADATA_ITEM_KEY; 1514 key.type = BTRFS_METADATA_ITEM_KEY;
@@ -1684,23 +1556,12 @@ again:
1684 1556
1685 leaf = path->nodes[0]; 1557 leaf = path->nodes[0];
1686 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1558 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1687#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1559 if (unlikely(item_size < sizeof(*ei))) {
1688 if (item_size < sizeof(*ei)) { 1560 err = -EINVAL;
1689 if (!insert) { 1561 btrfs_print_v0_err(fs_info);
1690 err = -ENOENT; 1562 btrfs_abort_transaction(trans, err);
1691 goto out; 1563 goto out;
1692 }
1693 ret = convert_extent_item_v0(trans, fs_info, path, owner,
1694 extra_size);
1695 if (ret < 0) {
1696 err = ret;
1697 goto out;
1698 }
1699 leaf = path->nodes[0];
1700 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1701 } 1564 }
1702#endif
1703 BUG_ON(item_size < sizeof(*ei));
1704 1565
1705 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1566 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1706 flags = btrfs_extent_flags(leaf, ei); 1567 flags = btrfs_extent_flags(leaf, ei);
@@ -1727,7 +1588,7 @@ again:
1727 iref = (struct btrfs_extent_inline_ref *)ptr; 1588 iref = (struct btrfs_extent_inline_ref *)ptr;
1728 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); 1589 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
1729 if (type == BTRFS_REF_TYPE_INVALID) { 1590 if (type == BTRFS_REF_TYPE_INVALID) {
1730 err = -EINVAL; 1591 err = -EUCLEAN;
1731 goto out; 1592 goto out;
1732 } 1593 }
1733 1594
@@ -1863,7 +1724,6 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1863} 1724}
1864 1725
1865static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1726static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1866 struct btrfs_fs_info *fs_info,
1867 struct btrfs_path *path, 1727 struct btrfs_path *path,
1868 struct btrfs_extent_inline_ref **ref_ret, 1728 struct btrfs_extent_inline_ref **ref_ret,
1869 u64 bytenr, u64 num_bytes, u64 parent, 1729 u64 bytenr, u64 num_bytes, u64 parent,
@@ -1871,9 +1731,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1871{ 1731{
1872 int ret; 1732 int ret;
1873 1733
1874 ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret, 1734 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1875 bytenr, num_bytes, parent, 1735 num_bytes, parent, root_objectid,
1876 root_objectid, owner, offset, 0); 1736 owner, offset, 0);
1877 if (ret != -ENOENT) 1737 if (ret != -ENOENT)
1878 return ret; 1738 return ret;
1879 1739
@@ -1881,12 +1741,11 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1881 *ref_ret = NULL; 1741 *ref_ret = NULL;
1882 1742
1883 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1743 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1884 ret = lookup_tree_block_ref(trans, fs_info, path, bytenr, 1744 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1885 parent, root_objectid); 1745 root_objectid);
1886 } else { 1746 } else {
1887 ret = lookup_extent_data_ref(trans, fs_info, path, bytenr, 1747 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1888 parent, root_objectid, owner, 1748 root_objectid, owner, offset);
1889 offset);
1890 } 1749 }
1891 return ret; 1750 return ret;
1892} 1751}
@@ -1895,14 +1754,14 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1895 * helper to update/remove inline back ref 1754 * helper to update/remove inline back ref
1896 */ 1755 */
1897static noinline_for_stack 1756static noinline_for_stack
1898void update_inline_extent_backref(struct btrfs_fs_info *fs_info, 1757void update_inline_extent_backref(struct btrfs_path *path,
1899 struct btrfs_path *path,
1900 struct btrfs_extent_inline_ref *iref, 1758 struct btrfs_extent_inline_ref *iref,
1901 int refs_to_mod, 1759 int refs_to_mod,
1902 struct btrfs_delayed_extent_op *extent_op, 1760 struct btrfs_delayed_extent_op *extent_op,
1903 int *last_ref) 1761 int *last_ref)
1904{ 1762{
1905 struct extent_buffer *leaf; 1763 struct extent_buffer *leaf = path->nodes[0];
1764 struct btrfs_fs_info *fs_info = leaf->fs_info;
1906 struct btrfs_extent_item *ei; 1765 struct btrfs_extent_item *ei;
1907 struct btrfs_extent_data_ref *dref = NULL; 1766 struct btrfs_extent_data_ref *dref = NULL;
1908 struct btrfs_shared_data_ref *sref = NULL; 1767 struct btrfs_shared_data_ref *sref = NULL;
@@ -1913,7 +1772,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
1913 int type; 1772 int type;
1914 u64 refs; 1773 u64 refs;
1915 1774
1916 leaf = path->nodes[0];
1917 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1775 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1918 refs = btrfs_extent_refs(leaf, ei); 1776 refs = btrfs_extent_refs(leaf, ei);
1919 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1777 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
@@ -1965,7 +1823,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
1965 1823
1966static noinline_for_stack 1824static noinline_for_stack
1967int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1825int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1968 struct btrfs_fs_info *fs_info,
1969 struct btrfs_path *path, 1826 struct btrfs_path *path,
1970 u64 bytenr, u64 num_bytes, u64 parent, 1827 u64 bytenr, u64 num_bytes, u64 parent,
1971 u64 root_objectid, u64 owner, 1828 u64 root_objectid, u64 owner,
@@ -1975,15 +1832,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1975 struct btrfs_extent_inline_ref *iref; 1832 struct btrfs_extent_inline_ref *iref;
1976 int ret; 1833 int ret;
1977 1834
1978 ret = lookup_inline_extent_backref(trans, fs_info, path, &iref, 1835 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1979 bytenr, num_bytes, parent, 1836 num_bytes, parent, root_objectid,
1980 root_objectid, owner, offset, 1); 1837 owner, offset, 1);
1981 if (ret == 0) { 1838 if (ret == 0) {
1982 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1839 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1983 update_inline_extent_backref(fs_info, path, iref, 1840 update_inline_extent_backref(path, iref, refs_to_add,
1984 refs_to_add, extent_op, NULL); 1841 extent_op, NULL);
1985 } else if (ret == -ENOENT) { 1842 } else if (ret == -ENOENT) {
1986 setup_inline_extent_backref(fs_info, path, iref, parent, 1843 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1987 root_objectid, owner, offset, 1844 root_objectid, owner, offset,
1988 refs_to_add, extent_op); 1845 refs_to_add, extent_op);
1989 ret = 0; 1846 ret = 0;
@@ -1992,7 +1849,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1992} 1849}
1993 1850
1994static int insert_extent_backref(struct btrfs_trans_handle *trans, 1851static int insert_extent_backref(struct btrfs_trans_handle *trans,
1995 struct btrfs_fs_info *fs_info,
1996 struct btrfs_path *path, 1852 struct btrfs_path *path,
1997 u64 bytenr, u64 parent, u64 root_objectid, 1853 u64 bytenr, u64 parent, u64 root_objectid,
1998 u64 owner, u64 offset, int refs_to_add) 1854 u64 owner, u64 offset, int refs_to_add)
@@ -2000,18 +1856,17 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
2000 int ret; 1856 int ret;
2001 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1857 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2002 BUG_ON(refs_to_add != 1); 1858 BUG_ON(refs_to_add != 1);
2003 ret = insert_tree_block_ref(trans, fs_info, path, bytenr, 1859 ret = insert_tree_block_ref(trans, path, bytenr, parent,
2004 parent, root_objectid); 1860 root_objectid);
2005 } else { 1861 } else {
2006 ret = insert_extent_data_ref(trans, fs_info, path, bytenr, 1862 ret = insert_extent_data_ref(trans, path, bytenr, parent,
2007 parent, root_objectid, 1863 root_objectid, owner, offset,
2008 owner, offset, refs_to_add); 1864 refs_to_add);
2009 } 1865 }
2010 return ret; 1866 return ret;
2011} 1867}
2012 1868
2013static int remove_extent_backref(struct btrfs_trans_handle *trans, 1869static int remove_extent_backref(struct btrfs_trans_handle *trans,
2014 struct btrfs_fs_info *fs_info,
2015 struct btrfs_path *path, 1870 struct btrfs_path *path,
2016 struct btrfs_extent_inline_ref *iref, 1871 struct btrfs_extent_inline_ref *iref,
2017 int refs_to_drop, int is_data, int *last_ref) 1872 int refs_to_drop, int is_data, int *last_ref)
@@ -2020,14 +1875,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
2020 1875
2021 BUG_ON(!is_data && refs_to_drop != 1); 1876 BUG_ON(!is_data && refs_to_drop != 1);
2022 if (iref) { 1877 if (iref) {
2023 update_inline_extent_backref(fs_info, path, iref, 1878 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
2024 -refs_to_drop, NULL, last_ref); 1879 last_ref);
2025 } else if (is_data) { 1880 } else if (is_data) {
2026 ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop, 1881 ret = remove_extent_data_ref(trans, path, refs_to_drop,
2027 last_ref); 1882 last_ref);
2028 } else { 1883 } else {
2029 *last_ref = 1; 1884 *last_ref = 1;
2030 ret = btrfs_del_item(trans, fs_info->extent_root, path); 1885 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
2031 } 1886 }
2032 return ret; 1887 return ret;
2033} 1888}
@@ -2185,13 +2040,13 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2185 owner, offset, BTRFS_ADD_DELAYED_REF); 2040 owner, offset, BTRFS_ADD_DELAYED_REF);
2186 2041
2187 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 2042 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2188 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 2043 ret = btrfs_add_delayed_tree_ref(trans, bytenr,
2189 num_bytes, parent, 2044 num_bytes, parent,
2190 root_objectid, (int)owner, 2045 root_objectid, (int)owner,
2191 BTRFS_ADD_DELAYED_REF, NULL, 2046 BTRFS_ADD_DELAYED_REF, NULL,
2192 &old_ref_mod, &new_ref_mod); 2047 &old_ref_mod, &new_ref_mod);
2193 } else { 2048 } else {
2194 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 2049 ret = btrfs_add_delayed_data_ref(trans, bytenr,
2195 num_bytes, parent, 2050 num_bytes, parent,
2196 root_objectid, owner, offset, 2051 root_objectid, owner, offset,
2197 0, BTRFS_ADD_DELAYED_REF, 2052 0, BTRFS_ADD_DELAYED_REF,
@@ -2207,8 +2062,41 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2207 return ret; 2062 return ret;
2208} 2063}
2209 2064
2065/*
2066 * __btrfs_inc_extent_ref - insert backreference for a given extent
2067 *
2068 * @trans: Handle of transaction
2069 *
2070 * @node: The delayed ref node used to get the bytenr/length for
2071 * extent whose references are incremented.
2072 *
2073 * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
2074 * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
2075 * bytenr of the parent block. Since new extents are always
2076 * created with indirect references, this will only be the case
2077 * when relocating a shared extent. In that case, root_objectid
2078 * will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must
2079 * be 0
2080 *
2081 * @root_objectid: The id of the root where this modification has originated,
2082 * this can be either one of the well-known metadata trees or
2083 * the subvolume id which references this extent.
2084 *
2085 * @owner: For data extents it is the inode number of the owning file.
2086 * For metadata extents this parameter holds the level in the
2087 * tree of the extent.
2088 *
2089 * @offset: For metadata extents the offset is ignored and is currently
2090 * always passed as 0. For data extents it is the fileoffset
2091 * this extent belongs to.
2092 *
2093 * @refs_to_add Number of references to add
2094 *
2095 * @extent_op Pointer to a structure, holding information necessary when
2096 * updating a tree block's flags
2097 *
2098 */
2210static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2099static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2211 struct btrfs_fs_info *fs_info,
2212 struct btrfs_delayed_ref_node *node, 2100 struct btrfs_delayed_ref_node *node,
2213 u64 parent, u64 root_objectid, 2101 u64 parent, u64 root_objectid,
2214 u64 owner, u64 offset, int refs_to_add, 2102 u64 owner, u64 offset, int refs_to_add,
@@ -2230,10 +2118,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2230 path->reada = READA_FORWARD; 2118 path->reada = READA_FORWARD;
2231 path->leave_spinning = 1; 2119 path->leave_spinning = 1;
2232 /* this will setup the path even if it fails to insert the back ref */ 2120 /* this will setup the path even if it fails to insert the back ref */
2233 ret = insert_inline_extent_backref(trans, fs_info, path, bytenr, 2121 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
2234 num_bytes, parent, root_objectid, 2122 parent, root_objectid, owner,
2235 owner, offset, 2123 offset, refs_to_add, extent_op);
2236 refs_to_add, extent_op);
2237 if ((ret < 0 && ret != -EAGAIN) || !ret) 2124 if ((ret < 0 && ret != -EAGAIN) || !ret)
2238 goto out; 2125 goto out;
2239 2126
@@ -2256,8 +2143,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2256 path->reada = READA_FORWARD; 2143 path->reada = READA_FORWARD;
2257 path->leave_spinning = 1; 2144 path->leave_spinning = 1;
2258 /* now insert the actual backref */ 2145 /* now insert the actual backref */
2259 ret = insert_extent_backref(trans, fs_info, path, bytenr, parent, 2146 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
2260 root_objectid, owner, offset, refs_to_add); 2147 owner, offset, refs_to_add);
2261 if (ret) 2148 if (ret)
2262 btrfs_abort_transaction(trans, ret); 2149 btrfs_abort_transaction(trans, ret);
2263out: 2150out:
@@ -2266,7 +2153,6 @@ out:
2266} 2153}
2267 2154
2268static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 2155static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2269 struct btrfs_fs_info *fs_info,
2270 struct btrfs_delayed_ref_node *node, 2156 struct btrfs_delayed_ref_node *node,
2271 struct btrfs_delayed_extent_op *extent_op, 2157 struct btrfs_delayed_extent_op *extent_op,
2272 int insert_reserved) 2158 int insert_reserved)
@@ -2283,7 +2169,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2283 ins.type = BTRFS_EXTENT_ITEM_KEY; 2169 ins.type = BTRFS_EXTENT_ITEM_KEY;
2284 2170
2285 ref = btrfs_delayed_node_to_data_ref(node); 2171 ref = btrfs_delayed_node_to_data_ref(node);
2286 trace_run_delayed_data_ref(fs_info, node, ref, node->action); 2172 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
2287 2173
2288 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2174 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2289 parent = ref->parent; 2175 parent = ref->parent;
@@ -2292,17 +2178,16 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2292 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2178 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2293 if (extent_op) 2179 if (extent_op)
2294 flags |= extent_op->flags_to_set; 2180 flags |= extent_op->flags_to_set;
2295 ret = alloc_reserved_file_extent(trans, fs_info, 2181 ret = alloc_reserved_file_extent(trans, parent, ref_root,
2296 parent, ref_root, flags, 2182 flags, ref->objectid,
2297 ref->objectid, ref->offset, 2183 ref->offset, &ins,
2298 &ins, node->ref_mod); 2184 node->ref_mod);
2299 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2185 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2300 ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent, 2186 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2301 ref_root, ref->objectid, 2187 ref->objectid, ref->offset,
2302 ref->offset, node->ref_mod, 2188 node->ref_mod, extent_op);
2303 extent_op);
2304 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2189 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2305 ret = __btrfs_free_extent(trans, fs_info, node, parent, 2190 ret = __btrfs_free_extent(trans, node, parent,
2306 ref_root, ref->objectid, 2191 ref_root, ref->objectid,
2307 ref->offset, node->ref_mod, 2192 ref->offset, node->ref_mod,
2308 extent_op); 2193 extent_op);
@@ -2331,10 +2216,10 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2331} 2216}
2332 2217
2333static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 2218static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2334 struct btrfs_fs_info *fs_info,
2335 struct btrfs_delayed_ref_head *head, 2219 struct btrfs_delayed_ref_head *head,
2336 struct btrfs_delayed_extent_op *extent_op) 2220 struct btrfs_delayed_extent_op *extent_op)
2337{ 2221{
2222 struct btrfs_fs_info *fs_info = trans->fs_info;
2338 struct btrfs_key key; 2223 struct btrfs_key key;
2339 struct btrfs_path *path; 2224 struct btrfs_path *path;
2340 struct btrfs_extent_item *ei; 2225 struct btrfs_extent_item *ei;
@@ -2400,18 +2285,14 @@ again:
2400 2285
2401 leaf = path->nodes[0]; 2286 leaf = path->nodes[0];
2402 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2287 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2403#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2288
2404 if (item_size < sizeof(*ei)) { 2289 if (unlikely(item_size < sizeof(*ei))) {
2405 ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0); 2290 err = -EINVAL;
2406 if (ret < 0) { 2291 btrfs_print_v0_err(fs_info);
2407 err = ret; 2292 btrfs_abort_transaction(trans, err);
2408 goto out; 2293 goto out;
2409 }
2410 leaf = path->nodes[0];
2411 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2412 } 2294 }
2413#endif 2295
2414 BUG_ON(item_size < sizeof(*ei));
2415 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2296 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2416 __run_delayed_extent_op(extent_op, leaf, ei); 2297 __run_delayed_extent_op(extent_op, leaf, ei);
2417 2298
@@ -2422,7 +2303,6 @@ out:
2422} 2303}
2423 2304
2424static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2305static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2425 struct btrfs_fs_info *fs_info,
2426 struct btrfs_delayed_ref_node *node, 2306 struct btrfs_delayed_ref_node *node,
2427 struct btrfs_delayed_extent_op *extent_op, 2307 struct btrfs_delayed_extent_op *extent_op,
2428 int insert_reserved) 2308 int insert_reserved)
@@ -2433,14 +2313,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2433 u64 ref_root = 0; 2313 u64 ref_root = 0;
2434 2314
2435 ref = btrfs_delayed_node_to_tree_ref(node); 2315 ref = btrfs_delayed_node_to_tree_ref(node);
2436 trace_run_delayed_tree_ref(fs_info, node, ref, node->action); 2316 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
2437 2317
2438 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2318 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2439 parent = ref->parent; 2319 parent = ref->parent;
2440 ref_root = ref->root; 2320 ref_root = ref->root;
2441 2321
2442 if (node->ref_mod != 1) { 2322 if (node->ref_mod != 1) {
2443 btrfs_err(fs_info, 2323 btrfs_err(trans->fs_info,
2444 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", 2324 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2445 node->bytenr, node->ref_mod, node->action, ref_root, 2325 node->bytenr, node->ref_mod, node->action, ref_root,
2446 parent); 2326 parent);
@@ -2450,13 +2330,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2450 BUG_ON(!extent_op || !extent_op->update_flags); 2330 BUG_ON(!extent_op || !extent_op->update_flags);
2451 ret = alloc_reserved_tree_block(trans, node, extent_op); 2331 ret = alloc_reserved_tree_block(trans, node, extent_op);
2452 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2332 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2453 ret = __btrfs_inc_extent_ref(trans, fs_info, node, 2333 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2454 parent, ref_root, 2334 ref->level, 0, 1, extent_op);
2455 ref->level, 0, 1,
2456 extent_op);
2457 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2335 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2458 ret = __btrfs_free_extent(trans, fs_info, node, 2336 ret = __btrfs_free_extent(trans, node, parent, ref_root,
2459 parent, ref_root,
2460 ref->level, 0, 1, extent_op); 2337 ref->level, 0, 1, extent_op);
2461 } else { 2338 } else {
2462 BUG(); 2339 BUG();
@@ -2466,7 +2343,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2466 2343
2467/* helper function to actually process a single delayed ref entry */ 2344/* helper function to actually process a single delayed ref entry */
2468static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2345static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2469 struct btrfs_fs_info *fs_info,
2470 struct btrfs_delayed_ref_node *node, 2346 struct btrfs_delayed_ref_node *node,
2471 struct btrfs_delayed_extent_op *extent_op, 2347 struct btrfs_delayed_extent_op *extent_op,
2472 int insert_reserved) 2348 int insert_reserved)
@@ -2475,18 +2351,18 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2475 2351
2476 if (trans->aborted) { 2352 if (trans->aborted) {
2477 if (insert_reserved) 2353 if (insert_reserved)
2478 btrfs_pin_extent(fs_info, node->bytenr, 2354 btrfs_pin_extent(trans->fs_info, node->bytenr,
2479 node->num_bytes, 1); 2355 node->num_bytes, 1);
2480 return 0; 2356 return 0;
2481 } 2357 }
2482 2358
2483 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2359 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2484 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2360 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2485 ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, 2361 ret = run_delayed_tree_ref(trans, node, extent_op,
2486 insert_reserved); 2362 insert_reserved);
2487 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2363 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2488 node->type == BTRFS_SHARED_DATA_REF_KEY) 2364 node->type == BTRFS_SHARED_DATA_REF_KEY)
2489 ret = run_delayed_data_ref(trans, fs_info, node, extent_op, 2365 ret = run_delayed_data_ref(trans, node, extent_op,
2490 insert_reserved); 2366 insert_reserved);
2491 else 2367 else
2492 BUG(); 2368 BUG();
@@ -2528,7 +2404,6 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
2528} 2404}
2529 2405
2530static int cleanup_extent_op(struct btrfs_trans_handle *trans, 2406static int cleanup_extent_op(struct btrfs_trans_handle *trans,
2531 struct btrfs_fs_info *fs_info,
2532 struct btrfs_delayed_ref_head *head) 2407 struct btrfs_delayed_ref_head *head)
2533{ 2408{
2534 struct btrfs_delayed_extent_op *extent_op = head->extent_op; 2409 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
@@ -2542,21 +2417,22 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
2542 return 0; 2417 return 0;
2543 } 2418 }
2544 spin_unlock(&head->lock); 2419 spin_unlock(&head->lock);
2545 ret = run_delayed_extent_op(trans, fs_info, head, extent_op); 2420 ret = run_delayed_extent_op(trans, head, extent_op);
2546 btrfs_free_delayed_extent_op(extent_op); 2421 btrfs_free_delayed_extent_op(extent_op);
2547 return ret ? ret : 1; 2422 return ret ? ret : 1;
2548} 2423}
2549 2424
2550static int cleanup_ref_head(struct btrfs_trans_handle *trans, 2425static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2551 struct btrfs_fs_info *fs_info,
2552 struct btrfs_delayed_ref_head *head) 2426 struct btrfs_delayed_ref_head *head)
2553{ 2427{
2428
2429 struct btrfs_fs_info *fs_info = trans->fs_info;
2554 struct btrfs_delayed_ref_root *delayed_refs; 2430 struct btrfs_delayed_ref_root *delayed_refs;
2555 int ret; 2431 int ret;
2556 2432
2557 delayed_refs = &trans->transaction->delayed_refs; 2433 delayed_refs = &trans->transaction->delayed_refs;
2558 2434
2559 ret = cleanup_extent_op(trans, fs_info, head); 2435 ret = cleanup_extent_op(trans, head);
2560 if (ret < 0) { 2436 if (ret < 0) {
2561 unselect_delayed_ref_head(delayed_refs, head); 2437 unselect_delayed_ref_head(delayed_refs, head);
2562 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2438 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
@@ -2598,8 +2474,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2598 flags = BTRFS_BLOCK_GROUP_METADATA; 2474 flags = BTRFS_BLOCK_GROUP_METADATA;
2599 space_info = __find_space_info(fs_info, flags); 2475 space_info = __find_space_info(fs_info, flags);
2600 ASSERT(space_info); 2476 ASSERT(space_info);
2601 percpu_counter_add(&space_info->total_bytes_pinned, 2477 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2602 -head->num_bytes); 2478 -head->num_bytes,
2479 BTRFS_TOTAL_BYTES_PINNED_BATCH);
2603 2480
2604 if (head->is_data) { 2481 if (head->is_data) {
2605 spin_lock(&delayed_refs->lock); 2482 spin_lock(&delayed_refs->lock);
@@ -2705,7 +2582,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2705 * up and move on to the next ref_head. 2582 * up and move on to the next ref_head.
2706 */ 2583 */
2707 if (!ref) { 2584 if (!ref) {
2708 ret = cleanup_ref_head(trans, fs_info, locked_ref); 2585 ret = cleanup_ref_head(trans, locked_ref);
2709 if (ret > 0 ) { 2586 if (ret > 0 ) {
2710 /* We dropped our lock, we need to loop. */ 2587 /* We dropped our lock, we need to loop. */
2711 ret = 0; 2588 ret = 0;
@@ -2752,7 +2629,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2752 locked_ref->extent_op = NULL; 2629 locked_ref->extent_op = NULL;
2753 spin_unlock(&locked_ref->lock); 2630 spin_unlock(&locked_ref->lock);
2754 2631
2755 ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, 2632 ret = run_one_delayed_ref(trans, ref, extent_op,
2756 must_insert_reserved); 2633 must_insert_reserved);
2757 2634
2758 btrfs_free_delayed_extent_op(extent_op); 2635 btrfs_free_delayed_extent_op(extent_op);
@@ -3227,12 +3104,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
3227 3104
3228 ret = 1; 3105 ret = 1;
3229 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3106 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3230#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3231 if (item_size < sizeof(*ei)) {
3232 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3233 goto out;
3234 }
3235#endif
3236 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 3107 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3237 3108
3238 if (item_size != sizeof(*ei) + 3109 if (item_size != sizeof(*ei) +
@@ -4060,11 +3931,7 @@ static void update_space_info(struct btrfs_fs_info *info, u64 flags,
4060 struct btrfs_space_info *found; 3931 struct btrfs_space_info *found;
4061 int factor; 3932 int factor;
4062 3933
4063 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 3934 factor = btrfs_bg_type_to_factor(flags);
4064 BTRFS_BLOCK_GROUP_RAID10))
4065 factor = 2;
4066 else
4067 factor = 1;
4068 3935
4069 found = __find_space_info(info, flags); 3936 found = __find_space_info(info, flags);
4070 ASSERT(found); 3937 ASSERT(found);
@@ -4289,7 +4156,7 @@ again:
4289 if (IS_ERR(trans)) 4156 if (IS_ERR(trans))
4290 return PTR_ERR(trans); 4157 return PTR_ERR(trans);
4291 4158
4292 ret = do_chunk_alloc(trans, fs_info, alloc_target, 4159 ret = do_chunk_alloc(trans, alloc_target,
4293 CHUNK_ALLOC_NO_FORCE); 4160 CHUNK_ALLOC_NO_FORCE);
4294 btrfs_end_transaction(trans); 4161 btrfs_end_transaction(trans);
4295 if (ret < 0) { 4162 if (ret < 0) {
@@ -4309,9 +4176,10 @@ again:
4309 * allocation, and no removed chunk in current transaction, 4176 * allocation, and no removed chunk in current transaction,
4310 * don't bother committing the transaction. 4177 * don't bother committing the transaction.
4311 */ 4178 */
4312 have_pinned_space = percpu_counter_compare( 4179 have_pinned_space = __percpu_counter_compare(
4313 &data_sinfo->total_bytes_pinned, 4180 &data_sinfo->total_bytes_pinned,
4314 used + bytes - data_sinfo->total_bytes); 4181 used + bytes - data_sinfo->total_bytes,
4182 BTRFS_TOTAL_BYTES_PINNED_BATCH);
4315 spin_unlock(&data_sinfo->lock); 4183 spin_unlock(&data_sinfo->lock);
4316 4184
4317 /* commit the current transaction and try again */ 4185 /* commit the current transaction and try again */
@@ -4358,7 +4226,7 @@ commit_trans:
4358 data_sinfo->flags, bytes, 1); 4226 data_sinfo->flags, bytes, 1);
4359 spin_unlock(&data_sinfo->lock); 4227 spin_unlock(&data_sinfo->lock);
4360 4228
4361 return ret; 4229 return 0;
4362} 4230}
4363 4231
4364int btrfs_check_data_free_space(struct inode *inode, 4232int btrfs_check_data_free_space(struct inode *inode,
@@ -4511,9 +4379,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4511 * for allocating a chunk, otherwise if it's false, reserve space necessary for 4379 * for allocating a chunk, otherwise if it's false, reserve space necessary for
4512 * removing a chunk. 4380 * removing a chunk.
4513 */ 4381 */
4514void check_system_chunk(struct btrfs_trans_handle *trans, 4382void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4515 struct btrfs_fs_info *fs_info, u64 type)
4516{ 4383{
4384 struct btrfs_fs_info *fs_info = trans->fs_info;
4517 struct btrfs_space_info *info; 4385 struct btrfs_space_info *info;
4518 u64 left; 4386 u64 left;
4519 u64 thresh; 4387 u64 thresh;
@@ -4552,7 +4420,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
4552 * the paths we visit in the chunk tree (they were already COWed 4420 * the paths we visit in the chunk tree (they were already COWed
4553 * or created in the current transaction for example). 4421 * or created in the current transaction for example).
4554 */ 4422 */
4555 ret = btrfs_alloc_chunk(trans, fs_info, flags); 4423 ret = btrfs_alloc_chunk(trans, flags);
4556 } 4424 }
4557 4425
4558 if (!ret) { 4426 if (!ret) {
@@ -4573,11 +4441,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
4573 * - return 1 if it successfully allocates a chunk, 4441 * - return 1 if it successfully allocates a chunk,
4574 * - return errors including -ENOSPC otherwise. 4442 * - return errors including -ENOSPC otherwise.
4575 */ 4443 */
4576static int do_chunk_alloc(struct btrfs_trans_handle *trans, 4444static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4577 struct btrfs_fs_info *fs_info, u64 flags, int force) 4445 int force)
4578{ 4446{
4447 struct btrfs_fs_info *fs_info = trans->fs_info;
4579 struct btrfs_space_info *space_info; 4448 struct btrfs_space_info *space_info;
4580 int wait_for_alloc = 0; 4449 bool wait_for_alloc = false;
4450 bool should_alloc = false;
4581 int ret = 0; 4451 int ret = 0;
4582 4452
4583 /* Don't re-enter if we're already allocating a chunk */ 4453 /* Don't re-enter if we're already allocating a chunk */
@@ -4587,45 +4457,44 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4587 space_info = __find_space_info(fs_info, flags); 4457 space_info = __find_space_info(fs_info, flags);
4588 ASSERT(space_info); 4458 ASSERT(space_info);
4589 4459
4590again: 4460 do {
4591 spin_lock(&space_info->lock); 4461 spin_lock(&space_info->lock);
4592 if (force < space_info->force_alloc) 4462 if (force < space_info->force_alloc)
4593 force = space_info->force_alloc; 4463 force = space_info->force_alloc;
4594 if (space_info->full) { 4464 should_alloc = should_alloc_chunk(fs_info, space_info, force);
4595 if (should_alloc_chunk(fs_info, space_info, force)) 4465 if (space_info->full) {
4596 ret = -ENOSPC; 4466 /* No more free physical space */
4597 else 4467 if (should_alloc)
4598 ret = 0; 4468 ret = -ENOSPC;
4599 spin_unlock(&space_info->lock); 4469 else
4600 return ret; 4470 ret = 0;
4601 } 4471 spin_unlock(&space_info->lock);
4602 4472 return ret;
4603 if (!should_alloc_chunk(fs_info, space_info, force)) { 4473 } else if (!should_alloc) {
4604 spin_unlock(&space_info->lock); 4474 spin_unlock(&space_info->lock);
4605 return 0; 4475 return 0;
4606 } else if (space_info->chunk_alloc) { 4476 } else if (space_info->chunk_alloc) {
4607 wait_for_alloc = 1; 4477 /*
4608 } else { 4478 * Someone is already allocating, so we need to block
4609 space_info->chunk_alloc = 1; 4479 * until this someone is finished and then loop to
4610 } 4480 * recheck if we should continue with our allocation
4611 4481 * attempt.
4612 spin_unlock(&space_info->lock); 4482 */
4613 4483 wait_for_alloc = true;
4614 mutex_lock(&fs_info->chunk_mutex); 4484 spin_unlock(&space_info->lock);
4485 mutex_lock(&fs_info->chunk_mutex);
4486 mutex_unlock(&fs_info->chunk_mutex);
4487 } else {
4488 /* Proceed with allocation */
4489 space_info->chunk_alloc = 1;
4490 wait_for_alloc = false;
4491 spin_unlock(&space_info->lock);
4492 }
4615 4493
4616 /*
4617 * The chunk_mutex is held throughout the entirety of a chunk
4618 * allocation, so once we've acquired the chunk_mutex we know that the
4619 * other guy is done and we need to recheck and see if we should
4620 * allocate.
4621 */
4622 if (wait_for_alloc) {
4623 mutex_unlock(&fs_info->chunk_mutex);
4624 wait_for_alloc = 0;
4625 cond_resched(); 4494 cond_resched();
4626 goto again; 4495 } while (wait_for_alloc);
4627 }
4628 4496
4497 mutex_lock(&fs_info->chunk_mutex);
4629 trans->allocating_chunk = true; 4498 trans->allocating_chunk = true;
4630 4499
4631 /* 4500 /*
@@ -4651,9 +4520,9 @@ again:
4651 * Check if we have enough space in SYSTEM chunk because we may need 4520 * Check if we have enough space in SYSTEM chunk because we may need
4652 * to update devices. 4521 * to update devices.
4653 */ 4522 */
4654 check_system_chunk(trans, fs_info, flags); 4523 check_system_chunk(trans, flags);
4655 4524
4656 ret = btrfs_alloc_chunk(trans, fs_info, flags); 4525 ret = btrfs_alloc_chunk(trans, flags);
4657 trans->allocating_chunk = false; 4526 trans->allocating_chunk = false;
4658 4527
4659 spin_lock(&space_info->lock); 4528 spin_lock(&space_info->lock);
@@ -4703,6 +4572,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
4703 u64 space_size; 4572 u64 space_size;
4704 u64 avail; 4573 u64 avail;
4705 u64 used; 4574 u64 used;
4575 int factor;
4706 4576
4707 /* Don't overcommit when in mixed mode. */ 4577 /* Don't overcommit when in mixed mode. */
4708 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) 4578 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -4737,10 +4607,8 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
4737 * doesn't include the parity drive, so we don't have to 4607 * doesn't include the parity drive, so we don't have to
4738 * change the math 4608 * change the math
4739 */ 4609 */
4740 if (profile & (BTRFS_BLOCK_GROUP_DUP | 4610 factor = btrfs_bg_type_to_factor(profile);
4741 BTRFS_BLOCK_GROUP_RAID1 | 4611 avail = div_u64(avail, factor);
4742 BTRFS_BLOCK_GROUP_RAID10))
4743 avail >>= 1;
4744 4612
4745 /* 4613 /*
4746 * If we aren't flushing all things, let us overcommit up to 4614 * If we aren't flushing all things, let us overcommit up to
@@ -4912,8 +4780,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4912 return 0; 4780 return 0;
4913 4781
4914 /* See if there is enough pinned space to make this reservation */ 4782 /* See if there is enough pinned space to make this reservation */
4915 if (percpu_counter_compare(&space_info->total_bytes_pinned, 4783 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4916 bytes) >= 0) 4784 bytes,
4785 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
4917 goto commit; 4786 goto commit;
4918 4787
4919 /* 4788 /*
@@ -4930,8 +4799,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4930 bytes -= delayed_rsv->size; 4799 bytes -= delayed_rsv->size;
4931 spin_unlock(&delayed_rsv->lock); 4800 spin_unlock(&delayed_rsv->lock);
4932 4801
4933 if (percpu_counter_compare(&space_info->total_bytes_pinned, 4802 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4934 bytes) < 0) { 4803 bytes,
4804 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
4935 return -ENOSPC; 4805 return -ENOSPC;
4936 } 4806 }
4937 4807
@@ -4984,7 +4854,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
4984 ret = PTR_ERR(trans); 4854 ret = PTR_ERR(trans);
4985 break; 4855 break;
4986 } 4856 }
4987 ret = do_chunk_alloc(trans, fs_info, 4857 ret = do_chunk_alloc(trans,
4988 btrfs_metadata_alloc_profile(fs_info), 4858 btrfs_metadata_alloc_profile(fs_info),
4989 CHUNK_ALLOC_NO_FORCE); 4859 CHUNK_ALLOC_NO_FORCE);
4990 btrfs_end_transaction(trans); 4860 btrfs_end_transaction(trans);
@@ -5659,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5659 kfree(rsv); 5529 kfree(rsv);
5660} 5530}
5661 5531
5662void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
5663{
5664 kfree(rsv);
5665}
5666
5667int btrfs_block_rsv_add(struct btrfs_root *root, 5532int btrfs_block_rsv_add(struct btrfs_root *root,
5668 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 5533 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5669 enum btrfs_reserve_flush_enum flush) 5534 enum btrfs_reserve_flush_enum flush)
@@ -6019,7 +5884,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6019 5884
6020int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) 5885int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6021{ 5886{
6022 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 5887 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6023 unsigned nr_extents; 5888 unsigned nr_extents;
6024 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 5889 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6025 int ret = 0; 5890 int ret = 0;
@@ -6092,7 +5957,7 @@ out_fail:
6092void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, 5957void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6093 bool qgroup_free) 5958 bool qgroup_free)
6094{ 5959{
6095 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 5960 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6096 5961
6097 num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 5962 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6098 spin_lock(&inode->lock); 5963 spin_lock(&inode->lock);
@@ -6121,7 +5986,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6121void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, 5986void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6122 bool qgroup_free) 5987 bool qgroup_free)
6123{ 5988{
6124 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 5989 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6125 unsigned num_extents; 5990 unsigned num_extents;
6126 5991
6127 spin_lock(&inode->lock); 5992 spin_lock(&inode->lock);
@@ -6219,12 +6084,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6219 cache = btrfs_lookup_block_group(info, bytenr); 6084 cache = btrfs_lookup_block_group(info, bytenr);
6220 if (!cache) 6085 if (!cache)
6221 return -ENOENT; 6086 return -ENOENT;
6222 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 6087 factor = btrfs_bg_type_to_factor(cache->flags);
6223 BTRFS_BLOCK_GROUP_RAID1 | 6088
6224 BTRFS_BLOCK_GROUP_RAID10))
6225 factor = 2;
6226 else
6227 factor = 1;
6228 /* 6089 /*
6229 * If this block group has free space cache written out, we 6090 * If this block group has free space cache written out, we
6230 * need to make sure to load it if we are removing space. This 6091 * need to make sure to load it if we are removing space. This
@@ -6268,8 +6129,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6268 trace_btrfs_space_reservation(info, "pinned", 6129 trace_btrfs_space_reservation(info, "pinned",
6269 cache->space_info->flags, 6130 cache->space_info->flags,
6270 num_bytes, 1); 6131 num_bytes, 1);
6271 percpu_counter_add(&cache->space_info->total_bytes_pinned, 6132 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6272 num_bytes); 6133 num_bytes,
6134 BTRFS_TOTAL_BYTES_PINNED_BATCH);
6273 set_extent_dirty(info->pinned_extents, 6135 set_extent_dirty(info->pinned_extents,
6274 bytenr, bytenr + num_bytes - 1, 6136 bytenr, bytenr + num_bytes - 1,
6275 GFP_NOFS | __GFP_NOFAIL); 6137 GFP_NOFS | __GFP_NOFAIL);
@@ -6279,7 +6141,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6279 if (list_empty(&cache->dirty_list)) { 6141 if (list_empty(&cache->dirty_list)) {
6280 list_add_tail(&cache->dirty_list, 6142 list_add_tail(&cache->dirty_list,
6281 &trans->transaction->dirty_bgs); 6143 &trans->transaction->dirty_bgs);
6282 trans->transaction->num_dirty_bgs++; 6144 trans->transaction->num_dirty_bgs++;
6283 btrfs_get_block_group(cache); 6145 btrfs_get_block_group(cache);
6284 } 6146 }
6285 spin_unlock(&trans->transaction->dirty_bgs_lock); 6147 spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -6290,16 +6152,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6290 * dirty list to avoid races between cleaner kthread and space 6152 * dirty list to avoid races between cleaner kthread and space
6291 * cache writeout. 6153 * cache writeout.
6292 */ 6154 */
6293 if (!alloc && old_val == 0) { 6155 if (!alloc && old_val == 0)
6294 spin_lock(&info->unused_bgs_lock); 6156 btrfs_mark_bg_unused(cache);
6295 if (list_empty(&cache->bg_list)) {
6296 btrfs_get_block_group(cache);
6297 trace_btrfs_add_unused_block_group(cache);
6298 list_add_tail(&cache->bg_list,
6299 &info->unused_bgs);
6300 }
6301 spin_unlock(&info->unused_bgs_lock);
6302 }
6303 6157
6304 btrfs_put_block_group(cache); 6158 btrfs_put_block_group(cache);
6305 total -= num_bytes; 6159 total -= num_bytes;
@@ -6347,7 +6201,8 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
6347 6201
6348 trace_btrfs_space_reservation(fs_info, "pinned", 6202 trace_btrfs_space_reservation(fs_info, "pinned",
6349 cache->space_info->flags, num_bytes, 1); 6203 cache->space_info->flags, num_bytes, 1);
6350 percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes); 6204 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6205 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6351 set_extent_dirty(fs_info->pinned_extents, bytenr, 6206 set_extent_dirty(fs_info->pinned_extents, bytenr,
6352 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 6207 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6353 return 0; 6208 return 0;
@@ -6711,7 +6566,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6711 trace_btrfs_space_reservation(fs_info, "pinned", 6566 trace_btrfs_space_reservation(fs_info, "pinned",
6712 space_info->flags, len, 0); 6567 space_info->flags, len, 0);
6713 space_info->max_extent_size = 0; 6568 space_info->max_extent_size = 0;
6714 percpu_counter_add(&space_info->total_bytes_pinned, -len); 6569 percpu_counter_add_batch(&space_info->total_bytes_pinned,
6570 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6715 if (cache->ro) { 6571 if (cache->ro) {
6716 space_info->bytes_readonly += len; 6572 space_info->bytes_readonly += len;
6717 readonly = true; 6573 readonly = true;
@@ -6815,12 +6671,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
6815} 6671}
6816 6672
6817static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 6673static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6818 struct btrfs_fs_info *info, 6674 struct btrfs_delayed_ref_node *node, u64 parent,
6819 struct btrfs_delayed_ref_node *node, u64 parent, 6675 u64 root_objectid, u64 owner_objectid,
6820 u64 root_objectid, u64 owner_objectid, 6676 u64 owner_offset, int refs_to_drop,
6821 u64 owner_offset, int refs_to_drop, 6677 struct btrfs_delayed_extent_op *extent_op)
6822 struct btrfs_delayed_extent_op *extent_op)
6823{ 6678{
6679 struct btrfs_fs_info *info = trans->fs_info;
6824 struct btrfs_key key; 6680 struct btrfs_key key;
6825 struct btrfs_path *path; 6681 struct btrfs_path *path;
6826 struct btrfs_root *extent_root = info->extent_root; 6682 struct btrfs_root *extent_root = info->extent_root;
@@ -6852,9 +6708,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6852 if (is_data) 6708 if (is_data)
6853 skinny_metadata = false; 6709 skinny_metadata = false;
6854 6710
6855 ret = lookup_extent_backref(trans, info, path, &iref, 6711 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
6856 bytenr, num_bytes, parent, 6712 parent, root_objectid, owner_objectid,
6857 root_objectid, owner_objectid,
6858 owner_offset); 6713 owner_offset);
6859 if (ret == 0) { 6714 if (ret == 0) {
6860 extent_slot = path->slots[0]; 6715 extent_slot = path->slots[0];
@@ -6877,14 +6732,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6877 break; 6732 break;
6878 extent_slot--; 6733 extent_slot--;
6879 } 6734 }
6880#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 6735
6881 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6882 if (found_extent && item_size < sizeof(*ei))
6883 found_extent = 0;
6884#endif
6885 if (!found_extent) { 6736 if (!found_extent) {
6886 BUG_ON(iref); 6737 BUG_ON(iref);
6887 ret = remove_extent_backref(trans, info, path, NULL, 6738 ret = remove_extent_backref(trans, path, NULL,
6888 refs_to_drop, 6739 refs_to_drop,
6889 is_data, &last_ref); 6740 is_data, &last_ref);
6890 if (ret) { 6741 if (ret) {
@@ -6957,42 +6808,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6957 6808
6958 leaf = path->nodes[0]; 6809 leaf = path->nodes[0];
6959 item_size = btrfs_item_size_nr(leaf, extent_slot); 6810 item_size = btrfs_item_size_nr(leaf, extent_slot);
6960#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 6811 if (unlikely(item_size < sizeof(*ei))) {
6961 if (item_size < sizeof(*ei)) { 6812 ret = -EINVAL;
6962 BUG_ON(found_extent || extent_slot != path->slots[0]); 6813 btrfs_print_v0_err(info);
6963 ret = convert_extent_item_v0(trans, info, path, owner_objectid, 6814 btrfs_abort_transaction(trans, ret);
6964 0); 6815 goto out;
6965 if (ret < 0) {
6966 btrfs_abort_transaction(trans, ret);
6967 goto out;
6968 }
6969
6970 btrfs_release_path(path);
6971 path->leave_spinning = 1;
6972
6973 key.objectid = bytenr;
6974 key.type = BTRFS_EXTENT_ITEM_KEY;
6975 key.offset = num_bytes;
6976
6977 ret = btrfs_search_slot(trans, extent_root, &key, path,
6978 -1, 1);
6979 if (ret) {
6980 btrfs_err(info,
6981 "umm, got %d back from search, was looking for %llu",
6982 ret, bytenr);
6983 btrfs_print_leaf(path->nodes[0]);
6984 }
6985 if (ret < 0) {
6986 btrfs_abort_transaction(trans, ret);
6987 goto out;
6988 }
6989
6990 extent_slot = path->slots[0];
6991 leaf = path->nodes[0];
6992 item_size = btrfs_item_size_nr(leaf, extent_slot);
6993 } 6816 }
6994#endif
6995 BUG_ON(item_size < sizeof(*ei));
6996 ei = btrfs_item_ptr(leaf, extent_slot, 6817 ei = btrfs_item_ptr(leaf, extent_slot,
6997 struct btrfs_extent_item); 6818 struct btrfs_extent_item);
6998 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && 6819 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
@@ -7028,9 +6849,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
7028 btrfs_mark_buffer_dirty(leaf); 6849 btrfs_mark_buffer_dirty(leaf);
7029 } 6850 }
7030 if (found_extent) { 6851 if (found_extent) {
7031 ret = remove_extent_backref(trans, info, path, 6852 ret = remove_extent_backref(trans, path, iref,
7032 iref, refs_to_drop, 6853 refs_to_drop, is_data,
7033 is_data, &last_ref); 6854 &last_ref);
7034 if (ret) { 6855 if (ret) {
7035 btrfs_abort_transaction(trans, ret); 6856 btrfs_abort_transaction(trans, ret);
7036 goto out; 6857 goto out;
@@ -7172,7 +6993,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7172 root->root_key.objectid, 6993 root->root_key.objectid,
7173 btrfs_header_level(buf), 0, 6994 btrfs_header_level(buf), 0,
7174 BTRFS_DROP_DELAYED_REF); 6995 BTRFS_DROP_DELAYED_REF);
7175 ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, 6996 ret = btrfs_add_delayed_tree_ref(trans, buf->start,
7176 buf->len, parent, 6997 buf->len, parent,
7177 root->root_key.objectid, 6998 root->root_key.objectid,
7178 btrfs_header_level(buf), 6999 btrfs_header_level(buf),
@@ -7251,13 +7072,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
7251 old_ref_mod = new_ref_mod = 0; 7072 old_ref_mod = new_ref_mod = 0;
7252 ret = 0; 7073 ret = 0;
7253 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 7074 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
7254 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 7075 ret = btrfs_add_delayed_tree_ref(trans, bytenr,
7255 num_bytes, parent, 7076 num_bytes, parent,
7256 root_objectid, (int)owner, 7077 root_objectid, (int)owner,
7257 BTRFS_DROP_DELAYED_REF, NULL, 7078 BTRFS_DROP_DELAYED_REF, NULL,
7258 &old_ref_mod, &new_ref_mod); 7079 &old_ref_mod, &new_ref_mod);
7259 } else { 7080 } else {
7260 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 7081 ret = btrfs_add_delayed_data_ref(trans, bytenr,
7261 num_bytes, parent, 7082 num_bytes, parent,
7262 root_objectid, owner, offset, 7083 root_objectid, owner, offset,
7263 0, BTRFS_DROP_DELAYED_REF, 7084 0, BTRFS_DROP_DELAYED_REF,
@@ -7534,7 +7355,7 @@ search:
7534 * for the proper type. 7355 * for the proper type.
7535 */ 7356 */
7536 if (!block_group_bits(block_group, flags)) { 7357 if (!block_group_bits(block_group, flags)) {
7537 u64 extra = BTRFS_BLOCK_GROUP_DUP | 7358 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7538 BTRFS_BLOCK_GROUP_RAID1 | 7359 BTRFS_BLOCK_GROUP_RAID1 |
7539 BTRFS_BLOCK_GROUP_RAID5 | 7360 BTRFS_BLOCK_GROUP_RAID5 |
7540 BTRFS_BLOCK_GROUP_RAID6 | 7361 BTRFS_BLOCK_GROUP_RAID6 |
@@ -7738,7 +7559,7 @@ unclustered_alloc:
7738 goto loop; 7559 goto loop;
7739 } 7560 }
7740checks: 7561checks:
7741 search_start = ALIGN(offset, fs_info->stripesize); 7562 search_start = round_up(offset, fs_info->stripesize);
7742 7563
7743 /* move on to the next group */ 7564 /* move on to the next group */
7744 if (search_start + num_bytes > 7565 if (search_start + num_bytes >
@@ -7750,7 +7571,6 @@ checks:
7750 if (offset < search_start) 7571 if (offset < search_start)
7751 btrfs_add_free_space(block_group, offset, 7572 btrfs_add_free_space(block_group, offset,
7752 search_start - offset); 7573 search_start - offset);
7753 BUG_ON(offset > search_start);
7754 7574
7755 ret = btrfs_add_reserved_bytes(block_group, ram_bytes, 7575 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7756 num_bytes, delalloc); 7576 num_bytes, delalloc);
@@ -7826,8 +7646,7 @@ loop:
7826 goto out; 7646 goto out;
7827 } 7647 }
7828 7648
7829 ret = do_chunk_alloc(trans, fs_info, flags, 7649 ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
7830 CHUNK_ALLOC_FORCE);
7831 7650
7832 /* 7651 /*
7833 * If we can't allocate a new chunk we've already looped 7652 * If we can't allocate a new chunk we've already looped
@@ -8053,11 +7872,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
8053} 7872}
8054 7873
8055static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 7874static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8056 struct btrfs_fs_info *fs_info,
8057 u64 parent, u64 root_objectid, 7875 u64 parent, u64 root_objectid,
8058 u64 flags, u64 owner, u64 offset, 7876 u64 flags, u64 owner, u64 offset,
8059 struct btrfs_key *ins, int ref_mod) 7877 struct btrfs_key *ins, int ref_mod)
8060{ 7878{
7879 struct btrfs_fs_info *fs_info = trans->fs_info;
8061 int ret; 7880 int ret;
8062 struct btrfs_extent_item *extent_item; 7881 struct btrfs_extent_item *extent_item;
8063 struct btrfs_extent_inline_ref *iref; 7882 struct btrfs_extent_inline_ref *iref;
@@ -8231,7 +8050,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8231 u64 offset, u64 ram_bytes, 8050 u64 offset, u64 ram_bytes,
8232 struct btrfs_key *ins) 8051 struct btrfs_key *ins)
8233{ 8052{
8234 struct btrfs_fs_info *fs_info = root->fs_info;
8235 int ret; 8053 int ret;
8236 8054
8237 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 8055 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@@ -8240,7 +8058,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8240 root->root_key.objectid, owner, offset, 8058 root->root_key.objectid, owner, offset,
8241 BTRFS_ADD_DELAYED_EXTENT); 8059 BTRFS_ADD_DELAYED_EXTENT);
8242 8060
8243 ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, 8061 ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
8244 ins->offset, 0, 8062 ins->offset, 0,
8245 root->root_key.objectid, owner, 8063 root->root_key.objectid, owner,
8246 offset, ram_bytes, 8064 offset, ram_bytes,
@@ -8254,10 +8072,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8254 * space cache bits as well 8072 * space cache bits as well
8255 */ 8073 */
8256int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 8074int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8257 struct btrfs_fs_info *fs_info,
8258 u64 root_objectid, u64 owner, u64 offset, 8075 u64 root_objectid, u64 owner, u64 offset,
8259 struct btrfs_key *ins) 8076 struct btrfs_key *ins)
8260{ 8077{
8078 struct btrfs_fs_info *fs_info = trans->fs_info;
8261 int ret; 8079 int ret;
8262 struct btrfs_block_group_cache *block_group; 8080 struct btrfs_block_group_cache *block_group;
8263 struct btrfs_space_info *space_info; 8081 struct btrfs_space_info *space_info;
@@ -8285,15 +8103,15 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8285 spin_unlock(&block_group->lock); 8103 spin_unlock(&block_group->lock);
8286 spin_unlock(&space_info->lock); 8104 spin_unlock(&space_info->lock);
8287 8105
8288 ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid, 8106 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
8289 0, owner, offset, ins, 1); 8107 offset, ins, 1);
8290 btrfs_put_block_group(block_group); 8108 btrfs_put_block_group(block_group);
8291 return ret; 8109 return ret;
8292} 8110}
8293 8111
8294static struct extent_buffer * 8112static struct extent_buffer *
8295btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, 8113btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8296 u64 bytenr, int level) 8114 u64 bytenr, int level, u64 owner)
8297{ 8115{
8298 struct btrfs_fs_info *fs_info = root->fs_info; 8116 struct btrfs_fs_info *fs_info = root->fs_info;
8299 struct extent_buffer *buf; 8117 struct extent_buffer *buf;
@@ -8302,7 +8120,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8302 if (IS_ERR(buf)) 8120 if (IS_ERR(buf))
8303 return buf; 8121 return buf;
8304 8122
8305 btrfs_set_header_generation(buf, trans->transid);
8306 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 8123 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8307 btrfs_tree_lock(buf); 8124 btrfs_tree_lock(buf);
8308 clean_tree_block(fs_info, buf); 8125 clean_tree_block(fs_info, buf);
@@ -8311,6 +8128,14 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8311 btrfs_set_lock_blocking(buf); 8128 btrfs_set_lock_blocking(buf);
8312 set_extent_buffer_uptodate(buf); 8129 set_extent_buffer_uptodate(buf);
8313 8130
8131 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
8132 btrfs_set_header_level(buf, level);
8133 btrfs_set_header_bytenr(buf, buf->start);
8134 btrfs_set_header_generation(buf, trans->transid);
8135 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
8136 btrfs_set_header_owner(buf, owner);
8137 write_extent_buffer_fsid(buf, fs_info->fsid);
8138 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
8314 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 8139 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
8315 buf->log_index = root->log_transid % 2; 8140 buf->log_index = root->log_transid % 2;
8316 /* 8141 /*
@@ -8419,7 +8244,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8419#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 8244#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8420 if (btrfs_is_testing(fs_info)) { 8245 if (btrfs_is_testing(fs_info)) {
8421 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, 8246 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8422 level); 8247 level, root_objectid);
8423 if (!IS_ERR(buf)) 8248 if (!IS_ERR(buf))
8424 root->alloc_bytenr += blocksize; 8249 root->alloc_bytenr += blocksize;
8425 return buf; 8250 return buf;
@@ -8435,7 +8260,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8435 if (ret) 8260 if (ret)
8436 goto out_unuse; 8261 goto out_unuse;
8437 8262
8438 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); 8263 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
8264 root_objectid);
8439 if (IS_ERR(buf)) { 8265 if (IS_ERR(buf)) {
8440 ret = PTR_ERR(buf); 8266 ret = PTR_ERR(buf);
8441 goto out_free_reserved; 8267 goto out_free_reserved;
@@ -8467,7 +8293,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8467 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, 8293 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
8468 root_objectid, level, 0, 8294 root_objectid, level, 0,
8469 BTRFS_ADD_DELAYED_EXTENT); 8295 BTRFS_ADD_DELAYED_EXTENT);
8470 ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, 8296 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
8471 ins.offset, parent, 8297 ins.offset, parent,
8472 root_objectid, level, 8298 root_objectid, level,
8473 BTRFS_ADD_DELAYED_EXTENT, 8299 BTRFS_ADD_DELAYED_EXTENT,
@@ -8499,7 +8325,6 @@ struct walk_control {
8499 int keep_locks; 8325 int keep_locks;
8500 int reada_slot; 8326 int reada_slot;
8501 int reada_count; 8327 int reada_count;
8502 int for_reloc;
8503}; 8328};
8504 8329
8505#define DROP_REFERENCE 1 8330#define DROP_REFERENCE 1
@@ -8819,7 +8644,7 @@ skip:
8819 } 8644 }
8820 8645
8821 if (need_account) { 8646 if (need_account) {
8822 ret = btrfs_qgroup_trace_subtree(trans, root, next, 8647 ret = btrfs_qgroup_trace_subtree(trans, next,
8823 generation, level - 1); 8648 generation, level - 1);
8824 if (ret) { 8649 if (ret) {
8825 btrfs_err_rl(fs_info, 8650 btrfs_err_rl(fs_info,
@@ -8919,7 +8744,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
8919 else 8744 else
8920 ret = btrfs_dec_ref(trans, root, eb, 0); 8745 ret = btrfs_dec_ref(trans, root, eb, 0);
8921 BUG_ON(ret); /* -ENOMEM */ 8746 BUG_ON(ret); /* -ENOMEM */
8922 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb); 8747 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
8923 if (ret) { 8748 if (ret) {
8924 btrfs_err_rl(fs_info, 8749 btrfs_err_rl(fs_info,
8925 "error %d accounting leaf items. Quota is out of sync, rescan required.", 8750 "error %d accounting leaf items. Quota is out of sync, rescan required.",
@@ -9136,7 +8961,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
9136 wc->stage = DROP_REFERENCE; 8961 wc->stage = DROP_REFERENCE;
9137 wc->update_ref = update_ref; 8962 wc->update_ref = update_ref;
9138 wc->keep_locks = 0; 8963 wc->keep_locks = 0;
9139 wc->for_reloc = for_reloc;
9140 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 8964 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9141 8965
9142 while (1) { 8966 while (1) {
@@ -9199,7 +9023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
9199 if (err) 9023 if (err)
9200 goto out_end_trans; 9024 goto out_end_trans;
9201 9025
9202 ret = btrfs_del_root(trans, fs_info, &root->root_key); 9026 ret = btrfs_del_root(trans, &root->root_key);
9203 if (ret) { 9027 if (ret) {
9204 btrfs_abort_transaction(trans, ret); 9028 btrfs_abort_transaction(trans, ret);
9205 err = ret; 9029 err = ret;
@@ -9302,7 +9126,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9302 wc->stage = DROP_REFERENCE; 9126 wc->stage = DROP_REFERENCE;
9303 wc->update_ref = 0; 9127 wc->update_ref = 0;
9304 wc->keep_locks = 1; 9128 wc->keep_locks = 1;
9305 wc->for_reloc = 1;
9306 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 9129 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9307 9130
9308 while (1) { 9131 while (1) {
@@ -9417,10 +9240,10 @@ out:
9417 return ret; 9240 return ret;
9418} 9241}
9419 9242
9420int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info, 9243int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
9421 struct btrfs_block_group_cache *cache)
9422 9244
9423{ 9245{
9246 struct btrfs_fs_info *fs_info = cache->fs_info;
9424 struct btrfs_trans_handle *trans; 9247 struct btrfs_trans_handle *trans;
9425 u64 alloc_flags; 9248 u64 alloc_flags;
9426 int ret; 9249 int ret;
@@ -9454,7 +9277,7 @@ again:
9454 */ 9277 */
9455 alloc_flags = update_block_group_flags(fs_info, cache->flags); 9278 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9456 if (alloc_flags != cache->flags) { 9279 if (alloc_flags != cache->flags) {
9457 ret = do_chunk_alloc(trans, fs_info, alloc_flags, 9280 ret = do_chunk_alloc(trans, alloc_flags,
9458 CHUNK_ALLOC_FORCE); 9281 CHUNK_ALLOC_FORCE);
9459 /* 9282 /*
9460 * ENOSPC is allowed here, we may have enough space 9283 * ENOSPC is allowed here, we may have enough space
@@ -9471,8 +9294,7 @@ again:
9471 if (!ret) 9294 if (!ret)
9472 goto out; 9295 goto out;
9473 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); 9296 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9474 ret = do_chunk_alloc(trans, fs_info, alloc_flags, 9297 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9475 CHUNK_ALLOC_FORCE);
9476 if (ret < 0) 9298 if (ret < 0)
9477 goto out; 9299 goto out;
9478 ret = inc_block_group_ro(cache, 0); 9300 ret = inc_block_group_ro(cache, 0);
@@ -9480,7 +9302,7 @@ out:
9480 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { 9302 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9481 alloc_flags = update_block_group_flags(fs_info, cache->flags); 9303 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9482 mutex_lock(&fs_info->chunk_mutex); 9304 mutex_lock(&fs_info->chunk_mutex);
9483 check_system_chunk(trans, fs_info, alloc_flags); 9305 check_system_chunk(trans, alloc_flags);
9484 mutex_unlock(&fs_info->chunk_mutex); 9306 mutex_unlock(&fs_info->chunk_mutex);
9485 } 9307 }
9486 mutex_unlock(&fs_info->ro_block_group_mutex); 9308 mutex_unlock(&fs_info->ro_block_group_mutex);
@@ -9489,12 +9311,11 @@ out:
9489 return ret; 9311 return ret;
9490} 9312}
9491 9313
9492int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 9314int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
9493 struct btrfs_fs_info *fs_info, u64 type)
9494{ 9315{
9495 u64 alloc_flags = get_alloc_profile(fs_info, type); 9316 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
9496 9317
9497 return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE); 9318 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9498} 9319}
9499 9320
9500/* 9321/*
@@ -9520,13 +9341,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9520 continue; 9341 continue;
9521 } 9342 }
9522 9343
9523 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | 9344 factor = btrfs_bg_type_to_factor(block_group->flags);
9524 BTRFS_BLOCK_GROUP_RAID10 |
9525 BTRFS_BLOCK_GROUP_DUP))
9526 factor = 2;
9527 else
9528 factor = 1;
9529
9530 free_bytes += (block_group->key.offset - 9345 free_bytes += (block_group->key.offset -
9531 btrfs_block_group_used(&block_group->item)) * 9346 btrfs_block_group_used(&block_group->item)) *
9532 factor; 9347 factor;
@@ -9717,6 +9532,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
9717 int ret = 0; 9532 int ret = 0;
9718 struct btrfs_key found_key; 9533 struct btrfs_key found_key;
9719 struct extent_buffer *leaf; 9534 struct extent_buffer *leaf;
9535 struct btrfs_block_group_item bg;
9536 u64 flags;
9720 int slot; 9537 int slot;
9721 9538
9722 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 9539 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9751,8 +9568,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
9751 "logical %llu len %llu found bg but no related chunk", 9568 "logical %llu len %llu found bg but no related chunk",
9752 found_key.objectid, found_key.offset); 9569 found_key.objectid, found_key.offset);
9753 ret = -ENOENT; 9570 ret = -ENOENT;
9571 } else if (em->start != found_key.objectid ||
9572 em->len != found_key.offset) {
9573 btrfs_err(fs_info,
9574 "block group %llu len %llu mismatch with chunk %llu len %llu",
9575 found_key.objectid, found_key.offset,
9576 em->start, em->len);
9577 ret = -EUCLEAN;
9754 } else { 9578 } else {
9755 ret = 0; 9579 read_extent_buffer(leaf, &bg,
9580 btrfs_item_ptr_offset(leaf, slot),
9581 sizeof(bg));
9582 flags = btrfs_block_group_flags(&bg) &
9583 BTRFS_BLOCK_GROUP_TYPE_MASK;
9584
9585 if (flags != (em->map_lookup->type &
9586 BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9587 btrfs_err(fs_info,
9588"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
9589 found_key.objectid,
9590 found_key.offset, flags,
9591 (BTRFS_BLOCK_GROUP_TYPE_MASK &
9592 em->map_lookup->type));
9593 ret = -EUCLEAN;
9594 } else {
9595 ret = 0;
9596 }
9756 } 9597 }
9757 free_extent_map(em); 9598 free_extent_map(em);
9758 goto out; 9599 goto out;
@@ -9847,7 +9688,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
9847 */ 9688 */
9848 if (block_group->cached == BTRFS_CACHE_NO || 9689 if (block_group->cached == BTRFS_CACHE_NO ||
9849 block_group->cached == BTRFS_CACHE_ERROR) 9690 block_group->cached == BTRFS_CACHE_ERROR)
9850 free_excluded_extents(info, block_group); 9691 free_excluded_extents(block_group);
9851 9692
9852 btrfs_remove_free_space_cache(block_group); 9693 btrfs_remove_free_space_cache(block_group);
9853 ASSERT(block_group->cached != BTRFS_CACHE_STARTED); 9694 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
@@ -10003,6 +9844,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10003 return cache; 9844 return cache;
10004} 9845}
10005 9846
9847
9848/*
9849 * Iterate all chunks and verify that each of them has the corresponding block
9850 * group
9851 */
9852static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
9853{
9854 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
9855 struct extent_map *em;
9856 struct btrfs_block_group_cache *bg;
9857 u64 start = 0;
9858 int ret = 0;
9859
9860 while (1) {
9861 read_lock(&map_tree->map_tree.lock);
9862 /*
9863 * lookup_extent_mapping will return the first extent map
9864 * intersecting the range, so setting @len to 1 is enough to
9865 * get the first chunk.
9866 */
9867 em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
9868 read_unlock(&map_tree->map_tree.lock);
9869 if (!em)
9870 break;
9871
9872 bg = btrfs_lookup_block_group(fs_info, em->start);
9873 if (!bg) {
9874 btrfs_err(fs_info,
9875 "chunk start=%llu len=%llu doesn't have corresponding block group",
9876 em->start, em->len);
9877 ret = -EUCLEAN;
9878 free_extent_map(em);
9879 break;
9880 }
9881 if (bg->key.objectid != em->start ||
9882 bg->key.offset != em->len ||
9883 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
9884 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9885 btrfs_err(fs_info,
9886"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
9887 em->start, em->len,
9888 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
9889 bg->key.objectid, bg->key.offset,
9890 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
9891 ret = -EUCLEAN;
9892 free_extent_map(em);
9893 btrfs_put_block_group(bg);
9894 break;
9895 }
9896 start = em->start + em->len;
9897 free_extent_map(em);
9898 btrfs_put_block_group(bg);
9899 }
9900 return ret;
9901}
9902
10006int btrfs_read_block_groups(struct btrfs_fs_info *info) 9903int btrfs_read_block_groups(struct btrfs_fs_info *info)
10007{ 9904{
10008 struct btrfs_path *path; 9905 struct btrfs_path *path;
@@ -10089,13 +9986,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10089 * info has super bytes accounted for, otherwise we'll think 9986 * info has super bytes accounted for, otherwise we'll think
10090 * we have more space than we actually do. 9987 * we have more space than we actually do.
10091 */ 9988 */
10092 ret = exclude_super_stripes(info, cache); 9989 ret = exclude_super_stripes(cache);
10093 if (ret) { 9990 if (ret) {
10094 /* 9991 /*
10095 * We may have excluded something, so call this just in 9992 * We may have excluded something, so call this just in
10096 * case. 9993 * case.
10097 */ 9994 */
10098 free_excluded_extents(info, cache); 9995 free_excluded_extents(cache);
10099 btrfs_put_block_group(cache); 9996 btrfs_put_block_group(cache);
10100 goto error; 9997 goto error;
10101 } 9998 }
@@ -10110,14 +10007,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10110 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 10007 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10111 cache->last_byte_to_unpin = (u64)-1; 10008 cache->last_byte_to_unpin = (u64)-1;
10112 cache->cached = BTRFS_CACHE_FINISHED; 10009 cache->cached = BTRFS_CACHE_FINISHED;
10113 free_excluded_extents(info, cache); 10010 free_excluded_extents(cache);
10114 } else if (btrfs_block_group_used(&cache->item) == 0) { 10011 } else if (btrfs_block_group_used(&cache->item) == 0) {
10115 cache->last_byte_to_unpin = (u64)-1; 10012 cache->last_byte_to_unpin = (u64)-1;
10116 cache->cached = BTRFS_CACHE_FINISHED; 10013 cache->cached = BTRFS_CACHE_FINISHED;
10117 add_new_free_space(cache, found_key.objectid, 10014 add_new_free_space(cache, found_key.objectid,
10118 found_key.objectid + 10015 found_key.objectid +
10119 found_key.offset); 10016 found_key.offset);
10120 free_excluded_extents(info, cache); 10017 free_excluded_extents(cache);
10121 } 10018 }
10122 10019
10123 ret = btrfs_add_block_group_cache(info, cache); 10020 ret = btrfs_add_block_group_cache(info, cache);
@@ -10140,15 +10037,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10140 if (btrfs_chunk_readonly(info, cache->key.objectid)) { 10037 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
10141 inc_block_group_ro(cache, 1); 10038 inc_block_group_ro(cache, 1);
10142 } else if (btrfs_block_group_used(&cache->item) == 0) { 10039 } else if (btrfs_block_group_used(&cache->item) == 0) {
10143 spin_lock(&info->unused_bgs_lock); 10040 ASSERT(list_empty(&cache->bg_list));
10144 /* Should always be true but just in case. */ 10041 btrfs_mark_bg_unused(cache);
10145 if (list_empty(&cache->bg_list)) {
10146 btrfs_get_block_group(cache);
10147 trace_btrfs_add_unused_block_group(cache);
10148 list_add_tail(&cache->bg_list,
10149 &info->unused_bgs);
10150 }
10151 spin_unlock(&info->unused_bgs_lock);
10152 } 10042 }
10153 } 10043 }
10154 10044
@@ -10176,7 +10066,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10176 10066
10177 btrfs_add_raid_kobjects(info); 10067 btrfs_add_raid_kobjects(info);
10178 init_global_block_rsv(info); 10068 init_global_block_rsv(info);
10179 ret = 0; 10069 ret = check_chunk_block_group_mappings(info);
10180error: 10070error:
10181 btrfs_free_path(path); 10071 btrfs_free_path(path);
10182 return ret; 10072 return ret;
@@ -10206,8 +10096,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
10206 sizeof(item)); 10096 sizeof(item));
10207 if (ret) 10097 if (ret)
10208 btrfs_abort_transaction(trans, ret); 10098 btrfs_abort_transaction(trans, ret);
10209 ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid, 10099 ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
10210 key.offset);
10211 if (ret) 10100 if (ret)
10212 btrfs_abort_transaction(trans, ret); 10101 btrfs_abort_transaction(trans, ret);
10213 add_block_group_free_space(trans, block_group); 10102 add_block_group_free_space(trans, block_group);
@@ -10218,10 +10107,10 @@ next:
10218 trans->can_flush_pending_bgs = can_flush_pending_bgs; 10107 trans->can_flush_pending_bgs = can_flush_pending_bgs;
10219} 10108}
10220 10109
10221int btrfs_make_block_group(struct btrfs_trans_handle *trans, 10110int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10222 struct btrfs_fs_info *fs_info, u64 bytes_used,
10223 u64 type, u64 chunk_offset, u64 size) 10111 u64 type, u64 chunk_offset, u64 size)
10224{ 10112{
10113 struct btrfs_fs_info *fs_info = trans->fs_info;
10225 struct btrfs_block_group_cache *cache; 10114 struct btrfs_block_group_cache *cache;
10226 int ret; 10115 int ret;
10227 10116
@@ -10240,20 +10129,20 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10240 cache->last_byte_to_unpin = (u64)-1; 10129 cache->last_byte_to_unpin = (u64)-1;
10241 cache->cached = BTRFS_CACHE_FINISHED; 10130 cache->cached = BTRFS_CACHE_FINISHED;
10242 cache->needs_free_space = 1; 10131 cache->needs_free_space = 1;
10243 ret = exclude_super_stripes(fs_info, cache); 10132 ret = exclude_super_stripes(cache);
10244 if (ret) { 10133 if (ret) {
10245 /* 10134 /*
10246 * We may have excluded something, so call this just in 10135 * We may have excluded something, so call this just in
10247 * case. 10136 * case.
10248 */ 10137 */
10249 free_excluded_extents(fs_info, cache); 10138 free_excluded_extents(cache);
10250 btrfs_put_block_group(cache); 10139 btrfs_put_block_group(cache);
10251 return ret; 10140 return ret;
10252 } 10141 }
10253 10142
10254 add_new_free_space(cache, chunk_offset, chunk_offset + size); 10143 add_new_free_space(cache, chunk_offset, chunk_offset + size);
10255 10144
10256 free_excluded_extents(fs_info, cache); 10145 free_excluded_extents(cache);
10257 10146
10258#ifdef CONFIG_BTRFS_DEBUG 10147#ifdef CONFIG_BTRFS_DEBUG
10259 if (btrfs_should_fragment_free_space(cache)) { 10148 if (btrfs_should_fragment_free_space(cache)) {
@@ -10311,9 +10200,9 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10311} 10200}
10312 10201
10313int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 10202int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10314 struct btrfs_fs_info *fs_info, u64 group_start, 10203 u64 group_start, struct extent_map *em)
10315 struct extent_map *em)
10316{ 10204{
10205 struct btrfs_fs_info *fs_info = trans->fs_info;
10317 struct btrfs_root *root = fs_info->extent_root; 10206 struct btrfs_root *root = fs_info->extent_root;
10318 struct btrfs_path *path; 10207 struct btrfs_path *path;
10319 struct btrfs_block_group_cache *block_group; 10208 struct btrfs_block_group_cache *block_group;
@@ -10337,18 +10226,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10337 * Free the reserved super bytes from this block group before 10226 * Free the reserved super bytes from this block group before
10338 * remove it. 10227 * remove it.
10339 */ 10228 */
10340 free_excluded_extents(fs_info, block_group); 10229 free_excluded_extents(block_group);
10341 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, 10230 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10342 block_group->key.offset); 10231 block_group->key.offset);
10343 10232
10344 memcpy(&key, &block_group->key, sizeof(key)); 10233 memcpy(&key, &block_group->key, sizeof(key));
10345 index = btrfs_bg_flags_to_raid_index(block_group->flags); 10234 index = btrfs_bg_flags_to_raid_index(block_group->flags);
10346 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 10235 factor = btrfs_bg_type_to_factor(block_group->flags);
10347 BTRFS_BLOCK_GROUP_RAID1 |
10348 BTRFS_BLOCK_GROUP_RAID10))
10349 factor = 2;
10350 else
10351 factor = 1;
10352 10236
10353 /* make sure this block group isn't part of an allocation cluster */ 10237 /* make sure this block group isn't part of an allocation cluster */
10354 cluster = &fs_info->data_alloc_cluster; 10238 cluster = &fs_info->data_alloc_cluster;
@@ -10687,7 +10571,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10687 /* Don't want to race with allocators so take the groups_sem */ 10571 /* Don't want to race with allocators so take the groups_sem */
10688 down_write(&space_info->groups_sem); 10572 down_write(&space_info->groups_sem);
10689 spin_lock(&block_group->lock); 10573 spin_lock(&block_group->lock);
10690 if (block_group->reserved || 10574 if (block_group->reserved || block_group->pinned ||
10691 btrfs_block_group_used(&block_group->item) || 10575 btrfs_block_group_used(&block_group->item) ||
10692 block_group->ro || 10576 block_group->ro ||
10693 list_is_singular(&block_group->list)) { 10577 list_is_singular(&block_group->list)) {
@@ -10764,8 +10648,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10764 10648
10765 space_info->bytes_pinned -= block_group->pinned; 10649 space_info->bytes_pinned -= block_group->pinned;
10766 space_info->bytes_readonly += block_group->pinned; 10650 space_info->bytes_readonly += block_group->pinned;
10767 percpu_counter_add(&space_info->total_bytes_pinned, 10651 percpu_counter_add_batch(&space_info->total_bytes_pinned,
10768 -block_group->pinned); 10652 -block_group->pinned,
10653 BTRFS_TOTAL_BYTES_PINNED_BATCH);
10769 block_group->pinned = 0; 10654 block_group->pinned = 0;
10770 10655
10771 spin_unlock(&block_group->lock); 10656 spin_unlock(&block_group->lock);
@@ -10782,8 +10667,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10782 * Btrfs_remove_chunk will abort the transaction if things go 10667 * Btrfs_remove_chunk will abort the transaction if things go
10783 * horribly wrong. 10668 * horribly wrong.
10784 */ 10669 */
10785 ret = btrfs_remove_chunk(trans, fs_info, 10670 ret = btrfs_remove_chunk(trans, block_group->key.objectid);
10786 block_group->key.objectid);
10787 10671
10788 if (ret) { 10672 if (ret) {
10789 if (trimming) 10673 if (trimming)
@@ -11066,3 +10950,16 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11066 !atomic_read(&root->will_be_snapshotted)); 10950 !atomic_read(&root->will_be_snapshotted));
11067 } 10951 }
11068} 10952}
10953
10954void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
10955{
10956 struct btrfs_fs_info *fs_info = bg->fs_info;
10957
10958 spin_lock(&fs_info->unused_bgs_lock);
10959 if (list_empty(&bg->bg_list)) {
10960 btrfs_get_block_group(bg);
10961 trace_btrfs_add_unused_block_group(bg);
10962 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
10963 }
10964 spin_unlock(&fs_info->unused_bgs_lock);
10965}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b3e45714d28f..628f1aef34b0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -140,14 +140,6 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits,
140 140
141static void flush_write_bio(struct extent_page_data *epd); 141static void flush_write_bio(struct extent_page_data *epd);
142 142
143static inline struct btrfs_fs_info *
144tree_fs_info(struct extent_io_tree *tree)
145{
146 if (tree->ops)
147 return tree->ops->tree_fs_info(tree->private_data);
148 return NULL;
149}
150
151int __init extent_io_init(void) 143int __init extent_io_init(void)
152{ 144{
153 extent_state_cache = kmem_cache_create("btrfs_extent_state", 145 extent_state_cache = kmem_cache_create("btrfs_extent_state",
@@ -564,8 +556,10 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
564 556
565static void extent_io_tree_panic(struct extent_io_tree *tree, int err) 557static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
566{ 558{
567 btrfs_panic(tree_fs_info(tree), err, 559 struct inode *inode = tree->private_data;
568 "Locking error: Extent tree was modified by another thread while locked."); 560
561 btrfs_panic(btrfs_sb(inode->i_sb), err,
562 "locking error: extent tree was modified by another thread while locked");
569} 563}
570 564
571/* 565/*
@@ -1386,14 +1380,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1386 } 1380 }
1387} 1381}
1388 1382
1389/*
1390 * helper function to set both pages and extents in the tree writeback
1391 */
1392static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1393{
1394 tree->ops->set_range_writeback(tree->private_data, start, end);
1395}
1396
1397/* find the first state struct with 'bits' set after 'start', and 1383/* find the first state struct with 'bits' set after 'start', and
1398 * return it. tree->lock must be held. NULL will returned if 1384 * return it. tree->lock must be held. NULL will returned if
1399 * nothing was found after 'start' 1385 * nothing was found after 'start'
@@ -2059,7 +2045,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2059 struct extent_buffer *eb, int mirror_num) 2045 struct extent_buffer *eb, int mirror_num)
2060{ 2046{
2061 u64 start = eb->start; 2047 u64 start = eb->start;
2062 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); 2048 int i, num_pages = num_extent_pages(eb);
2063 int ret = 0; 2049 int ret = 0;
2064 2050
2065 if (sb_rdonly(fs_info->sb)) 2051 if (sb_rdonly(fs_info->sb))
@@ -2398,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2398 start - page_offset(page), 2384 start - page_offset(page),
2399 (int)phy_offset, failed_bio->bi_end_io, 2385 (int)phy_offset, failed_bio->bi_end_io,
2400 NULL); 2386 NULL);
2401 bio_set_op_attrs(bio, REQ_OP_READ, read_mode); 2387 bio->bi_opf = REQ_OP_READ | read_mode;
2402 2388
2403 btrfs_debug(btrfs_sb(inode->i_sb), 2389 btrfs_debug(btrfs_sb(inode->i_sb),
2404 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", 2390 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
@@ -2790,8 +2776,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2790 else 2776 else
2791 contig = bio_end_sector(bio) == sector; 2777 contig = bio_end_sector(bio) == sector;
2792 2778
2793 if (tree->ops && tree->ops->merge_bio_hook(page, offset, 2779 if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
2794 page_size, bio, bio_flags)) 2780 bio, bio_flags))
2795 can_merge = false; 2781 can_merge = false;
2796 2782
2797 if (prev_bio_flags != bio_flags || !contig || !can_merge || 2783 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
@@ -3422,7 +3408,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3422 continue; 3408 continue;
3423 } 3409 }
3424 3410
3425 set_range_writeback(tree, cur, cur + iosize - 1); 3411 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3426 if (!PageWriteback(page)) { 3412 if (!PageWriteback(page)) {
3427 btrfs_err(BTRFS_I(inode)->root->fs_info, 3413 btrfs_err(BTRFS_I(inode)->root->fs_info,
3428 "page %lu not writeback, cur %llu end %llu", 3414 "page %lu not writeback, cur %llu end %llu",
@@ -3538,7 +3524,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
3538 struct btrfs_fs_info *fs_info, 3524 struct btrfs_fs_info *fs_info,
3539 struct extent_page_data *epd) 3525 struct extent_page_data *epd)
3540{ 3526{
3541 unsigned long i, num_pages; 3527 int i, num_pages;
3542 int flush = 0; 3528 int flush = 0;
3543 int ret = 0; 3529 int ret = 0;
3544 3530
@@ -3588,7 +3574,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
3588 if (!ret) 3574 if (!ret)
3589 return ret; 3575 return ret;
3590 3576
3591 num_pages = num_extent_pages(eb->start, eb->len); 3577 num_pages = num_extent_pages(eb);
3592 for (i = 0; i < num_pages; i++) { 3578 for (i = 0; i < num_pages; i++) {
3593 struct page *p = eb->pages[i]; 3579 struct page *p = eb->pages[i];
3594 3580
@@ -3712,13 +3698,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3712 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree; 3698 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3713 u64 offset = eb->start; 3699 u64 offset = eb->start;
3714 u32 nritems; 3700 u32 nritems;
3715 unsigned long i, num_pages; 3701 int i, num_pages;
3716 unsigned long start, end; 3702 unsigned long start, end;
3717 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; 3703 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3718 int ret = 0; 3704 int ret = 0;
3719 3705
3720 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); 3706 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3721 num_pages = num_extent_pages(eb->start, eb->len); 3707 num_pages = num_extent_pages(eb);
3722 atomic_set(&eb->io_pages, num_pages); 3708 atomic_set(&eb->io_pages, num_pages);
3723 3709
3724 /* set btree blocks beyond nritems with 0 to avoid stale content. */ 3710 /* set btree blocks beyond nritems with 0 to avoid stale content. */
@@ -4643,23 +4629,20 @@ int extent_buffer_under_io(struct extent_buffer *eb)
4643} 4629}
4644 4630
4645/* 4631/*
4646 * Helper for releasing extent buffer page. 4632 * Release all pages attached to the extent buffer.
4647 */ 4633 */
4648static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) 4634static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4649{ 4635{
4650 unsigned long index; 4636 int i;
4651 struct page *page; 4637 int num_pages;
4652 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); 4638 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4653 4639
4654 BUG_ON(extent_buffer_under_io(eb)); 4640 BUG_ON(extent_buffer_under_io(eb));
4655 4641
4656 index = num_extent_pages(eb->start, eb->len); 4642 num_pages = num_extent_pages(eb);
4657 if (index == 0) 4643 for (i = 0; i < num_pages; i++) {
4658 return; 4644 struct page *page = eb->pages[i];
4659 4645
4660 do {
4661 index--;
4662 page = eb->pages[index];
4663 if (!page) 4646 if (!page)
4664 continue; 4647 continue;
4665 if (mapped) 4648 if (mapped)
@@ -4691,7 +4674,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4691 4674
4692 /* One for when we allocated the page */ 4675 /* One for when we allocated the page */
4693 put_page(page); 4676 put_page(page);
4694 } while (index != 0); 4677 }
4695} 4678}
4696 4679
4697/* 4680/*
@@ -4699,7 +4682,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4699 */ 4682 */
4700static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) 4683static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4701{ 4684{
4702 btrfs_release_extent_buffer_page(eb); 4685 btrfs_release_extent_buffer_pages(eb);
4703 __free_extent_buffer(eb); 4686 __free_extent_buffer(eb);
4704} 4687}
4705 4688
@@ -4743,10 +4726,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4743 4726
4744struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) 4727struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4745{ 4728{
4746 unsigned long i; 4729 int i;
4747 struct page *p; 4730 struct page *p;
4748 struct extent_buffer *new; 4731 struct extent_buffer *new;
4749 unsigned long num_pages = num_extent_pages(src->start, src->len); 4732 int num_pages = num_extent_pages(src);
4750 4733
4751 new = __alloc_extent_buffer(src->fs_info, src->start, src->len); 4734 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4752 if (new == NULL) 4735 if (new == NULL)
@@ -4766,7 +4749,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4766 } 4749 }
4767 4750
4768 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); 4751 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4769 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); 4752 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
4770 4753
4771 return new; 4754 return new;
4772} 4755}
@@ -4775,15 +4758,14 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4775 u64 start, unsigned long len) 4758 u64 start, unsigned long len)
4776{ 4759{
4777 struct extent_buffer *eb; 4760 struct extent_buffer *eb;
4778 unsigned long num_pages; 4761 int num_pages;
4779 unsigned long i; 4762 int i;
4780
4781 num_pages = num_extent_pages(start, len);
4782 4763
4783 eb = __alloc_extent_buffer(fs_info, start, len); 4764 eb = __alloc_extent_buffer(fs_info, start, len);
4784 if (!eb) 4765 if (!eb)
4785 return NULL; 4766 return NULL;
4786 4767
4768 num_pages = num_extent_pages(eb);
4787 for (i = 0; i < num_pages; i++) { 4769 for (i = 0; i < num_pages; i++) {
4788 eb->pages[i] = alloc_page(GFP_NOFS); 4770 eb->pages[i] = alloc_page(GFP_NOFS);
4789 if (!eb->pages[i]) 4771 if (!eb->pages[i])
@@ -4791,7 +4773,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4791 } 4773 }
4792 set_extent_buffer_uptodate(eb); 4774 set_extent_buffer_uptodate(eb);
4793 btrfs_set_header_nritems(eb, 0); 4775 btrfs_set_header_nritems(eb, 0);
4794 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); 4776 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4795 4777
4796 return eb; 4778 return eb;
4797err: 4779err:
@@ -4843,11 +4825,11 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
4843static void mark_extent_buffer_accessed(struct extent_buffer *eb, 4825static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4844 struct page *accessed) 4826 struct page *accessed)
4845{ 4827{
4846 unsigned long num_pages, i; 4828 int num_pages, i;
4847 4829
4848 check_buffer_tree_ref(eb); 4830 check_buffer_tree_ref(eb);
4849 4831
4850 num_pages = num_extent_pages(eb->start, eb->len); 4832 num_pages = num_extent_pages(eb);
4851 for (i = 0; i < num_pages; i++) { 4833 for (i = 0; i < num_pages; i++) {
4852 struct page *p = eb->pages[i]; 4834 struct page *p = eb->pages[i];
4853 4835
@@ -4944,8 +4926,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4944 u64 start) 4926 u64 start)
4945{ 4927{
4946 unsigned long len = fs_info->nodesize; 4928 unsigned long len = fs_info->nodesize;
4947 unsigned long num_pages = num_extent_pages(start, len); 4929 int num_pages;
4948 unsigned long i; 4930 int i;
4949 unsigned long index = start >> PAGE_SHIFT; 4931 unsigned long index = start >> PAGE_SHIFT;
4950 struct extent_buffer *eb; 4932 struct extent_buffer *eb;
4951 struct extent_buffer *exists = NULL; 4933 struct extent_buffer *exists = NULL;
@@ -4967,6 +4949,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4967 if (!eb) 4949 if (!eb)
4968 return ERR_PTR(-ENOMEM); 4950 return ERR_PTR(-ENOMEM);
4969 4951
4952 num_pages = num_extent_pages(eb);
4970 for (i = 0; i < num_pages; i++, index++) { 4953 for (i = 0; i < num_pages; i++, index++) {
4971 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); 4954 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4972 if (!p) { 4955 if (!p) {
@@ -5009,8 +4992,11 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
5009 uptodate = 0; 4992 uptodate = 0;
5010 4993
5011 /* 4994 /*
5012 * see below about how we avoid a nasty race with release page 4995 * We can't unlock the pages just yet since the extent buffer
5013 * and why we unlock later 4996 * hasn't been properly inserted in the radix tree, this
4997 * opens a race with btree_releasepage which can free a page
4998 * while we are still filling in all pages for the buffer and
4999 * we could crash.
5014 */ 5000 */
5015 } 5001 }
5016 if (uptodate) 5002 if (uptodate)
@@ -5039,21 +5025,12 @@ again:
5039 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags); 5025 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
5040 5026
5041 /* 5027 /*
5042 * there is a race where release page may have 5028 * Now it's safe to unlock the pages because any calls to
5043 * tried to find this extent buffer in the radix 5029 * btree_releasepage will correctly detect that a page belongs to a
5044 * but failed. It will tell the VM it is safe to 5030 * live buffer and won't free them prematurely.
5045 * reclaim the, and it will clear the page private bit.
5046 * We must make sure to set the page private bit properly
5047 * after the extent buffer is in the radix tree so
5048 * it doesn't get lost
5049 */ 5031 */
5050 SetPageChecked(eb->pages[0]); 5032 for (i = 0; i < num_pages; i++)
5051 for (i = 1; i < num_pages; i++) { 5033 unlock_page(eb->pages[i]);
5052 p = eb->pages[i];
5053 ClearPageChecked(p);
5054 unlock_page(p);
5055 }
5056 unlock_page(eb->pages[0]);
5057 return eb; 5034 return eb;
5058 5035
5059free_eb: 5036free_eb:
@@ -5075,9 +5052,10 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5075 __free_extent_buffer(eb); 5052 __free_extent_buffer(eb);
5076} 5053}
5077 5054
5078/* Expects to have eb->eb_lock already held */
5079static int release_extent_buffer(struct extent_buffer *eb) 5055static int release_extent_buffer(struct extent_buffer *eb)
5080{ 5056{
5057 lockdep_assert_held(&eb->refs_lock);
5058
5081 WARN_ON(atomic_read(&eb->refs) == 0); 5059 WARN_ON(atomic_read(&eb->refs) == 0);
5082 if (atomic_dec_and_test(&eb->refs)) { 5060 if (atomic_dec_and_test(&eb->refs)) {
5083 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) { 5061 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
@@ -5094,9 +5072,9 @@ static int release_extent_buffer(struct extent_buffer *eb)
5094 } 5072 }
5095 5073
5096 /* Should be safe to release our pages at this point */ 5074 /* Should be safe to release our pages at this point */
5097 btrfs_release_extent_buffer_page(eb); 5075 btrfs_release_extent_buffer_pages(eb);
5098#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 5076#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5099 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { 5077 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
5100 __free_extent_buffer(eb); 5078 __free_extent_buffer(eb);
5101 return 1; 5079 return 1;
5102 } 5080 }
@@ -5127,7 +5105,7 @@ void free_extent_buffer(struct extent_buffer *eb)
5127 5105
5128 spin_lock(&eb->refs_lock); 5106 spin_lock(&eb->refs_lock);
5129 if (atomic_read(&eb->refs) == 2 && 5107 if (atomic_read(&eb->refs) == 2 &&
5130 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) 5108 test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
5131 atomic_dec(&eb->refs); 5109 atomic_dec(&eb->refs);
5132 5110
5133 if (atomic_read(&eb->refs) == 2 && 5111 if (atomic_read(&eb->refs) == 2 &&
@@ -5159,11 +5137,11 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
5159 5137
5160void clear_extent_buffer_dirty(struct extent_buffer *eb) 5138void clear_extent_buffer_dirty(struct extent_buffer *eb)
5161{ 5139{
5162 unsigned long i; 5140 int i;
5163 unsigned long num_pages; 5141 int num_pages;
5164 struct page *page; 5142 struct page *page;
5165 5143
5166 num_pages = num_extent_pages(eb->start, eb->len); 5144 num_pages = num_extent_pages(eb);
5167 5145
5168 for (i = 0; i < num_pages; i++) { 5146 for (i = 0; i < num_pages; i++) {
5169 page = eb->pages[i]; 5147 page = eb->pages[i];
@@ -5189,15 +5167,15 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
5189 5167
5190int set_extent_buffer_dirty(struct extent_buffer *eb) 5168int set_extent_buffer_dirty(struct extent_buffer *eb)
5191{ 5169{
5192 unsigned long i; 5170 int i;
5193 unsigned long num_pages; 5171 int num_pages;
5194 int was_dirty = 0; 5172 int was_dirty = 0;
5195 5173
5196 check_buffer_tree_ref(eb); 5174 check_buffer_tree_ref(eb);
5197 5175
5198 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 5176 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5199 5177
5200 num_pages = num_extent_pages(eb->start, eb->len); 5178 num_pages = num_extent_pages(eb);
5201 WARN_ON(atomic_read(&eb->refs) == 0); 5179 WARN_ON(atomic_read(&eb->refs) == 0);
5202 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); 5180 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5203 5181
@@ -5208,12 +5186,12 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
5208 5186
5209void clear_extent_buffer_uptodate(struct extent_buffer *eb) 5187void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5210{ 5188{
5211 unsigned long i; 5189 int i;
5212 struct page *page; 5190 struct page *page;
5213 unsigned long num_pages; 5191 int num_pages;
5214 5192
5215 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 5193 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5216 num_pages = num_extent_pages(eb->start, eb->len); 5194 num_pages = num_extent_pages(eb);
5217 for (i = 0; i < num_pages; i++) { 5195 for (i = 0; i < num_pages; i++) {
5218 page = eb->pages[i]; 5196 page = eb->pages[i];
5219 if (page) 5197 if (page)
@@ -5223,12 +5201,12 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5223 5201
5224void set_extent_buffer_uptodate(struct extent_buffer *eb) 5202void set_extent_buffer_uptodate(struct extent_buffer *eb)
5225{ 5203{
5226 unsigned long i; 5204 int i;
5227 struct page *page; 5205 struct page *page;
5228 unsigned long num_pages; 5206 int num_pages;
5229 5207
5230 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 5208 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5231 num_pages = num_extent_pages(eb->start, eb->len); 5209 num_pages = num_extent_pages(eb);
5232 for (i = 0; i < num_pages; i++) { 5210 for (i = 0; i < num_pages; i++) {
5233 page = eb->pages[i]; 5211 page = eb->pages[i];
5234 SetPageUptodate(page); 5212 SetPageUptodate(page);
@@ -5238,13 +5216,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
5238int read_extent_buffer_pages(struct extent_io_tree *tree, 5216int read_extent_buffer_pages(struct extent_io_tree *tree,
5239 struct extent_buffer *eb, int wait, int mirror_num) 5217 struct extent_buffer *eb, int wait, int mirror_num)
5240{ 5218{
5241 unsigned long i; 5219 int i;
5242 struct page *page; 5220 struct page *page;
5243 int err; 5221 int err;
5244 int ret = 0; 5222 int ret = 0;
5245 int locked_pages = 0; 5223 int locked_pages = 0;
5246 int all_uptodate = 1; 5224 int all_uptodate = 1;
5247 unsigned long num_pages; 5225 int num_pages;
5248 unsigned long num_reads = 0; 5226 unsigned long num_reads = 0;
5249 struct bio *bio = NULL; 5227 struct bio *bio = NULL;
5250 unsigned long bio_flags = 0; 5228 unsigned long bio_flags = 0;
@@ -5252,7 +5230,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
5252 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 5230 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5253 return 0; 5231 return 0;
5254 5232
5255 num_pages = num_extent_pages(eb->start, eb->len); 5233 num_pages = num_extent_pages(eb);
5256 for (i = 0; i < num_pages; i++) { 5234 for (i = 0; i < num_pages; i++) {
5257 page = eb->pages[i]; 5235 page = eb->pages[i];
5258 if (wait == WAIT_NONE) { 5236 if (wait == WAIT_NONE) {
@@ -5576,11 +5554,11 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
5576 struct extent_buffer *src) 5554 struct extent_buffer *src)
5577{ 5555{
5578 int i; 5556 int i;
5579 unsigned num_pages; 5557 int num_pages;
5580 5558
5581 ASSERT(dst->len == src->len); 5559 ASSERT(dst->len == src->len);
5582 5560
5583 num_pages = num_extent_pages(dst->start, dst->len); 5561 num_pages = num_extent_pages(dst);
5584 for (i = 0; i < num_pages; i++) 5562 for (i = 0; i < num_pages; i++)
5585 copy_page(page_address(dst->pages[i]), 5563 copy_page(page_address(dst->pages[i]),
5586 page_address(src->pages[i])); 5564 page_address(src->pages[i]));
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0bfd4aeb822d..b4d03e677e1d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -46,7 +46,7 @@
46#define EXTENT_BUFFER_STALE 6 46#define EXTENT_BUFFER_STALE 6
47#define EXTENT_BUFFER_WRITEBACK 7 47#define EXTENT_BUFFER_WRITEBACK 7
48#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ 48#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */
49#define EXTENT_BUFFER_DUMMY 9 49#define EXTENT_BUFFER_UNMAPPED 9
50#define EXTENT_BUFFER_IN_TREE 10 50#define EXTENT_BUFFER_IN_TREE 10
51#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ 51#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
52 52
@@ -92,9 +92,6 @@ typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *
92typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, 92typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
93 struct bio *bio, u64 bio_offset); 93 struct bio *bio, u64 bio_offset);
94 94
95typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
96 struct bio *bio, int mirror_num);
97
98struct extent_io_ops { 95struct extent_io_ops {
99 /* 96 /*
100 * The following callbacks must be allways defined, the function 97 * The following callbacks must be allways defined, the function
@@ -104,12 +101,7 @@ struct extent_io_ops {
104 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, 101 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
105 struct page *page, u64 start, u64 end, 102 struct page *page, u64 start, u64 end,
106 int mirror); 103 int mirror);
107 int (*merge_bio_hook)(struct page *page, unsigned long offset,
108 size_t size, struct bio *bio,
109 unsigned long bio_flags);
110 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 104 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
111 struct btrfs_fs_info *(*tree_fs_info)(void *private_data);
112 void (*set_range_writeback)(void *private_data, u64 start, u64 end);
113 105
114 /* 106 /*
115 * Optional hooks, called if the pointer is not NULL 107 * Optional hooks, called if the pointer is not NULL
@@ -440,10 +432,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
440 int mirror_num); 432 int mirror_num);
441void wait_on_extent_buffer_writeback(struct extent_buffer *eb); 433void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
442 434
443static inline unsigned long num_extent_pages(u64 start, u64 len) 435static inline int num_extent_pages(const struct extent_buffer *eb)
444{ 436{
445 return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 437 return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
446 (start >> PAGE_SHIFT); 438 (eb->start >> PAGE_SHIFT);
447} 439}
448 440
449static inline void extent_buffer_get(struct extent_buffer *eb) 441static inline void extent_buffer_get(struct extent_buffer *eb)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f9dd6d1836a3..ba74827beb32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -922,7 +922,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
922 const bool new_inline, 922 const bool new_inline,
923 struct extent_map *em) 923 struct extent_map *em)
924{ 924{
925 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 925 struct btrfs_fs_info *fs_info = inode->root->fs_info;
926 struct btrfs_root *root = inode->root; 926 struct btrfs_root *root = inode->root;
927 struct extent_buffer *leaf = path->nodes[0]; 927 struct extent_buffer *leaf = path->nodes[0];
928 const int slot = path->slots[0]; 928 const int slot = path->slots[0];
@@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
942 btrfs_file_extent_num_bytes(leaf, fi); 942 btrfs_file_extent_num_bytes(leaf, fi);
943 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 943 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
944 size_t size; 944 size_t size;
945 size = btrfs_file_extent_inline_len(leaf, slot, fi); 945 size = btrfs_file_extent_ram_bytes(leaf, fi);
946 extent_end = ALIGN(extent_start + size, 946 extent_end = ALIGN(extent_start + size,
947 fs_info->sectorsize); 947 fs_info->sectorsize);
948 } 948 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 51e77d72068a..2be00e873e92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -5,14 +5,11 @@
5 5
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/highmem.h>
9#include <linux/time.h> 8#include <linux/time.h>
10#include <linux/init.h> 9#include <linux/init.h>
11#include <linux/string.h> 10#include <linux/string.h>
12#include <linux/backing-dev.h> 11#include <linux/backing-dev.h>
13#include <linux/mpage.h>
14#include <linux/falloc.h> 12#include <linux/falloc.h>
15#include <linux/swap.h>
16#include <linux/writeback.h> 13#include <linux/writeback.h>
17#include <linux/compat.h> 14#include <linux/compat.h>
18#include <linux/slab.h> 15#include <linux/slab.h>
@@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
83static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, 80static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
84 struct inode_defrag *defrag) 81 struct inode_defrag *defrag)
85{ 82{
86 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 83 struct btrfs_fs_info *fs_info = inode->root->fs_info;
87 struct inode_defrag *entry; 84 struct inode_defrag *entry;
88 struct rb_node **p; 85 struct rb_node **p;
89 struct rb_node *parent = NULL; 86 struct rb_node *parent = NULL;
@@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
135int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 132int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
136 struct btrfs_inode *inode) 133 struct btrfs_inode *inode)
137{ 134{
138 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
139 struct btrfs_root *root = inode->root; 135 struct btrfs_root *root = inode->root;
136 struct btrfs_fs_info *fs_info = root->fs_info;
140 struct inode_defrag *defrag; 137 struct inode_defrag *defrag;
141 u64 transid; 138 u64 transid;
142 int ret; 139 int ret;
@@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
185static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, 182static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
186 struct inode_defrag *defrag) 183 struct inode_defrag *defrag)
187{ 184{
188 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 185 struct btrfs_fs_info *fs_info = inode->root->fs_info;
189 int ret; 186 int ret;
190 187
191 if (!__need_auto_defrag(fs_info)) 188 if (!__need_auto_defrag(fs_info))
@@ -833,8 +830,7 @@ next_slot:
833 btrfs_file_extent_num_bytes(leaf, fi); 830 btrfs_file_extent_num_bytes(leaf, fi);
834 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 831 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
835 extent_end = key.offset + 832 extent_end = key.offset +
836 btrfs_file_extent_inline_len(leaf, 833 btrfs_file_extent_ram_bytes(leaf, fi);
837 path->slots[0], fi);
838 } else { 834 } else {
839 /* can't happen */ 835 /* can't happen */
840 BUG(); 836 BUG();
@@ -1133,7 +1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
1133int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 1129int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
1134 struct btrfs_inode *inode, u64 start, u64 end) 1130 struct btrfs_inode *inode, u64 start, u64 end)
1135{ 1131{
1136 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1132 struct btrfs_fs_info *fs_info = trans->fs_info;
1137 struct btrfs_root *root = inode->root; 1133 struct btrfs_root *root = inode->root;
1138 struct extent_buffer *leaf; 1134 struct extent_buffer *leaf;
1139 struct btrfs_path *path; 1135 struct btrfs_path *path;
@@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
1470 u64 *lockstart, u64 *lockend, 1466 u64 *lockstart, u64 *lockend,
1471 struct extent_state **cached_state) 1467 struct extent_state **cached_state)
1472{ 1468{
1473 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1469 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1474 u64 start_pos; 1470 u64 start_pos;
1475 u64 last_pos; 1471 u64 last_pos;
1476 int i; 1472 int i;
@@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
1526static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, 1522static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
1527 size_t *write_bytes) 1523 size_t *write_bytes)
1528{ 1524{
1529 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1525 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1530 struct btrfs_root *root = inode->root; 1526 struct btrfs_root *root = inode->root;
1531 struct btrfs_ordered_extent *ordered; 1527 struct btrfs_ordered_extent *ordered;
1532 u64 lockstart, lockend; 1528 u64 lockstart, lockend;
@@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
1569 return ret; 1565 return ret;
1570} 1566}
1571 1567
1572static noinline ssize_t __btrfs_buffered_write(struct file *file, 1568static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
1573 struct iov_iter *i, 1569 struct iov_iter *i)
1574 loff_t pos)
1575{ 1570{
1571 struct file *file = iocb->ki_filp;
1572 loff_t pos = iocb->ki_pos;
1576 struct inode *inode = file_inode(file); 1573 struct inode *inode = file_inode(file);
1577 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1574 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1578 struct btrfs_root *root = BTRFS_I(inode)->root; 1575 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
1804{ 1801{
1805 struct file *file = iocb->ki_filp; 1802 struct file *file = iocb->ki_filp;
1806 struct inode *inode = file_inode(file); 1803 struct inode *inode = file_inode(file);
1807 loff_t pos = iocb->ki_pos; 1804 loff_t pos;
1808 ssize_t written; 1805 ssize_t written;
1809 ssize_t written_buffered; 1806 ssize_t written_buffered;
1810 loff_t endbyte; 1807 loff_t endbyte;
@@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
1815 if (written < 0 || !iov_iter_count(from)) 1812 if (written < 0 || !iov_iter_count(from))
1816 return written; 1813 return written;
1817 1814
1818 pos += written; 1815 pos = iocb->ki_pos;
1819 written_buffered = __btrfs_buffered_write(file, from, pos); 1816 written_buffered = btrfs_buffered_write(iocb, from);
1820 if (written_buffered < 0) { 1817 if (written_buffered < 0) {
1821 err = written_buffered; 1818 err = written_buffered;
1822 goto out; 1819 goto out;
@@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1953 if (iocb->ki_flags & IOCB_DIRECT) { 1950 if (iocb->ki_flags & IOCB_DIRECT) {
1954 num_written = __btrfs_direct_write(iocb, from); 1951 num_written = __btrfs_direct_write(iocb, from);
1955 } else { 1952 } else {
1956 num_written = __btrfs_buffered_write(file, from, pos); 1953 num_written = btrfs_buffered_write(iocb, from);
1957 if (num_written > 0) 1954 if (num_written > 0)
1958 iocb->ki_pos = pos + num_written; 1955 iocb->ki_pos = pos + num_written;
1959 if (clean_page) 1956 if (clean_page)
@@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2042 struct btrfs_trans_handle *trans; 2039 struct btrfs_trans_handle *trans;
2043 struct btrfs_log_ctx ctx; 2040 struct btrfs_log_ctx ctx;
2044 int ret = 0, err; 2041 int ret = 0, err;
2045 bool full_sync = false;
2046 u64 len; 2042 u64 len;
2047 2043
2048 /* 2044 /*
@@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2066 2062
2067 inode_lock(inode); 2063 inode_lock(inode);
2068 atomic_inc(&root->log_batch); 2064 atomic_inc(&root->log_batch);
2069 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 2065
2070 &BTRFS_I(inode)->runtime_flags);
2071 /* 2066 /*
2072 * We might have have had more pages made dirty after calling 2067 * We have to do this here to avoid the priority inversion of waiting on
2073 * start_ordered_ops and before acquiring the inode's i_mutex. 2068 * IO of a lower priority task while holding a transaciton open.
2074 */ 2069 */
2075 if (full_sync) { 2070 ret = btrfs_wait_ordered_range(inode, start, len);
2076 /*
2077 * For a full sync, we need to make sure any ordered operations
2078 * start and finish before we start logging the inode, so that
2079 * all extents are persisted and the respective file extent
2080 * items are in the fs/subvol btree.
2081 */
2082 ret = btrfs_wait_ordered_range(inode, start, len);
2083 } else {
2084 /*
2085 * Start any new ordered operations before starting to log the
2086 * inode. We will wait for them to finish in btrfs_sync_log().
2087 *
2088 * Right before acquiring the inode's mutex, we might have new
2089 * writes dirtying pages, which won't immediately start the
2090 * respective ordered operations - that is done through the
2091 * fill_delalloc callbacks invoked from the writepage and
2092 * writepages address space operations. So make sure we start
2093 * all ordered operations before starting to log our inode. Not
2094 * doing this means that while logging the inode, writeback
2095 * could start and invoke writepage/writepages, which would call
2096 * the fill_delalloc callbacks (cow_file_range,
2097 * submit_compressed_extents). These callbacks add first an
2098 * extent map to the modified list of extents and then create
2099 * the respective ordered operation, which means in
2100 * tree-log.c:btrfs_log_inode() we might capture all existing
2101 * ordered operations (with btrfs_get_logged_extents()) before
2102 * the fill_delalloc callback adds its ordered operation, and by
2103 * the time we visit the modified list of extent maps (with
2104 * btrfs_log_changed_extents()), we see and process the extent
2105 * map they created. We then use the extent map to construct a
2106 * file extent item for logging without waiting for the
2107 * respective ordered operation to finish - this file extent
2108 * item points to a disk location that might not have yet been
2109 * written to, containing random data - so after a crash a log
2110 * replay will make our inode have file extent items that point
2111 * to disk locations containing invalid data, as we returned
2112 * success to userspace without waiting for the respective
2113 * ordered operation to finish, because it wasn't captured by
2114 * btrfs_get_logged_extents().
2115 */
2116 ret = start_ordered_ops(inode, start, end);
2117 }
2118 if (ret) { 2071 if (ret) {
2119 inode_unlock(inode); 2072 inode_unlock(inode);
2120 goto out; 2073 goto out;
2121 } 2074 }
2122 atomic_inc(&root->log_batch); 2075 atomic_inc(&root->log_batch);
2123 2076
2124 /*
2125 * If the last transaction that changed this file was before the current
2126 * transaction and we have the full sync flag set in our inode, we can
2127 * bail out now without any syncing.
2128 *
2129 * Note that we can't bail out if the full sync flag isn't set. This is
2130 * because when the full sync flag is set we start all ordered extents
2131 * and wait for them to fully complete - when they complete they update
2132 * the inode's last_trans field through:
2133 *
2134 * btrfs_finish_ordered_io() ->
2135 * btrfs_update_inode_fallback() ->
2136 * btrfs_update_inode() ->
2137 * btrfs_set_inode_last_trans()
2138 *
2139 * So we are sure that last_trans is up to date and can do this check to
2140 * bail out safely. For the fast path, when the full sync flag is not
2141 * set in our inode, we can not do it because we start only our ordered
2142 * extents and don't wait for them to complete (that is when
2143 * btrfs_finish_ordered_io runs), so here at this point their last_trans
2144 * value might be less than or equals to fs_info->last_trans_committed,
2145 * and setting a speculative last_trans for an inode when a buffered
2146 * write is made (such as fs_info->generation + 1 for example) would not
2147 * be reliable since after setting the value and before fsync is called
2148 * any number of transactions can start and commit (transaction kthread
2149 * commits the current transaction periodically), and a transaction
2150 * commit does not start nor waits for ordered extents to complete.
2151 */
2152 smp_mb(); 2077 smp_mb();
2153 if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || 2078 if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
2154 (full_sync && BTRFS_I(inode)->last_trans <= 2079 BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {
2155 fs_info->last_trans_committed) ||
2156 (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
2157 BTRFS_I(inode)->last_trans
2158 <= fs_info->last_trans_committed)) {
2159 /* 2080 /*
2160 * We've had everything committed since the last time we were 2081 * We've had everything committed since the last time we were
2161 * modified so clear this flag in case it was set for whatever 2082 * modified so clear this flag in case it was set for whatever
@@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2239 goto out; 2160 goto out;
2240 } 2161 }
2241 } 2162 }
2242 if (!full_sync) {
2243 ret = btrfs_wait_ordered_range(inode, start, len);
2244 if (ret) {
2245 btrfs_end_transaction(trans);
2246 goto out;
2247 }
2248 }
2249 ret = btrfs_commit_transaction(trans); 2163 ret = btrfs_commit_transaction(trans);
2250 } else { 2164 } else {
2251 ret = btrfs_end_transaction(trans); 2165 ret = btrfs_end_transaction(trans);
@@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
2310 struct btrfs_inode *inode, 2224 struct btrfs_inode *inode,
2311 struct btrfs_path *path, u64 offset, u64 end) 2225 struct btrfs_path *path, u64 offset, u64 end)
2312{ 2226{
2313 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 2227 struct btrfs_fs_info *fs_info = trans->fs_info;
2314 struct btrfs_root *root = inode->root; 2228 struct btrfs_root *root = inode->root;
2315 struct extent_buffer *leaf; 2229 struct extent_buffer *leaf;
2316 struct btrfs_file_extent_item *fi; 2230 struct btrfs_file_extent_item *fi;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d5f80cb300be..0adf38b00fa0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -71,10 +71,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
71 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 71 inode = btrfs_iget(fs_info->sb, &location, root, NULL);
72 if (IS_ERR(inode)) 72 if (IS_ERR(inode))
73 return inode; 73 return inode;
74 if (is_bad_inode(inode)) {
75 iput(inode);
76 return ERR_PTR(-ENOENT);
77 }
78 74
79 mapping_set_gfp_mask(inode->i_mapping, 75 mapping_set_gfp_mask(inode->i_mapping,
80 mapping_gfp_constraint(inode->i_mapping, 76 mapping_gfp_constraint(inode->i_mapping,
@@ -300,9 +296,9 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
300 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 296 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
301 check_crcs = 1; 297 check_crcs = 1;
302 298
303 /* Make sure we can fit our crcs into the first page */ 299 /* Make sure we can fit our crcs and generation into the first page */
304 if (write && check_crcs && 300 if (write && check_crcs &&
305 (num_pages * sizeof(u32)) >= PAGE_SIZE) 301 (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
306 return -ENOSPC; 302 return -ENOSPC;
307 303
308 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 304 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -547,7 +543,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
547 io_ctl_map_page(io_ctl, 0); 543 io_ctl_map_page(io_ctl, 0);
548 } 544 }
549 545
550 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 546 copy_page(io_ctl->cur, bitmap);
551 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 547 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
552 if (io_ctl->index < io_ctl->num_pages) 548 if (io_ctl->index < io_ctl->num_pages)
553 io_ctl_map_page(io_ctl, 0); 549 io_ctl_map_page(io_ctl, 0);
@@ -607,7 +603,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
607 if (ret) 603 if (ret)
608 return ret; 604 return ret;
609 605
610 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 606 copy_page(entry->bitmap, io_ctl->cur);
611 io_ctl_unmap_page(io_ctl); 607 io_ctl_unmap_page(io_ctl);
612 608
613 return 0; 609 return 0;
@@ -655,7 +651,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
655 struct btrfs_free_space_ctl *ctl, 651 struct btrfs_free_space_ctl *ctl,
656 struct btrfs_path *path, u64 offset) 652 struct btrfs_path *path, u64 offset)
657{ 653{
658 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 654 struct btrfs_fs_info *fs_info = root->fs_info;
659 struct btrfs_free_space_header *header; 655 struct btrfs_free_space_header *header;
660 struct extent_buffer *leaf; 656 struct extent_buffer *leaf;
661 struct btrfs_io_ctl io_ctl; 657 struct btrfs_io_ctl io_ctl;
@@ -1123,13 +1119,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
1123{ 1119{
1124 int ret; 1120 int ret;
1125 struct inode *inode = io_ctl->inode; 1121 struct inode *inode = io_ctl->inode;
1126 struct btrfs_fs_info *fs_info;
1127 1122
1128 if (!inode) 1123 if (!inode)
1129 return 0; 1124 return 0;
1130 1125
1131 fs_info = btrfs_sb(inode->i_sb);
1132
1133 /* Flush the dirty pages in the cache file. */ 1126 /* Flush the dirty pages in the cache file. */
1134 ret = flush_dirty_cache(inode); 1127 ret = flush_dirty_cache(inode);
1135 if (ret) 1128 if (ret)
@@ -1145,7 +1138,7 @@ out:
1145 BTRFS_I(inode)->generation = 0; 1138 BTRFS_I(inode)->generation = 0;
1146 if (block_group) { 1139 if (block_group) {
1147#ifdef DEBUG 1140#ifdef DEBUG
1148 btrfs_err(fs_info, 1141 btrfs_err(root->fs_info,
1149 "failed to write free space cache for block group %llu", 1142 "failed to write free space cache for block group %llu",
1150 block_group->key.objectid); 1143 block_group->key.objectid);
1151#endif 1144#endif
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index b5950aacd697..d6736595ec57 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1236,7 +1236,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1236 if (ret) 1236 if (ret)
1237 goto abort; 1237 goto abort;
1238 1238
1239 ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key); 1239 ret = btrfs_del_root(trans, &free_space_root->root_key);
1240 if (ret) 1240 if (ret)
1241 goto abort; 1241 goto abort;
1242 1242
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 12fcd8897c33..ffca2abf13d0 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 2007 Oracle. All rights reserved. 3 * Copyright (C) 2007 Oracle. All rights reserved.
4 */ 4 */
5 5
6#include <linux/delay.h>
7#include <linux/kthread.h> 6#include <linux/kthread.h>
8#include <linux/pagemap.h> 7#include <linux/pagemap.h>
9 8
@@ -244,8 +243,6 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
244 return; 243 return;
245 244
246 while (1) { 245 while (1) {
247 bool add_to_ctl = true;
248
249 spin_lock(rbroot_lock); 246 spin_lock(rbroot_lock);
250 n = rb_first(rbroot); 247 n = rb_first(rbroot);
251 if (!n) { 248 if (!n) {
@@ -257,15 +254,14 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
257 BUG_ON(info->bitmap); /* Logic error */ 254 BUG_ON(info->bitmap); /* Logic error */
258 255
259 if (info->offset > root->ino_cache_progress) 256 if (info->offset > root->ino_cache_progress)
260 add_to_ctl = false; 257 count = 0;
261 else if (info->offset + info->bytes > root->ino_cache_progress)
262 count = root->ino_cache_progress - info->offset + 1;
263 else 258 else
264 count = info->bytes; 259 count = min(root->ino_cache_progress - info->offset + 1,
260 info->bytes);
265 261
266 rb_erase(&info->offset_index, rbroot); 262 rb_erase(&info->offset_index, rbroot);
267 spin_unlock(rbroot_lock); 263 spin_unlock(rbroot_lock);
268 if (add_to_ctl) 264 if (count)
269 __btrfs_add_free_space(root->fs_info, ctl, 265 __btrfs_add_free_space(root->fs_info, ctl,
270 info->offset, count); 266 info->offset, count);
271 kmem_cache_free(btrfs_free_space_cachep, info); 267 kmem_cache_free(btrfs_free_space_cachep, info);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index def3ada0f0b8..9357a19d2bff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -14,17 +14,13 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mpage.h>
18#include <linux/swap.h>
19#include <linux/writeback.h> 17#include <linux/writeback.h>
20#include <linux/compat.h> 18#include <linux/compat.h>
21#include <linux/bit_spinlock.h>
22#include <linux/xattr.h> 19#include <linux/xattr.h>
23#include <linux/posix_acl.h> 20#include <linux/posix_acl.h>
24#include <linux/falloc.h> 21#include <linux/falloc.h>
25#include <linux/slab.h> 22#include <linux/slab.h>
26#include <linux/ratelimit.h> 23#include <linux/ratelimit.h>
27#include <linux/mount.h>
28#include <linux/btrfs.h> 24#include <linux/btrfs.h>
29#include <linux/blkdev.h> 25#include <linux/blkdev.h>
30#include <linux/posix_acl_xattr.h> 26#include <linux/posix_acl_xattr.h>
@@ -1443,8 +1439,7 @@ next_slot:
1443 nocow = 1; 1439 nocow = 1;
1444 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1440 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1445 extent_end = found_key.offset + 1441 extent_end = found_key.offset +
1446 btrfs_file_extent_inline_len(leaf, 1442 btrfs_file_extent_ram_bytes(leaf, fi);
1447 path->slots[0], fi);
1448 extent_end = ALIGN(extent_end, 1443 extent_end = ALIGN(extent_end,
1449 fs_info->sectorsize); 1444 fs_info->sectorsize);
1450 } else { 1445 } else {
@@ -1752,7 +1747,7 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1752void __btrfs_del_delalloc_inode(struct btrfs_root *root, 1747void __btrfs_del_delalloc_inode(struct btrfs_root *root,
1753 struct btrfs_inode *inode) 1748 struct btrfs_inode *inode)
1754{ 1749{
1755 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 1750 struct btrfs_fs_info *fs_info = root->fs_info;
1756 1751
1757 if (!list_empty(&inode->delalloc_inodes)) { 1752 if (!list_empty(&inode->delalloc_inodes)) {
1758 list_del_init(&inode->delalloc_inodes); 1753 list_del_init(&inode->delalloc_inodes);
@@ -1903,8 +1898,8 @@ static void btrfs_clear_bit_hook(void *private_data,
1903} 1898}
1904 1899
1905/* 1900/*
1906 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure 1901 * Merge bio hook, this must check the chunk tree to make sure we don't create
1907 * we don't create bios that span stripes or chunks 1902 * bios that span stripes or chunks
1908 * 1903 *
1909 * return 1 if page cannot be merged to bio 1904 * return 1 if page cannot be merged to bio
1910 * return 0 if page can be merged to bio 1905 * return 0 if page can be merged to bio
@@ -1962,7 +1957,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
1962 * At IO completion time the cums attached on the ordered extent record 1957 * At IO completion time the cums attached on the ordered extent record
1963 * are inserted into the btree 1958 * are inserted into the btree
1964 */ 1959 */
1965static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio, 1960blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
1966 int mirror_num) 1961 int mirror_num)
1967{ 1962{
1968 struct inode *inode = private_data; 1963 struct inode *inode = private_data;
@@ -2035,8 +2030,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
2035 /* we're doing a write, do the async checksumming */ 2030 /* we're doing a write, do the async checksumming */
2036 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags, 2031 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
2037 bio_offset, inode, 2032 bio_offset, inode,
2038 btrfs_submit_bio_start, 2033 btrfs_submit_bio_start);
2039 btrfs_submit_bio_done);
2040 goto out; 2034 goto out;
2041 } else if (!skip_sum) { 2035 } else if (!skip_sum) {
2042 ret = btrfs_csum_one_bio(inode, bio, 0, 0); 2036 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -3610,18 +3604,15 @@ static int btrfs_read_locked_inode(struct inode *inode)
3610 filled = true; 3604 filled = true;
3611 3605
3612 path = btrfs_alloc_path(); 3606 path = btrfs_alloc_path();
3613 if (!path) { 3607 if (!path)
3614 ret = -ENOMEM; 3608 return -ENOMEM;
3615 goto make_bad;
3616 }
3617 3609
3618 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3610 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3619 3611
3620 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3612 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3621 if (ret) { 3613 if (ret) {
3622 if (ret > 0) 3614 btrfs_free_path(path);
3623 ret = -ENOENT; 3615 return ret;
3624 goto make_bad;
3625 } 3616 }
3626 3617
3627 leaf = path->nodes[0]; 3618 leaf = path->nodes[0];
@@ -3774,11 +3765,6 @@ cache_acl:
3774 3765
3775 btrfs_sync_inode_flags_to_i_flags(inode); 3766 btrfs_sync_inode_flags_to_i_flags(inode);
3776 return 0; 3767 return 0;
3777
3778make_bad:
3779 btrfs_free_path(path);
3780 make_bad_inode(inode);
3781 return ret;
3782} 3768}
3783 3769
3784/* 3770/*
@@ -3984,7 +3970,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3984 goto err; 3970 goto err;
3985 } 3971 }
3986skip_backref: 3972skip_backref:
3987 ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index); 3973 ret = btrfs_delete_delayed_dir_index(trans, dir, index);
3988 if (ret) { 3974 if (ret) {
3989 btrfs_abort_transaction(trans, ret); 3975 btrfs_abort_transaction(trans, ret);
3990 goto err; 3976 goto err;
@@ -4087,11 +4073,10 @@ out:
4087} 4073}
4088 4074
4089static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, 4075static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4090 struct btrfs_root *root, 4076 struct inode *dir, u64 objectid,
4091 struct inode *dir, u64 objectid, 4077 const char *name, int name_len)
4092 const char *name, int name_len)
4093{ 4078{
4094 struct btrfs_fs_info *fs_info = root->fs_info; 4079 struct btrfs_root *root = BTRFS_I(dir)->root;
4095 struct btrfs_path *path; 4080 struct btrfs_path *path;
4096 struct extent_buffer *leaf; 4081 struct extent_buffer *leaf;
4097 struct btrfs_dir_item *di; 4082 struct btrfs_dir_item *di;
@@ -4124,9 +4109,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4124 } 4109 }
4125 btrfs_release_path(path); 4110 btrfs_release_path(path);
4126 4111
4127 ret = btrfs_del_root_ref(trans, fs_info, objectid, 4112 ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
4128 root->root_key.objectid, dir_ino, 4113 dir_ino, &index, name, name_len);
4129 &index, name, name_len);
4130 if (ret < 0) { 4114 if (ret < 0) {
4131 if (ret != -ENOENT) { 4115 if (ret != -ENOENT) {
4132 btrfs_abort_transaction(trans, ret); 4116 btrfs_abort_transaction(trans, ret);
@@ -4145,12 +4129,11 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4145 4129
4146 leaf = path->nodes[0]; 4130 leaf = path->nodes[0];
4147 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4131 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4148 btrfs_release_path(path);
4149 index = key.offset; 4132 index = key.offset;
4150 } 4133 }
4151 btrfs_release_path(path); 4134 btrfs_release_path(path);
4152 4135
4153 ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index); 4136 ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
4154 if (ret) { 4137 if (ret) {
4155 btrfs_abort_transaction(trans, ret); 4138 btrfs_abort_transaction(trans, ret);
4156 goto out; 4139 goto out;
@@ -4243,9 +4226,9 @@ again:
4243 prev = node; 4226 prev = node;
4244 entry = rb_entry(node, struct btrfs_inode, rb_node); 4227 entry = rb_entry(node, struct btrfs_inode, rb_node);
4245 4228
4246 if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode))) 4229 if (objectid < btrfs_ino(entry))
4247 node = node->rb_left; 4230 node = node->rb_left;
4248 else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode))) 4231 else if (objectid > btrfs_ino(entry))
4249 node = node->rb_right; 4232 node = node->rb_right;
4250 else 4233 else
4251 break; 4234 break;
@@ -4253,7 +4236,7 @@ again:
4253 if (!node) { 4236 if (!node) {
4254 while (prev) { 4237 while (prev) {
4255 entry = rb_entry(prev, struct btrfs_inode, rb_node); 4238 entry = rb_entry(prev, struct btrfs_inode, rb_node);
4256 if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) { 4239 if (objectid <= btrfs_ino(entry)) {
4257 node = prev; 4240 node = prev;
4258 break; 4241 break;
4259 } 4242 }
@@ -4262,7 +4245,7 @@ again:
4262 } 4245 }
4263 while (node) { 4246 while (node) {
4264 entry = rb_entry(node, struct btrfs_inode, rb_node); 4247 entry = rb_entry(node, struct btrfs_inode, rb_node);
4265 objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1; 4248 objectid = btrfs_ino(entry) + 1;
4266 inode = igrab(&entry->vfs_inode); 4249 inode = igrab(&entry->vfs_inode);
4267 if (inode) { 4250 if (inode) {
4268 spin_unlock(&root->inode_lock); 4251 spin_unlock(&root->inode_lock);
@@ -4343,10 +4326,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
4343 4326
4344 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); 4327 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
4345 4328
4346 ret = btrfs_unlink_subvol(trans, root, dir, 4329 ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
4347 dest->root_key.objectid, 4330 dentry->d_name.name, dentry->d_name.len);
4348 dentry->d_name.name,
4349 dentry->d_name.len);
4350 if (ret) { 4331 if (ret) {
4351 err = ret; 4332 err = ret;
4352 btrfs_abort_transaction(trans, ret); 4333 btrfs_abort_transaction(trans, ret);
@@ -4441,7 +4422,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4441 return PTR_ERR(trans); 4422 return PTR_ERR(trans);
4442 4423
4443 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 4424 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4444 err = btrfs_unlink_subvol(trans, root, dir, 4425 err = btrfs_unlink_subvol(trans, dir,
4445 BTRFS_I(inode)->location.objectid, 4426 BTRFS_I(inode)->location.objectid,
4446 dentry->d_name.name, 4427 dentry->d_name.name,
4447 dentry->d_name.len); 4428 dentry->d_name.len);
@@ -4643,8 +4624,8 @@ search_again:
4643 BTRFS_I(inode), leaf, fi, 4624 BTRFS_I(inode), leaf, fi,
4644 found_key.offset); 4625 found_key.offset);
4645 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 4626 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4646 item_end += btrfs_file_extent_inline_len(leaf, 4627 item_end += btrfs_file_extent_ram_bytes(leaf,
4647 path->slots[0], fi); 4628 fi);
4648 4629
4649 trace_btrfs_truncate_show_fi_inline( 4630 trace_btrfs_truncate_show_fi_inline(
4650 BTRFS_I(inode), leaf, fi, path->slots[0], 4631 BTRFS_I(inode), leaf, fi, path->slots[0],
@@ -5615,9 +5596,9 @@ static void inode_tree_add(struct inode *inode)
5615 parent = *p; 5596 parent = *p;
5616 entry = rb_entry(parent, struct btrfs_inode, rb_node); 5597 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5617 5598
5618 if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode))) 5599 if (ino < btrfs_ino(entry))
5619 p = &parent->rb_left; 5600 p = &parent->rb_left;
5620 else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode))) 5601 else if (ino > btrfs_ino(entry))
5621 p = &parent->rb_right; 5602 p = &parent->rb_right;
5622 else { 5603 else {
5623 WARN_ON(!(entry->vfs_inode.i_state & 5604 WARN_ON(!(entry->vfs_inode.i_state &
@@ -5708,16 +5689,21 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5708 int ret; 5689 int ret;
5709 5690
5710 ret = btrfs_read_locked_inode(inode); 5691 ret = btrfs_read_locked_inode(inode);
5711 if (!is_bad_inode(inode)) { 5692 if (!ret) {
5712 inode_tree_add(inode); 5693 inode_tree_add(inode);
5713 unlock_new_inode(inode); 5694 unlock_new_inode(inode);
5714 if (new) 5695 if (new)
5715 *new = 1; 5696 *new = 1;
5716 } else { 5697 } else {
5717 unlock_new_inode(inode); 5698 iget_failed(inode);
5718 iput(inode); 5699 /*
5719 ASSERT(ret < 0); 5700 * ret > 0 can come from btrfs_search_slot called by
5720 inode = ERR_PTR(ret < 0 ? ret : -ESTALE); 5701 * btrfs_read_locked_inode, this means the inode item
5702 * was not found.
5703 */
5704 if (ret > 0)
5705 ret = -ENOENT;
5706 inode = ERR_PTR(ret);
5721 } 5707 }
5722 } 5708 }
5723 5709
@@ -5745,7 +5731,7 @@ static struct inode *new_simple_dir(struct super_block *s,
5745 inode->i_mtime = current_time(inode); 5731 inode->i_mtime = current_time(inode);
5746 inode->i_atime = inode->i_mtime; 5732 inode->i_atime = inode->i_mtime;
5747 inode->i_ctime = inode->i_mtime; 5733 inode->i_ctime = inode->i_mtime;
5748 BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); 5734 BTRFS_I(inode)->i_otime = inode->i_mtime;
5749 5735
5750 return inode; 5736 return inode;
5751} 5737}
@@ -6027,32 +6013,6 @@ err:
6027 return ret; 6013 return ret;
6028} 6014}
6029 6015
6030int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
6031{
6032 struct btrfs_root *root = BTRFS_I(inode)->root;
6033 struct btrfs_trans_handle *trans;
6034 int ret = 0;
6035 bool nolock = false;
6036
6037 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6038 return 0;
6039
6040 if (btrfs_fs_closing(root->fs_info) &&
6041 btrfs_is_free_space_inode(BTRFS_I(inode)))
6042 nolock = true;
6043
6044 if (wbc->sync_mode == WB_SYNC_ALL) {
6045 if (nolock)
6046 trans = btrfs_join_transaction_nolock(root);
6047 else
6048 trans = btrfs_join_transaction(root);
6049 if (IS_ERR(trans))
6050 return PTR_ERR(trans);
6051 ret = btrfs_commit_transaction(trans);
6052 }
6053 return ret;
6054}
6055
6056/* 6016/*
6057 * This is somewhat expensive, updating the tree every time the 6017 * This is somewhat expensive, updating the tree every time the
6058 * inode changes. But, it is most likely to find the inode in cache. 6018 * inode changes. But, it is most likely to find the inode in cache.
@@ -6351,7 +6311,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
6351 inode->i_mtime = current_time(inode); 6311 inode->i_mtime = current_time(inode);
6352 inode->i_atime = inode->i_mtime; 6312 inode->i_atime = inode->i_mtime;
6353 inode->i_ctime = inode->i_mtime; 6313 inode->i_ctime = inode->i_mtime;
6354 BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); 6314 BTRFS_I(inode)->i_otime = inode->i_mtime;
6355 6315
6356 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 6316 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
6357 struct btrfs_inode_item); 6317 struct btrfs_inode_item);
@@ -6420,7 +6380,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
6420 struct btrfs_inode *parent_inode, struct btrfs_inode *inode, 6380 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6421 const char *name, int name_len, int add_backref, u64 index) 6381 const char *name, int name_len, int add_backref, u64 index)
6422{ 6382{
6423 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
6424 int ret = 0; 6383 int ret = 0;
6425 struct btrfs_key key; 6384 struct btrfs_key key;
6426 struct btrfs_root *root = parent_inode->root; 6385 struct btrfs_root *root = parent_inode->root;
@@ -6436,7 +6395,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
6436 } 6395 }
6437 6396
6438 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { 6397 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6439 ret = btrfs_add_root_ref(trans, fs_info, key.objectid, 6398 ret = btrfs_add_root_ref(trans, key.objectid,
6440 root->root_key.objectid, parent_ino, 6399 root->root_key.objectid, parent_ino,
6441 index, name, name_len); 6400 index, name, name_len);
6442 } else if (add_backref) { 6401 } else if (add_backref) {
@@ -6472,7 +6431,7 @@ fail_dir_item:
6472 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { 6431 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6473 u64 local_index; 6432 u64 local_index;
6474 int err; 6433 int err;
6475 err = btrfs_del_root_ref(trans, fs_info, key.objectid, 6434 err = btrfs_del_root_ref(trans, key.objectid,
6476 root->root_key.objectid, parent_ino, 6435 root->root_key.objectid, parent_ino,
6477 &local_index, name, name_len); 6436 &local_index, name, name_len);
6478 6437
@@ -6832,7 +6791,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
6832 size_t pg_offset, u64 start, u64 len, 6791 size_t pg_offset, u64 start, u64 len,
6833 int create) 6792 int create)
6834{ 6793{
6835 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6794 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6836 int ret; 6795 int ret;
6837 int err = 0; 6796 int err = 0;
6838 u64 extent_start = 0; 6797 u64 extent_start = 0;
@@ -6928,7 +6887,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
6928 extent_start); 6887 extent_start);
6929 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6888 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6930 size_t size; 6889 size_t size;
6931 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); 6890
6891 size = btrfs_file_extent_ram_bytes(leaf, item);
6932 extent_end = ALIGN(extent_start + size, 6892 extent_end = ALIGN(extent_start + size,
6933 fs_info->sectorsize); 6893 fs_info->sectorsize);
6934 6894
@@ -6979,7 +6939,7 @@ next:
6979 if (new_inline) 6939 if (new_inline)
6980 goto out; 6940 goto out;
6981 6941
6982 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); 6942 size = btrfs_file_extent_ram_bytes(leaf, item);
6983 extent_offset = page_offset(page) + pg_offset - extent_start; 6943 extent_offset = page_offset(page) + pg_offset - extent_start;
6984 copy_size = min_t(u64, PAGE_SIZE - pg_offset, 6944 copy_size = min_t(u64, PAGE_SIZE - pg_offset,
6985 size - extent_offset); 6945 size - extent_offset);
@@ -7850,7 +7810,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
7850 isector >>= inode->i_sb->s_blocksize_bits; 7810 isector >>= inode->i_sb->s_blocksize_bits;
7851 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, 7811 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
7852 pgoff, isector, repair_endio, repair_arg); 7812 pgoff, isector, repair_endio, repair_arg);
7853 bio_set_op_attrs(bio, REQ_OP_READ, read_mode); 7813 bio->bi_opf = REQ_OP_READ | read_mode;
7854 7814
7855 btrfs_debug(BTRFS_I(inode)->root->fs_info, 7815 btrfs_debug(BTRFS_I(inode)->root->fs_info,
7856 "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", 7816 "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
@@ -8284,8 +8244,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
8284 if (write && async_submit) { 8244 if (write && async_submit) {
8285 ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0, 8245 ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
8286 file_offset, inode, 8246 file_offset, inode,
8287 btrfs_submit_bio_start_direct_io, 8247 btrfs_submit_bio_start_direct_io);
8288 btrfs_submit_bio_done);
8289 goto err; 8248 goto err;
8290 } else if (write) { 8249 } else if (write) {
8291 /* 8250 /*
@@ -9525,8 +9484,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9525 /* src is a subvolume */ 9484 /* src is a subvolume */
9526 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { 9485 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9527 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 9486 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9528 ret = btrfs_unlink_subvol(trans, root, old_dir, 9487 ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
9529 root_objectid,
9530 old_dentry->d_name.name, 9488 old_dentry->d_name.name,
9531 old_dentry->d_name.len); 9489 old_dentry->d_name.len);
9532 } else { /* src is an inode */ 9490 } else { /* src is an inode */
@@ -9545,8 +9503,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9545 /* dest is a subvolume */ 9503 /* dest is a subvolume */
9546 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { 9504 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9547 root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; 9505 root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
9548 ret = btrfs_unlink_subvol(trans, dest, new_dir, 9506 ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
9549 root_objectid,
9550 new_dentry->d_name.name, 9507 new_dentry->d_name.name,
9551 new_dentry->d_name.len); 9508 new_dentry->d_name.len);
9552 } else { /* dest is an inode */ 9509 } else { /* dest is an inode */
@@ -9806,7 +9763,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9806 9763
9807 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 9764 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9808 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; 9765 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9809 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, 9766 ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
9810 old_dentry->d_name.name, 9767 old_dentry->d_name.name,
9811 old_dentry->d_name.len); 9768 old_dentry->d_name.len);
9812 } else { 9769 } else {
@@ -9828,8 +9785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9828 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) == 9785 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
9829 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 9786 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
9830 root_objectid = BTRFS_I(new_inode)->location.objectid; 9787 root_objectid = BTRFS_I(new_inode)->location.objectid;
9831 ret = btrfs_unlink_subvol(trans, dest, new_dir, 9788 ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
9832 root_objectid,
9833 new_dentry->d_name.name, 9789 new_dentry->d_name.name,
9834 new_dentry->d_name.len); 9790 new_dentry->d_name.len);
9835 BUG_ON(new_inode->i_nlink == 0); 9791 BUG_ON(new_inode->i_nlink == 0);
@@ -10451,12 +10407,6 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
10451 return -EAGAIN; 10407 return -EAGAIN;
10452} 10408}
10453 10409
10454static struct btrfs_fs_info *iotree_fs_info(void *private_data)
10455{
10456 struct inode *inode = private_data;
10457 return btrfs_sb(inode->i_sb);
10458}
10459
10460static void btrfs_check_extent_io_range(void *private_data, const char *caller, 10410static void btrfs_check_extent_io_range(void *private_data, const char *caller,
10461 u64 start, u64 end) 10411 u64 start, u64 end)
10462{ 10412{
@@ -10471,9 +10421,9 @@ static void btrfs_check_extent_io_range(void *private_data, const char *caller,
10471 } 10421 }
10472} 10422}
10473 10423
10474void btrfs_set_range_writeback(void *private_data, u64 start, u64 end) 10424void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
10475{ 10425{
10476 struct inode *inode = private_data; 10426 struct inode *inode = tree->private_data;
10477 unsigned long index = start >> PAGE_SHIFT; 10427 unsigned long index = start >> PAGE_SHIFT;
10478 unsigned long end_index = end >> PAGE_SHIFT; 10428 unsigned long end_index = end >> PAGE_SHIFT;
10479 struct page *page; 10429 struct page *page;
@@ -10529,10 +10479,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
10529 /* mandatory callbacks */ 10479 /* mandatory callbacks */
10530 .submit_bio_hook = btrfs_submit_bio_hook, 10480 .submit_bio_hook = btrfs_submit_bio_hook,
10531 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 10481 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
10532 .merge_bio_hook = btrfs_merge_bio_hook,
10533 .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, 10482 .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
10534 .tree_fs_info = iotree_fs_info,
10535 .set_range_writeback = btrfs_set_range_writeback,
10536 10483
10537 /* optional callbacks */ 10484 /* optional callbacks */
10538 .fill_delalloc = run_delalloc_range, 10485 .fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b077544b5232..d3a5d2a41e5f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5,23 +5,18 @@
5 5
6#include <linux/kernel.h> 6#include <linux/kernel.h>
7#include <linux/bio.h> 7#include <linux/bio.h>
8#include <linux/buffer_head.h>
9#include <linux/file.h> 8#include <linux/file.h>
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/fsnotify.h> 10#include <linux/fsnotify.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
13#include <linux/highmem.h> 12#include <linux/highmem.h>
14#include <linux/time.h> 13#include <linux/time.h>
15#include <linux/init.h>
16#include <linux/string.h> 14#include <linux/string.h>
17#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <linux/mpage.h>
20#include <linux/namei.h> 17#include <linux/namei.h>
21#include <linux/swap.h>
22#include <linux/writeback.h> 18#include <linux/writeback.h>
23#include <linux/compat.h> 19#include <linux/compat.h>
24#include <linux/bit_spinlock.h>
25#include <linux/security.h> 20#include <linux/security.h>
26#include <linux/xattr.h> 21#include <linux/xattr.h>
27#include <linux/mm.h> 22#include <linux/mm.h>
@@ -606,7 +601,7 @@ static noinline int create_subvol(struct inode *dir,
606 trans->block_rsv = &block_rsv; 601 trans->block_rsv = &block_rsv;
607 trans->bytes_reserved = block_rsv.size; 602 trans->bytes_reserved = block_rsv.size;
608 603
609 ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit); 604 ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
610 if (ret) 605 if (ret)
611 goto fail; 606 goto fail;
612 607
@@ -616,14 +611,6 @@ static noinline int create_subvol(struct inode *dir,
616 goto fail; 611 goto fail;
617 } 612 }
618 613
619 memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
620 btrfs_set_header_bytenr(leaf, leaf->start);
621 btrfs_set_header_generation(leaf, trans->transid);
622 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
623 btrfs_set_header_owner(leaf, objectid);
624
625 write_extent_buffer_fsid(leaf, fs_info->fsid);
626 write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
627 btrfs_mark_buffer_dirty(leaf); 614 btrfs_mark_buffer_dirty(leaf);
628 615
629 inode_item = &root_item->inode; 616 inode_item = &root_item->inode;
@@ -711,8 +698,7 @@ static noinline int create_subvol(struct inode *dir,
711 ret = btrfs_update_inode(trans, root, dir); 698 ret = btrfs_update_inode(trans, root, dir);
712 BUG_ON(ret); 699 BUG_ON(ret);
713 700
714 ret = btrfs_add_root_ref(trans, fs_info, 701 ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
715 objectid, root->root_key.objectid,
716 btrfs_ino(BTRFS_I(dir)), index, name, namelen); 702 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
717 BUG_ON(ret); 703 BUG_ON(ret);
718 704
@@ -2507,8 +2493,8 @@ out:
2507static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2493static noinline int btrfs_ioctl_ino_lookup(struct file *file,
2508 void __user *argp) 2494 void __user *argp)
2509{ 2495{
2510 struct btrfs_ioctl_ino_lookup_args *args; 2496 struct btrfs_ioctl_ino_lookup_args *args;
2511 struct inode *inode; 2497 struct inode *inode;
2512 int ret = 0; 2498 int ret = 0;
2513 2499
2514 args = memdup_user(argp, sizeof(*args)); 2500 args = memdup_user(argp, sizeof(*args));
@@ -2941,8 +2927,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2941 ret = btrfs_defrag_root(root); 2927 ret = btrfs_defrag_root(root);
2942 break; 2928 break;
2943 case S_IFREG: 2929 case S_IFREG:
2944 if (!(file->f_mode & FMODE_WRITE)) { 2930 /*
2945 ret = -EINVAL; 2931 * Note that this does not check the file descriptor for write
2932 * access. This prevents defragmenting executables that are
2933 * running and allows defrag on files open in read-only mode.
2934 */
2935 if (!capable(CAP_SYS_ADMIN) &&
2936 inode_permission(inode, MAY_WRITE)) {
2937 ret = -EPERM;
2946 goto out; 2938 goto out;
2947 } 2939 }
2948 2940
@@ -3165,10 +3157,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
3165 di_args->total_bytes = btrfs_device_get_total_bytes(dev); 3157 di_args->total_bytes = btrfs_device_get_total_bytes(dev);
3166 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 3158 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
3167 if (dev->name) { 3159 if (dev->name) {
3168 struct rcu_string *name; 3160 strncpy(di_args->path, rcu_str_deref(dev->name),
3169 3161 sizeof(di_args->path) - 1);
3170 name = rcu_dereference(dev->name);
3171 strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
3172 di_args->path[sizeof(di_args->path) - 1] = 0; 3162 di_args->path[sizeof(di_args->path) - 1] = 0;
3173 } else { 3163 } else {
3174 di_args->path[0] = '\0'; 3164 di_args->path[0] = '\0';
@@ -5118,9 +5108,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
5118 struct inode *inode = file_inode(file); 5108 struct inode *inode = file_inode(file);
5119 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5109 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5120 struct btrfs_ioctl_quota_ctl_args *sa; 5110 struct btrfs_ioctl_quota_ctl_args *sa;
5121 struct btrfs_trans_handle *trans = NULL;
5122 int ret; 5111 int ret;
5123 int err;
5124 5112
5125 if (!capable(CAP_SYS_ADMIN)) 5113 if (!capable(CAP_SYS_ADMIN))
5126 return -EPERM; 5114 return -EPERM;
@@ -5136,28 +5124,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
5136 } 5124 }
5137 5125
5138 down_write(&fs_info->subvol_sem); 5126 down_write(&fs_info->subvol_sem);
5139 trans = btrfs_start_transaction(fs_info->tree_root, 2);
5140 if (IS_ERR(trans)) {
5141 ret = PTR_ERR(trans);
5142 goto out;
5143 }
5144 5127
5145 switch (sa->cmd) { 5128 switch (sa->cmd) {
5146 case BTRFS_QUOTA_CTL_ENABLE: 5129 case BTRFS_QUOTA_CTL_ENABLE:
5147 ret = btrfs_quota_enable(trans, fs_info); 5130 ret = btrfs_quota_enable(fs_info);
5148 break; 5131 break;
5149 case BTRFS_QUOTA_CTL_DISABLE: 5132 case BTRFS_QUOTA_CTL_DISABLE:
5150 ret = btrfs_quota_disable(trans, fs_info); 5133 ret = btrfs_quota_disable(fs_info);
5151 break; 5134 break;
5152 default: 5135 default:
5153 ret = -EINVAL; 5136 ret = -EINVAL;
5154 break; 5137 break;
5155 } 5138 }
5156 5139
5157 err = btrfs_commit_transaction(trans);
5158 if (err && !ret)
5159 ret = err;
5160out:
5161 kfree(sa); 5140 kfree(sa);
5162 up_write(&fs_info->subvol_sem); 5141 up_write(&fs_info->subvol_sem);
5163drop_write: 5142drop_write:
@@ -5195,15 +5174,13 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
5195 } 5174 }
5196 5175
5197 if (sa->assign) { 5176 if (sa->assign) {
5198 ret = btrfs_add_qgroup_relation(trans, fs_info, 5177 ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
5199 sa->src, sa->dst);
5200 } else { 5178 } else {
5201 ret = btrfs_del_qgroup_relation(trans, fs_info, 5179 ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
5202 sa->src, sa->dst);
5203 } 5180 }
5204 5181
5205 /* update qgroup status and info */ 5182 /* update qgroup status and info */
5206 err = btrfs_run_qgroups(trans, fs_info); 5183 err = btrfs_run_qgroups(trans);
5207 if (err < 0) 5184 if (err < 0)
5208 btrfs_handle_fs_error(fs_info, err, 5185 btrfs_handle_fs_error(fs_info, err,
5209 "failed to update qgroup status and info"); 5186 "failed to update qgroup status and info");
@@ -5221,7 +5198,6 @@ drop_write:
5221static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 5198static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
5222{ 5199{
5223 struct inode *inode = file_inode(file); 5200 struct inode *inode = file_inode(file);
5224 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5225 struct btrfs_root *root = BTRFS_I(inode)->root; 5201 struct btrfs_root *root = BTRFS_I(inode)->root;
5226 struct btrfs_ioctl_qgroup_create_args *sa; 5202 struct btrfs_ioctl_qgroup_create_args *sa;
5227 struct btrfs_trans_handle *trans; 5203 struct btrfs_trans_handle *trans;
@@ -5253,9 +5229,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
5253 } 5229 }
5254 5230
5255 if (sa->create) { 5231 if (sa->create) {
5256 ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); 5232 ret = btrfs_create_qgroup(trans, sa->qgroupid);
5257 } else { 5233 } else {
5258 ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid); 5234 ret = btrfs_remove_qgroup(trans, sa->qgroupid);
5259 } 5235 }
5260 5236
5261 err = btrfs_end_transaction(trans); 5237 err = btrfs_end_transaction(trans);
@@ -5272,7 +5248,6 @@ drop_write:
5272static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 5248static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
5273{ 5249{
5274 struct inode *inode = file_inode(file); 5250 struct inode *inode = file_inode(file);
5275 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5276 struct btrfs_root *root = BTRFS_I(inode)->root; 5251 struct btrfs_root *root = BTRFS_I(inode)->root;
5277 struct btrfs_ioctl_qgroup_limit_args *sa; 5252 struct btrfs_ioctl_qgroup_limit_args *sa;
5278 struct btrfs_trans_handle *trans; 5253 struct btrfs_trans_handle *trans;
@@ -5305,7 +5280,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
5305 qgroupid = root->root_key.objectid; 5280 qgroupid = root->root_key.objectid;
5306 } 5281 }
5307 5282
5308 ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); 5283 ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
5309 5284
5310 err = btrfs_end_transaction(trans); 5285 err = btrfs_end_transaction(trans);
5311 if (err && !ret) 5286 if (err && !ret)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2e1a1694a33d..0c4ef208b8b9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,7 +6,6 @@
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <linux/blkdev.h> 7#include <linux/blkdev.h>
8#include <linux/writeback.h> 8#include <linux/writeback.h>
9#include <linux/pagevec.h>
10#include "ctree.h" 9#include "ctree.h"
11#include "transaction.h" 10#include "transaction.h"
12#include "btrfs_inode.h" 11#include "btrfs_inode.h"
@@ -421,129 +420,6 @@ out:
421 return ret == 0; 420 return ret == 0;
422} 421}
423 422
424/* Needs to either be called under a log transaction or the log_mutex */
425void btrfs_get_logged_extents(struct btrfs_inode *inode,
426 struct list_head *logged_list,
427 const loff_t start,
428 const loff_t end)
429{
430 struct btrfs_ordered_inode_tree *tree;
431 struct btrfs_ordered_extent *ordered;
432 struct rb_node *n;
433 struct rb_node *prev;
434
435 tree = &inode->ordered_tree;
436 spin_lock_irq(&tree->lock);
437 n = __tree_search(&tree->tree, end, &prev);
438 if (!n)
439 n = prev;
440 for (; n; n = rb_prev(n)) {
441 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
442 if (ordered->file_offset > end)
443 continue;
444 if (entry_end(ordered) <= start)
445 break;
446 if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
447 continue;
448 list_add(&ordered->log_list, logged_list);
449 refcount_inc(&ordered->refs);
450 }
451 spin_unlock_irq(&tree->lock);
452}
453
454void btrfs_put_logged_extents(struct list_head *logged_list)
455{
456 struct btrfs_ordered_extent *ordered;
457
458 while (!list_empty(logged_list)) {
459 ordered = list_first_entry(logged_list,
460 struct btrfs_ordered_extent,
461 log_list);
462 list_del_init(&ordered->log_list);
463 btrfs_put_ordered_extent(ordered);
464 }
465}
466
467void btrfs_submit_logged_extents(struct list_head *logged_list,
468 struct btrfs_root *log)
469{
470 int index = log->log_transid % 2;
471
472 spin_lock_irq(&log->log_extents_lock[index]);
473 list_splice_tail(logged_list, &log->logged_list[index]);
474 spin_unlock_irq(&log->log_extents_lock[index]);
475}
476
477void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
478 struct btrfs_root *log, u64 transid)
479{
480 struct btrfs_ordered_extent *ordered;
481 int index = transid % 2;
482
483 spin_lock_irq(&log->log_extents_lock[index]);
484 while (!list_empty(&log->logged_list[index])) {
485 struct inode *inode;
486 ordered = list_first_entry(&log->logged_list[index],
487 struct btrfs_ordered_extent,
488 log_list);
489 list_del_init(&ordered->log_list);
490 inode = ordered->inode;
491 spin_unlock_irq(&log->log_extents_lock[index]);
492
493 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
494 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
495 u64 start = ordered->file_offset;
496 u64 end = ordered->file_offset + ordered->len - 1;
497
498 WARN_ON(!inode);
499 filemap_fdatawrite_range(inode->i_mapping, start, end);
500 }
501 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
502 &ordered->flags));
503
504 /*
505 * In order to keep us from losing our ordered extent
506 * information when committing the transaction we have to make
507 * sure that any logged extents are completed when we go to
508 * commit the transaction. To do this we simply increase the
509 * current transactions pending_ordered counter and decrement it
510 * when the ordered extent completes.
511 */
512 if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
513 struct btrfs_ordered_inode_tree *tree;
514
515 tree = &BTRFS_I(inode)->ordered_tree;
516 spin_lock_irq(&tree->lock);
517 if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
518 set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
519 atomic_inc(&trans->transaction->pending_ordered);
520 }
521 spin_unlock_irq(&tree->lock);
522 }
523 btrfs_put_ordered_extent(ordered);
524 spin_lock_irq(&log->log_extents_lock[index]);
525 }
526 spin_unlock_irq(&log->log_extents_lock[index]);
527}
528
529void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
530{
531 struct btrfs_ordered_extent *ordered;
532 int index = transid % 2;
533
534 spin_lock_irq(&log->log_extents_lock[index]);
535 while (!list_empty(&log->logged_list[index])) {
536 ordered = list_first_entry(&log->logged_list[index],
537 struct btrfs_ordered_extent,
538 log_list);
539 list_del_init(&ordered->log_list);
540 spin_unlock_irq(&log->log_extents_lock[index]);
541 btrfs_put_ordered_extent(ordered);
542 spin_lock_irq(&log->log_extents_lock[index]);
543 }
544 spin_unlock_irq(&log->log_extents_lock[index]);
545}
546
547/* 423/*
548 * used to drop a reference on an ordered extent. This will free 424 * used to drop a reference on an ordered extent. This will free
549 * the extent if the last reference is dropped 425 * the extent if the last reference is dropped
@@ -913,20 +789,6 @@ out:
913 return entry; 789 return entry;
914} 790}
915 791
916bool btrfs_have_ordered_extents_in_range(struct inode *inode,
917 u64 file_offset,
918 u64 len)
919{
920 struct btrfs_ordered_extent *oe;
921
922 oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len);
923 if (oe) {
924 btrfs_put_ordered_extent(oe);
925 return true;
926 }
927 return false;
928}
929
930/* 792/*
931 * lookup and return any extent before 'file_offset'. NULL is returned 793 * lookup and return any extent before 'file_offset'. NULL is returned
932 * if none is found 794 * if none is found
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3be443fb3001..02d813aaa261 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -54,15 +54,11 @@ struct btrfs_ordered_sum {
54#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent 54#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
55 * has done its due diligence in updating 55 * has done its due diligence in updating
56 * the isize. */ 56 * the isize. */
57#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered 57#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */
58 ordered extent */
59#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
60 58
61#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent 59#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to
62 * in the logging code. */
63#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
64 * complete in the current transaction. */ 60 * complete in the current transaction. */
65#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */ 61#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */
66 62
67struct btrfs_ordered_extent { 63struct btrfs_ordered_extent {
68 /* logical offset in the file */ 64 /* logical offset in the file */
@@ -182,9 +178,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
182 struct btrfs_inode *inode, 178 struct btrfs_inode *inode,
183 u64 file_offset, 179 u64 file_offset,
184 u64 len); 180 u64 len);
185bool btrfs_have_ordered_extents_in_range(struct inode *inode,
186 u64 file_offset,
187 u64 len);
188int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 181int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
189 struct btrfs_ordered_extent *ordered); 182 struct btrfs_ordered_extent *ordered);
190int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 183int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
@@ -193,16 +186,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
193 const u64 range_start, const u64 range_len); 186 const u64 range_start, const u64 range_len);
194u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, 187u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
195 const u64 range_start, const u64 range_len); 188 const u64 range_start, const u64 range_len);
196void btrfs_get_logged_extents(struct btrfs_inode *inode,
197 struct list_head *logged_list,
198 const loff_t start,
199 const loff_t end);
200void btrfs_put_logged_extents(struct list_head *logged_list);
201void btrfs_submit_logged_extents(struct list_head *logged_list,
202 struct btrfs_root *log);
203void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
204 struct btrfs_root *log, u64 transid);
205void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
206int __init ordered_data_init(void); 189int __init ordered_data_init(void);
207void __cold ordered_data_exit(void); 190void __cold ordered_data_exit(void);
208 191
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index a4e11cf04671..df49931ffe92 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,17 +52,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
52 u64 offset; 52 u64 offset;
53 int ref_index = 0; 53 int ref_index = 0;
54 54
55 if (item_size < sizeof(*ei)) { 55 if (unlikely(item_size < sizeof(*ei))) {
56#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 56 btrfs_print_v0_err(eb->fs_info);
57 struct btrfs_extent_item_v0 *ei0; 57 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
58 BUG_ON(item_size != sizeof(*ei0));
59 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
60 pr_info("\t\textent refs %u\n",
61 btrfs_extent_refs_v0(eb, ei0));
62 return;
63#else
64 BUG();
65#endif
66 } 58 }
67 59
68 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); 60 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -133,20 +125,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
133 WARN_ON(ptr > end); 125 WARN_ON(ptr > end);
134} 126}
135 127
136#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
137static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
138{
139 struct btrfs_extent_ref_v0 *ref0;
140
141 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
142 printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n",
143 btrfs_ref_root_v0(eb, ref0),
144 btrfs_ref_generation_v0(eb, ref0),
145 btrfs_ref_objectid_v0(eb, ref0),
146 (unsigned long)btrfs_ref_count_v0(eb, ref0));
147}
148#endif
149
150static void print_uuid_item(struct extent_buffer *l, unsigned long offset, 128static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
151 u32 item_size) 129 u32 item_size)
152{ 130{
@@ -267,8 +245,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
267 struct btrfs_file_extent_item); 245 struct btrfs_file_extent_item);
268 if (btrfs_file_extent_type(l, fi) == 246 if (btrfs_file_extent_type(l, fi) ==
269 BTRFS_FILE_EXTENT_INLINE) { 247 BTRFS_FILE_EXTENT_INLINE) {
270 pr_info("\t\tinline extent data size %u\n", 248 pr_info("\t\tinline extent data size %llu\n",
271 btrfs_file_extent_inline_len(l, i, fi)); 249 btrfs_file_extent_ram_bytes(l, fi));
272 break; 250 break;
273 } 251 }
274 pr_info("\t\textent data disk bytenr %llu nr %llu\n", 252 pr_info("\t\textent data disk bytenr %llu nr %llu\n",
@@ -280,11 +258,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
280 btrfs_file_extent_ram_bytes(l, fi)); 258 btrfs_file_extent_ram_bytes(l, fi));
281 break; 259 break;
282 case BTRFS_EXTENT_REF_V0_KEY: 260 case BTRFS_EXTENT_REF_V0_KEY:
283#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 261 btrfs_print_v0_err(fs_info);
284 print_extent_ref_v0(l, i); 262 btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
285#else
286 BUG();
287#endif
288 break; 263 break;
289 case BTRFS_BLOCK_GROUP_ITEM_KEY: 264 case BTRFS_BLOCK_GROUP_ITEM_KEY:
290 bi = btrfs_item_ptr(l, i, 265 bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c25dc47210a3..4353bb69bb86 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -530,11 +530,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
530 fs_info->qgroup_ulist = NULL; 530 fs_info->qgroup_ulist = NULL;
531} 531}
532 532
533static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 533static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
534 struct btrfs_root *quota_root, 534 u64 dst)
535 u64 src, u64 dst)
536{ 535{
537 int ret; 536 int ret;
537 struct btrfs_root *quota_root = trans->fs_info->quota_root;
538 struct btrfs_path *path; 538 struct btrfs_path *path;
539 struct btrfs_key key; 539 struct btrfs_key key;
540 540
@@ -554,11 +554,11 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
554 return ret; 554 return ret;
555} 555}
556 556
557static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 557static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
558 struct btrfs_root *quota_root, 558 u64 dst)
559 u64 src, u64 dst)
560{ 559{
561 int ret; 560 int ret;
561 struct btrfs_root *quota_root = trans->fs_info->quota_root;
562 struct btrfs_path *path; 562 struct btrfs_path *path;
563 struct btrfs_key key; 563 struct btrfs_key key;
564 564
@@ -653,10 +653,10 @@ out:
653 return ret; 653 return ret;
654} 654}
655 655
656static int del_qgroup_item(struct btrfs_trans_handle *trans, 656static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
657 struct btrfs_root *quota_root, u64 qgroupid)
658{ 657{
659 int ret; 658 int ret;
659 struct btrfs_root *quota_root = trans->fs_info->quota_root;
660 struct btrfs_path *path; 660 struct btrfs_path *path;
661 struct btrfs_key key; 661 struct btrfs_key key;
662 662
@@ -700,9 +700,9 @@ out:
700} 700}
701 701
702static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 702static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
703 struct btrfs_root *root,
704 struct btrfs_qgroup *qgroup) 703 struct btrfs_qgroup *qgroup)
705{ 704{
705 struct btrfs_root *quota_root = trans->fs_info->quota_root;
706 struct btrfs_path *path; 706 struct btrfs_path *path;
707 struct btrfs_key key; 707 struct btrfs_key key;
708 struct extent_buffer *l; 708 struct extent_buffer *l;
@@ -718,7 +718,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
718 if (!path) 718 if (!path)
719 return -ENOMEM; 719 return -ENOMEM;
720 720
721 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 721 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
722 if (ret > 0) 722 if (ret > 0)
723 ret = -ENOENT; 723 ret = -ENOENT;
724 724
@@ -742,9 +742,10 @@ out:
742} 742}
743 743
744static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 744static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
745 struct btrfs_root *root,
746 struct btrfs_qgroup *qgroup) 745 struct btrfs_qgroup *qgroup)
747{ 746{
747 struct btrfs_fs_info *fs_info = trans->fs_info;
748 struct btrfs_root *quota_root = fs_info->quota_root;
748 struct btrfs_path *path; 749 struct btrfs_path *path;
749 struct btrfs_key key; 750 struct btrfs_key key;
750 struct extent_buffer *l; 751 struct extent_buffer *l;
@@ -752,7 +753,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
752 int ret; 753 int ret;
753 int slot; 754 int slot;
754 755
755 if (btrfs_is_testing(root->fs_info)) 756 if (btrfs_is_testing(fs_info))
756 return 0; 757 return 0;
757 758
758 key.objectid = 0; 759 key.objectid = 0;
@@ -763,7 +764,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
763 if (!path) 764 if (!path)
764 return -ENOMEM; 765 return -ENOMEM;
765 766
766 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 767 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
767 if (ret > 0) 768 if (ret > 0)
768 ret = -ENOENT; 769 ret = -ENOENT;
769 770
@@ -786,10 +787,10 @@ out:
786 return ret; 787 return ret;
787} 788}
788 789
789static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 790static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
790 struct btrfs_fs_info *fs_info,
791 struct btrfs_root *root)
792{ 791{
792 struct btrfs_fs_info *fs_info = trans->fs_info;
793 struct btrfs_root *quota_root = fs_info->quota_root;
793 struct btrfs_path *path; 794 struct btrfs_path *path;
794 struct btrfs_key key; 795 struct btrfs_key key;
795 struct extent_buffer *l; 796 struct extent_buffer *l;
@@ -805,7 +806,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
805 if (!path) 806 if (!path)
806 return -ENOMEM; 807 return -ENOMEM;
807 808
808 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 809 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
809 if (ret > 0) 810 if (ret > 0)
810 ret = -ENOENT; 811 ret = -ENOENT;
811 812
@@ -875,8 +876,7 @@ out:
875 return ret; 876 return ret;
876} 877}
877 878
878int btrfs_quota_enable(struct btrfs_trans_handle *trans, 879int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
879 struct btrfs_fs_info *fs_info)
880{ 880{
881 struct btrfs_root *quota_root; 881 struct btrfs_root *quota_root;
882 struct btrfs_root *tree_root = fs_info->tree_root; 882 struct btrfs_root *tree_root = fs_info->tree_root;
@@ -886,6 +886,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
886 struct btrfs_key key; 886 struct btrfs_key key;
887 struct btrfs_key found_key; 887 struct btrfs_key found_key;
888 struct btrfs_qgroup *qgroup = NULL; 888 struct btrfs_qgroup *qgroup = NULL;
889 struct btrfs_trans_handle *trans = NULL;
889 int ret = 0; 890 int ret = 0;
890 int slot; 891 int slot;
891 892
@@ -893,9 +894,25 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
893 if (fs_info->quota_root) 894 if (fs_info->quota_root)
894 goto out; 895 goto out;
895 896
897 /*
898 * 1 for quota root item
899 * 1 for BTRFS_QGROUP_STATUS item
900 *
901 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
902 * per subvolume. However those are not currently reserved since it
903 * would be a lot of overkill.
904 */
905 trans = btrfs_start_transaction(tree_root, 2);
906 if (IS_ERR(trans)) {
907 ret = PTR_ERR(trans);
908 trans = NULL;
909 goto out;
910 }
911
896 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 912 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
897 if (!fs_info->qgroup_ulist) { 913 if (!fs_info->qgroup_ulist) {
898 ret = -ENOMEM; 914 ret = -ENOMEM;
915 btrfs_abort_transaction(trans, ret);
899 goto out; 916 goto out;
900 } 917 }
901 918
@@ -906,12 +923,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
906 BTRFS_QUOTA_TREE_OBJECTID); 923 BTRFS_QUOTA_TREE_OBJECTID);
907 if (IS_ERR(quota_root)) { 924 if (IS_ERR(quota_root)) {
908 ret = PTR_ERR(quota_root); 925 ret = PTR_ERR(quota_root);
926 btrfs_abort_transaction(trans, ret);
909 goto out; 927 goto out;
910 } 928 }
911 929
912 path = btrfs_alloc_path(); 930 path = btrfs_alloc_path();
913 if (!path) { 931 if (!path) {
914 ret = -ENOMEM; 932 ret = -ENOMEM;
933 btrfs_abort_transaction(trans, ret);
915 goto out_free_root; 934 goto out_free_root;
916 } 935 }
917 936
@@ -921,8 +940,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
921 940
922 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 941 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
923 sizeof(*ptr)); 942 sizeof(*ptr));
924 if (ret) 943 if (ret) {
944 btrfs_abort_transaction(trans, ret);
925 goto out_free_path; 945 goto out_free_path;
946 }
926 947
927 leaf = path->nodes[0]; 948 leaf = path->nodes[0];
928 ptr = btrfs_item_ptr(leaf, path->slots[0], 949 ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -944,9 +965,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
944 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 965 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
945 if (ret > 0) 966 if (ret > 0)
946 goto out_add_root; 967 goto out_add_root;
947 if (ret < 0) 968 if (ret < 0) {
969 btrfs_abort_transaction(trans, ret);
948 goto out_free_path; 970 goto out_free_path;
949 971 }
950 972
951 while (1) { 973 while (1) {
952 slot = path->slots[0]; 974 slot = path->slots[0];
@@ -956,18 +978,23 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
956 if (found_key.type == BTRFS_ROOT_REF_KEY) { 978 if (found_key.type == BTRFS_ROOT_REF_KEY) {
957 ret = add_qgroup_item(trans, quota_root, 979 ret = add_qgroup_item(trans, quota_root,
958 found_key.offset); 980 found_key.offset);
959 if (ret) 981 if (ret) {
982 btrfs_abort_transaction(trans, ret);
960 goto out_free_path; 983 goto out_free_path;
984 }
961 985
962 qgroup = add_qgroup_rb(fs_info, found_key.offset); 986 qgroup = add_qgroup_rb(fs_info, found_key.offset);
963 if (IS_ERR(qgroup)) { 987 if (IS_ERR(qgroup)) {
964 ret = PTR_ERR(qgroup); 988 ret = PTR_ERR(qgroup);
989 btrfs_abort_transaction(trans, ret);
965 goto out_free_path; 990 goto out_free_path;
966 } 991 }
967 } 992 }
968 ret = btrfs_next_item(tree_root, path); 993 ret = btrfs_next_item(tree_root, path);
969 if (ret < 0) 994 if (ret < 0) {
995 btrfs_abort_transaction(trans, ret);
970 goto out_free_path; 996 goto out_free_path;
997 }
971 if (ret) 998 if (ret)
972 break; 999 break;
973 } 1000 }
@@ -975,18 +1002,28 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
975out_add_root: 1002out_add_root:
976 btrfs_release_path(path); 1003 btrfs_release_path(path);
977 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1004 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
978 if (ret) 1005 if (ret) {
1006 btrfs_abort_transaction(trans, ret);
979 goto out_free_path; 1007 goto out_free_path;
1008 }
980 1009
981 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 1010 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
982 if (IS_ERR(qgroup)) { 1011 if (IS_ERR(qgroup)) {
983 ret = PTR_ERR(qgroup); 1012 ret = PTR_ERR(qgroup);
1013 btrfs_abort_transaction(trans, ret);
984 goto out_free_path; 1014 goto out_free_path;
985 } 1015 }
986 spin_lock(&fs_info->qgroup_lock); 1016 spin_lock(&fs_info->qgroup_lock);
987 fs_info->quota_root = quota_root; 1017 fs_info->quota_root = quota_root;
988 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1018 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
989 spin_unlock(&fs_info->qgroup_lock); 1019 spin_unlock(&fs_info->qgroup_lock);
1020
1021 ret = btrfs_commit_transaction(trans);
1022 if (ret) {
1023 trans = NULL;
1024 goto out_free_path;
1025 }
1026
990 ret = qgroup_rescan_init(fs_info, 0, 1); 1027 ret = qgroup_rescan_init(fs_info, 0, 1);
991 if (!ret) { 1028 if (!ret) {
992 qgroup_rescan_zero_tracking(fs_info); 1029 qgroup_rescan_zero_tracking(fs_info);
@@ -1006,20 +1043,35 @@ out:
1006 if (ret) { 1043 if (ret) {
1007 ulist_free(fs_info->qgroup_ulist); 1044 ulist_free(fs_info->qgroup_ulist);
1008 fs_info->qgroup_ulist = NULL; 1045 fs_info->qgroup_ulist = NULL;
1046 if (trans)
1047 btrfs_end_transaction(trans);
1009 } 1048 }
1010 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1049 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1011 return ret; 1050 return ret;
1012} 1051}
1013 1052
1014int btrfs_quota_disable(struct btrfs_trans_handle *trans, 1053int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
1015 struct btrfs_fs_info *fs_info)
1016{ 1054{
1017 struct btrfs_root *quota_root; 1055 struct btrfs_root *quota_root;
1056 struct btrfs_trans_handle *trans = NULL;
1018 int ret = 0; 1057 int ret = 0;
1019 1058
1020 mutex_lock(&fs_info->qgroup_ioctl_lock); 1059 mutex_lock(&fs_info->qgroup_ioctl_lock);
1021 if (!fs_info->quota_root) 1060 if (!fs_info->quota_root)
1022 goto out; 1061 goto out;
1062
1063 /*
1064 * 1 For the root item
1065 *
1066 * We should also reserve enough items for the quota tree deletion in
1067 * btrfs_clean_quota_tree but this is not done.
1068 */
1069 trans = btrfs_start_transaction(fs_info->tree_root, 1);
1070 if (IS_ERR(trans)) {
1071 ret = PTR_ERR(trans);
1072 goto out;
1073 }
1074
1023 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1075 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1024 btrfs_qgroup_wait_for_completion(fs_info, false); 1076 btrfs_qgroup_wait_for_completion(fs_info, false);
1025 spin_lock(&fs_info->qgroup_lock); 1077 spin_lock(&fs_info->qgroup_lock);
@@ -1031,12 +1083,16 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
1031 btrfs_free_qgroup_config(fs_info); 1083 btrfs_free_qgroup_config(fs_info);
1032 1084
1033 ret = btrfs_clean_quota_tree(trans, quota_root); 1085 ret = btrfs_clean_quota_tree(trans, quota_root);
1034 if (ret) 1086 if (ret) {
1035 goto out; 1087 btrfs_abort_transaction(trans, ret);
1088 goto end_trans;
1089 }
1036 1090
1037 ret = btrfs_del_root(trans, fs_info, &quota_root->root_key); 1091 ret = btrfs_del_root(trans, &quota_root->root_key);
1038 if (ret) 1092 if (ret) {
1039 goto out; 1093 btrfs_abort_transaction(trans, ret);
1094 goto end_trans;
1095 }
1040 1096
1041 list_del(&quota_root->dirty_list); 1097 list_del(&quota_root->dirty_list);
1042 1098
@@ -1048,6 +1104,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
1048 free_extent_buffer(quota_root->node); 1104 free_extent_buffer(quota_root->node);
1049 free_extent_buffer(quota_root->commit_root); 1105 free_extent_buffer(quota_root->commit_root);
1050 kfree(quota_root); 1106 kfree(quota_root);
1107
1108end_trans:
1109 ret = btrfs_end_transaction(trans);
1051out: 1110out:
1052 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1111 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1053 return ret; 1112 return ret;
@@ -1177,9 +1236,10 @@ out:
1177 return ret; 1236 return ret;
1178} 1237}
1179 1238
1180int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1239int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1181 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1240 u64 dst)
1182{ 1241{
1242 struct btrfs_fs_info *fs_info = trans->fs_info;
1183 struct btrfs_root *quota_root; 1243 struct btrfs_root *quota_root;
1184 struct btrfs_qgroup *parent; 1244 struct btrfs_qgroup *parent;
1185 struct btrfs_qgroup *member; 1245 struct btrfs_qgroup *member;
@@ -1216,13 +1276,13 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1216 } 1276 }
1217 } 1277 }
1218 1278
1219 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1279 ret = add_qgroup_relation_item(trans, src, dst);
1220 if (ret) 1280 if (ret)
1221 goto out; 1281 goto out;
1222 1282
1223 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1283 ret = add_qgroup_relation_item(trans, dst, src);
1224 if (ret) { 1284 if (ret) {
1225 del_qgroup_relation_item(trans, quota_root, src, dst); 1285 del_qgroup_relation_item(trans, src, dst);
1226 goto out; 1286 goto out;
1227 } 1287 }
1228 1288
@@ -1240,9 +1300,10 @@ out:
1240 return ret; 1300 return ret;
1241} 1301}
1242 1302
1243static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1303static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1244 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1304 u64 dst)
1245{ 1305{
1306 struct btrfs_fs_info *fs_info = trans->fs_info;
1246 struct btrfs_root *quota_root; 1307 struct btrfs_root *quota_root;
1247 struct btrfs_qgroup *parent; 1308 struct btrfs_qgroup *parent;
1248 struct btrfs_qgroup *member; 1309 struct btrfs_qgroup *member;
@@ -1276,8 +1337,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1276 ret = -ENOENT; 1337 ret = -ENOENT;
1277 goto out; 1338 goto out;
1278exist: 1339exist:
1279 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1340 ret = del_qgroup_relation_item(trans, src, dst);
1280 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1341 err = del_qgroup_relation_item(trans, dst, src);
1281 if (err && !ret) 1342 if (err && !ret)
1282 ret = err; 1343 ret = err;
1283 1344
@@ -1290,21 +1351,22 @@ out:
1290 return ret; 1351 return ret;
1291} 1352}
1292 1353
1293int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1354int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1294 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1355 u64 dst)
1295{ 1356{
1357 struct btrfs_fs_info *fs_info = trans->fs_info;
1296 int ret = 0; 1358 int ret = 0;
1297 1359
1298 mutex_lock(&fs_info->qgroup_ioctl_lock); 1360 mutex_lock(&fs_info->qgroup_ioctl_lock);
1299 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1361 ret = __del_qgroup_relation(trans, src, dst);
1300 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1362 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1301 1363
1302 return ret; 1364 return ret;
1303} 1365}
1304 1366
1305int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1367int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
1306 struct btrfs_fs_info *fs_info, u64 qgroupid)
1307{ 1368{
1369 struct btrfs_fs_info *fs_info = trans->fs_info;
1308 struct btrfs_root *quota_root; 1370 struct btrfs_root *quota_root;
1309 struct btrfs_qgroup *qgroup; 1371 struct btrfs_qgroup *qgroup;
1310 int ret = 0; 1372 int ret = 0;
@@ -1336,9 +1398,9 @@ out:
1336 return ret; 1398 return ret;
1337} 1399}
1338 1400
1339int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1401int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
1340 struct btrfs_fs_info *fs_info, u64 qgroupid)
1341{ 1402{
1403 struct btrfs_fs_info *fs_info = trans->fs_info;
1342 struct btrfs_root *quota_root; 1404 struct btrfs_root *quota_root;
1343 struct btrfs_qgroup *qgroup; 1405 struct btrfs_qgroup *qgroup;
1344 struct btrfs_qgroup_list *list; 1406 struct btrfs_qgroup_list *list;
@@ -1362,16 +1424,15 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1362 goto out; 1424 goto out;
1363 } 1425 }
1364 } 1426 }
1365 ret = del_qgroup_item(trans, quota_root, qgroupid); 1427 ret = del_qgroup_item(trans, qgroupid);
1366 if (ret && ret != -ENOENT) 1428 if (ret && ret != -ENOENT)
1367 goto out; 1429 goto out;
1368 1430
1369 while (!list_empty(&qgroup->groups)) { 1431 while (!list_empty(&qgroup->groups)) {
1370 list = list_first_entry(&qgroup->groups, 1432 list = list_first_entry(&qgroup->groups,
1371 struct btrfs_qgroup_list, next_group); 1433 struct btrfs_qgroup_list, next_group);
1372 ret = __del_qgroup_relation(trans, fs_info, 1434 ret = __del_qgroup_relation(trans, qgroupid,
1373 qgroupid, 1435 list->group->qgroupid);
1374 list->group->qgroupid);
1375 if (ret) 1436 if (ret)
1376 goto out; 1437 goto out;
1377 } 1438 }
@@ -1384,10 +1445,10 @@ out:
1384 return ret; 1445 return ret;
1385} 1446}
1386 1447
1387int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1448int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
1388 struct btrfs_fs_info *fs_info, u64 qgroupid,
1389 struct btrfs_qgroup_limit *limit) 1449 struct btrfs_qgroup_limit *limit)
1390{ 1450{
1451 struct btrfs_fs_info *fs_info = trans->fs_info;
1391 struct btrfs_root *quota_root; 1452 struct btrfs_root *quota_root;
1392 struct btrfs_qgroup *qgroup; 1453 struct btrfs_qgroup *qgroup;
1393 int ret = 0; 1454 int ret = 0;
@@ -1451,7 +1512,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1451 1512
1452 spin_unlock(&fs_info->qgroup_lock); 1513 spin_unlock(&fs_info->qgroup_lock);
1453 1514
1454 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1515 ret = update_qgroup_limit_item(trans, qgroup);
1455 if (ret) { 1516 if (ret) {
1456 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1517 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1457 btrfs_info(fs_info, "unable to update quota limit for %llu", 1518 btrfs_info(fs_info, "unable to update quota limit for %llu",
@@ -1519,10 +1580,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
1519 return 0; 1580 return 0;
1520} 1581}
1521 1582
1522int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1583int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
1523 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1584 u64 num_bytes, gfp_t gfp_flag)
1524 gfp_t gfp_flag)
1525{ 1585{
1586 struct btrfs_fs_info *fs_info = trans->fs_info;
1526 struct btrfs_qgroup_extent_record *record; 1587 struct btrfs_qgroup_extent_record *record;
1527 struct btrfs_delayed_ref_root *delayed_refs; 1588 struct btrfs_delayed_ref_root *delayed_refs;
1528 int ret; 1589 int ret;
@@ -1530,8 +1591,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
1530 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1591 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
1531 || bytenr == 0 || num_bytes == 0) 1592 || bytenr == 0 || num_bytes == 0)
1532 return 0; 1593 return 0;
1533 if (WARN_ON(trans == NULL))
1534 return -EINVAL;
1535 record = kmalloc(sizeof(*record), gfp_flag); 1594 record = kmalloc(sizeof(*record), gfp_flag);
1536 if (!record) 1595 if (!record)
1537 return -ENOMEM; 1596 return -ENOMEM;
@@ -1552,9 +1611,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
1552} 1611}
1553 1612
1554int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1613int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
1555 struct btrfs_fs_info *fs_info,
1556 struct extent_buffer *eb) 1614 struct extent_buffer *eb)
1557{ 1615{
1616 struct btrfs_fs_info *fs_info = trans->fs_info;
1558 int nr = btrfs_header_nritems(eb); 1617 int nr = btrfs_header_nritems(eb);
1559 int i, extent_type, ret; 1618 int i, extent_type, ret;
1560 struct btrfs_key key; 1619 struct btrfs_key key;
@@ -1584,8 +1643,8 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
1584 1643
1585 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1644 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1586 1645
1587 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1646 ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
1588 num_bytes, GFP_NOFS); 1647 GFP_NOFS);
1589 if (ret) 1648 if (ret)
1590 return ret; 1649 return ret;
1591 } 1650 }
@@ -1655,11 +1714,10 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
1655} 1714}
1656 1715
1657int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1716int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1658 struct btrfs_root *root,
1659 struct extent_buffer *root_eb, 1717 struct extent_buffer *root_eb,
1660 u64 root_gen, int root_level) 1718 u64 root_gen, int root_level)
1661{ 1719{
1662 struct btrfs_fs_info *fs_info = root->fs_info; 1720 struct btrfs_fs_info *fs_info = trans->fs_info;
1663 int ret = 0; 1721 int ret = 0;
1664 int level; 1722 int level;
1665 struct extent_buffer *eb = root_eb; 1723 struct extent_buffer *eb = root_eb;
@@ -1678,7 +1736,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
1678 } 1736 }
1679 1737
1680 if (root_level == 0) { 1738 if (root_level == 0) {
1681 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1739 ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
1682 goto out; 1740 goto out;
1683 } 1741 }
1684 1742
@@ -1736,8 +1794,7 @@ walk_down:
1736 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1794 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1737 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1795 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
1738 1796
1739 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1797 ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
1740 child_bytenr,
1741 fs_info->nodesize, 1798 fs_info->nodesize,
1742 GFP_NOFS); 1799 GFP_NOFS);
1743 if (ret) 1800 if (ret)
@@ -1745,8 +1802,8 @@ walk_down:
1745 } 1802 }
1746 1803
1747 if (level == 0) { 1804 if (level == 0) {
1748 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1805 ret = btrfs_qgroup_trace_leaf_items(trans,
1749 path->nodes[level]); 1806 path->nodes[level]);
1750 if (ret) 1807 if (ret)
1751 goto out; 1808 goto out;
1752 1809
@@ -1981,12 +2038,11 @@ static int maybe_fs_roots(struct ulist *roots)
1981 return is_fstree(unode->val); 2038 return is_fstree(unode->val);
1982} 2039}
1983 2040
1984int 2041int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
1985btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 2042 u64 num_bytes, struct ulist *old_roots,
1986 struct btrfs_fs_info *fs_info, 2043 struct ulist *new_roots)
1987 u64 bytenr, u64 num_bytes,
1988 struct ulist *old_roots, struct ulist *new_roots)
1989{ 2044{
2045 struct btrfs_fs_info *fs_info = trans->fs_info;
1990 struct ulist *qgroups = NULL; 2046 struct ulist *qgroups = NULL;
1991 struct ulist *tmp = NULL; 2047 struct ulist *tmp = NULL;
1992 u64 seq; 2048 u64 seq;
@@ -2116,9 +2172,10 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
2116 ulist_del(record->old_roots, qgroup_to_skip, 2172 ulist_del(record->old_roots, qgroup_to_skip,
2117 0); 2173 0);
2118 } 2174 }
2119 ret = btrfs_qgroup_account_extent(trans, fs_info, 2175 ret = btrfs_qgroup_account_extent(trans, record->bytenr,
2120 record->bytenr, record->num_bytes, 2176 record->num_bytes,
2121 record->old_roots, new_roots); 2177 record->old_roots,
2178 new_roots);
2122 record->old_roots = NULL; 2179 record->old_roots = NULL;
2123 new_roots = NULL; 2180 new_roots = NULL;
2124 } 2181 }
@@ -2136,9 +2193,9 @@ cleanup:
2136/* 2193/*
2137 * called from commit_transaction. Writes all changed qgroups to disk. 2194 * called from commit_transaction. Writes all changed qgroups to disk.
2138 */ 2195 */
2139int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2196int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
2140 struct btrfs_fs_info *fs_info)
2141{ 2197{
2198 struct btrfs_fs_info *fs_info = trans->fs_info;
2142 struct btrfs_root *quota_root = fs_info->quota_root; 2199 struct btrfs_root *quota_root = fs_info->quota_root;
2143 int ret = 0; 2200 int ret = 0;
2144 2201
@@ -2152,11 +2209,11 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2152 struct btrfs_qgroup, dirty); 2209 struct btrfs_qgroup, dirty);
2153 list_del_init(&qgroup->dirty); 2210 list_del_init(&qgroup->dirty);
2154 spin_unlock(&fs_info->qgroup_lock); 2211 spin_unlock(&fs_info->qgroup_lock);
2155 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2212 ret = update_qgroup_info_item(trans, qgroup);
2156 if (ret) 2213 if (ret)
2157 fs_info->qgroup_flags |= 2214 fs_info->qgroup_flags |=
2158 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2215 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2159 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2216 ret = update_qgroup_limit_item(trans, qgroup);
2160 if (ret) 2217 if (ret)
2161 fs_info->qgroup_flags |= 2218 fs_info->qgroup_flags |=
2162 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2219 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2168,7 +2225,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2168 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2225 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2169 spin_unlock(&fs_info->qgroup_lock); 2226 spin_unlock(&fs_info->qgroup_lock);
2170 2227
2171 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2228 ret = update_qgroup_status_item(trans);
2172 if (ret) 2229 if (ret)
2173 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2230 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2174 2231
@@ -2181,13 +2238,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2181 * cause a transaction abort so we take extra care here to only error 2238 * cause a transaction abort so we take extra care here to only error
2182 * when a readonly fs is a reasonable outcome. 2239 * when a readonly fs is a reasonable outcome.
2183 */ 2240 */
2184int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2241int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2185 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2242 u64 objectid, struct btrfs_qgroup_inherit *inherit)
2186 struct btrfs_qgroup_inherit *inherit)
2187{ 2243{
2188 int ret = 0; 2244 int ret = 0;
2189 int i; 2245 int i;
2190 u64 *i_qgroups; 2246 u64 *i_qgroups;
2247 struct btrfs_fs_info *fs_info = trans->fs_info;
2191 struct btrfs_root *quota_root = fs_info->quota_root; 2248 struct btrfs_root *quota_root = fs_info->quota_root;
2192 struct btrfs_qgroup *srcgroup; 2249 struct btrfs_qgroup *srcgroup;
2193 struct btrfs_qgroup *dstgroup; 2250 struct btrfs_qgroup *dstgroup;
@@ -2229,22 +2286,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2229 if (ret) 2286 if (ret)
2230 goto out; 2287 goto out;
2231 2288
2232 if (srcid) {
2233 struct btrfs_root *srcroot;
2234 struct btrfs_key srckey;
2235
2236 srckey.objectid = srcid;
2237 srckey.type = BTRFS_ROOT_ITEM_KEY;
2238 srckey.offset = (u64)-1;
2239 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2240 if (IS_ERR(srcroot)) {
2241 ret = PTR_ERR(srcroot);
2242 goto out;
2243 }
2244
2245 level_size = fs_info->nodesize;
2246 }
2247
2248 /* 2289 /*
2249 * add qgroup to all inherited groups 2290 * add qgroup to all inherited groups
2250 */ 2291 */
@@ -2253,12 +2294,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2253 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2294 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
2254 if (*i_qgroups == 0) 2295 if (*i_qgroups == 0)
2255 continue; 2296 continue;
2256 ret = add_qgroup_relation_item(trans, quota_root, 2297 ret = add_qgroup_relation_item(trans, objectid,
2257 objectid, *i_qgroups); 2298 *i_qgroups);
2258 if (ret && ret != -EEXIST) 2299 if (ret && ret != -EEXIST)
2259 goto out; 2300 goto out;
2260 ret = add_qgroup_relation_item(trans, quota_root, 2301 ret = add_qgroup_relation_item(trans, *i_qgroups,
2261 *i_qgroups, objectid); 2302 objectid);
2262 if (ret && ret != -EEXIST) 2303 if (ret && ret != -EEXIST)
2263 goto out; 2304 goto out;
2264 } 2305 }
@@ -2281,7 +2322,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2281 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2322 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2282 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2323 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2283 2324
2284 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2325 ret = update_qgroup_limit_item(trans, dstgroup);
2285 if (ret) { 2326 if (ret) {
2286 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2327 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2287 btrfs_info(fs_info, 2328 btrfs_info(fs_info,
@@ -2301,6 +2342,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2301 * our counts don't go crazy, so at this point the only 2342 * our counts don't go crazy, so at this point the only
2302 * difference between the two roots should be the root node. 2343 * difference between the two roots should be the root node.
2303 */ 2344 */
2345 level_size = fs_info->nodesize;
2304 dstgroup->rfer = srcgroup->rfer; 2346 dstgroup->rfer = srcgroup->rfer;
2305 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2347 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2306 dstgroup->excl = level_size; 2348 dstgroup->excl = level_size;
@@ -2598,10 +2640,10 @@ static bool is_last_leaf(struct btrfs_path *path)
2598 * returns < 0 on error, 0 when more leafs are to be scanned. 2640 * returns < 0 on error, 0 when more leafs are to be scanned.
2599 * returns 1 when done. 2641 * returns 1 when done.
2600 */ 2642 */
2601static int 2643static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
2602qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2644 struct btrfs_path *path)
2603 struct btrfs_trans_handle *trans)
2604{ 2645{
2646 struct btrfs_fs_info *fs_info = trans->fs_info;
2605 struct btrfs_key found; 2647 struct btrfs_key found;
2606 struct extent_buffer *scratch_leaf = NULL; 2648 struct extent_buffer *scratch_leaf = NULL;
2607 struct ulist *roots = NULL; 2649 struct ulist *roots = NULL;
@@ -2669,8 +2711,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2669 if (ret < 0) 2711 if (ret < 0)
2670 goto out; 2712 goto out;
2671 /* For rescan, just pass old_roots as NULL */ 2713 /* For rescan, just pass old_roots as NULL */
2672 ret = btrfs_qgroup_account_extent(trans, fs_info, 2714 ret = btrfs_qgroup_account_extent(trans, found.objectid,
2673 found.objectid, num_bytes, NULL, roots); 2715 num_bytes, NULL, roots);
2674 if (ret < 0) 2716 if (ret < 0)
2675 goto out; 2717 goto out;
2676 } 2718 }
@@ -2716,7 +2758,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2716 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2758 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
2717 err = -EINTR; 2759 err = -EINTR;
2718 } else { 2760 } else {
2719 err = qgroup_rescan_leaf(fs_info, path, trans); 2761 err = qgroup_rescan_leaf(trans, path);
2720 } 2762 }
2721 if (err > 0) 2763 if (err > 0)
2722 btrfs_commit_transaction(trans); 2764 btrfs_commit_transaction(trans);
@@ -2751,7 +2793,7 @@ out:
2751 err); 2793 err);
2752 goto done; 2794 goto done;
2753 } 2795 }
2754 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2796 ret = update_qgroup_status_item(trans);
2755 if (ret < 0) { 2797 if (ret < 0) {
2756 err = ret; 2798 err = ret;
2757 btrfs_err(fs_info, "fail to update qgroup status: %d", err); 2799 btrfs_err(fs_info, "fail to update qgroup status: %d", err);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d60dd06445ce..54b8bb282c0e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -141,24 +141,19 @@ struct btrfs_qgroup {
141#define QGROUP_RELEASE (1<<1) 141#define QGROUP_RELEASE (1<<1)
142#define QGROUP_FREE (1<<2) 142#define QGROUP_FREE (1<<2)
143 143
144int btrfs_quota_enable(struct btrfs_trans_handle *trans, 144int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
145 struct btrfs_fs_info *fs_info); 145int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
146int btrfs_quota_disable(struct btrfs_trans_handle *trans,
147 struct btrfs_fs_info *fs_info);
148int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 146int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
149void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 147void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
150int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 148int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
151 bool interruptible); 149 bool interruptible);
152int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 150int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
153 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 151 u64 dst);
154int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 152int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
155 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 153 u64 dst);
156int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 154int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
157 struct btrfs_fs_info *fs_info, u64 qgroupid); 155int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
158int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 156int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
159 struct btrfs_fs_info *fs_info, u64 qgroupid);
160int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
161 struct btrfs_fs_info *fs_info, u64 qgroupid,
162 struct btrfs_qgroup_limit *limit); 157 struct btrfs_qgroup_limit *limit);
163int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); 158int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
164void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); 159void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
@@ -217,9 +212,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
217 * Return <0 for error, like memory allocation failure or invalid parameter 212 * Return <0 for error, like memory allocation failure or invalid parameter
218 * (NULL trans) 213 * (NULL trans)
219 */ 214 */
220int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 215int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
221 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 216 u64 num_bytes, gfp_t gfp_flag);
222 gfp_t gfp_flag);
223 217
224/* 218/*
225 * Inform qgroup to trace all leaf items of data 219 * Inform qgroup to trace all leaf items of data
@@ -228,7 +222,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
228 * Return <0 for error(ENOMEM) 222 * Return <0 for error(ENOMEM)
229 */ 223 */
230int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 224int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
231 struct btrfs_fs_info *fs_info,
232 struct extent_buffer *eb); 225 struct extent_buffer *eb);
233/* 226/*
234 * Inform qgroup to trace a whole subtree, including all its child tree 227 * Inform qgroup to trace a whole subtree, including all its child tree
@@ -241,20 +234,15 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
241 * Return <0 for error(ENOMEM or tree search error) 234 * Return <0 for error(ENOMEM or tree search error)
242 */ 235 */
243int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 236int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
244 struct btrfs_root *root,
245 struct extent_buffer *root_eb, 237 struct extent_buffer *root_eb,
246 u64 root_gen, int root_level); 238 u64 root_gen, int root_level);
247int 239int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
248btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 240 u64 num_bytes, struct ulist *old_roots,
249 struct btrfs_fs_info *fs_info, 241 struct ulist *new_roots);
250 u64 bytenr, u64 num_bytes,
251 struct ulist *old_roots, struct ulist *new_roots);
252int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans); 242int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
253int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 243int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
254 struct btrfs_fs_info *fs_info); 244int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
255int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 245 u64 objectid, struct btrfs_qgroup_inherit *inherit);
256 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
257 struct btrfs_qgroup_inherit *inherit);
258void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 246void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
259 u64 ref_root, u64 num_bytes, 247 u64 ref_root, u64 num_bytes,
260 enum btrfs_qgroup_rsv_type type); 248 enum btrfs_qgroup_rsv_type type);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5e4ad134b9ad..df41d7049936 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -5,32 +5,19 @@
5 */ 5 */
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/wait.h>
9#include <linux/bio.h> 8#include <linux/bio.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include <linux/buffer_head.h>
12#include <linux/blkdev.h> 10#include <linux/blkdev.h>
13#include <linux/random.h>
14#include <linux/iocontext.h>
15#include <linux/capability.h>
16#include <linux/ratelimit.h>
17#include <linux/kthread.h>
18#include <linux/raid/pq.h> 11#include <linux/raid/pq.h>
19#include <linux/hash.h> 12#include <linux/hash.h>
20#include <linux/list_sort.h> 13#include <linux/list_sort.h>
21#include <linux/raid/xor.h> 14#include <linux/raid/xor.h>
22#include <linux/mm.h> 15#include <linux/mm.h>
23#include <asm/div64.h>
24#include "ctree.h" 16#include "ctree.h"
25#include "extent_map.h"
26#include "disk-io.h" 17#include "disk-io.h"
27#include "transaction.h"
28#include "print-tree.h"
29#include "volumes.h" 18#include "volumes.h"
30#include "raid56.h" 19#include "raid56.h"
31#include "async-thread.h" 20#include "async-thread.h"
32#include "check-integrity.h"
33#include "rcu-string.h"
34 21
35/* set when additional merges to this rbio are not allowed */ 22/* set when additional merges to this rbio are not allowed */
36#define RBIO_RMW_LOCKED_BIT 1 23#define RBIO_RMW_LOCKED_BIT 1
@@ -175,8 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
175static noinline void finish_rmw(struct btrfs_raid_bio *rbio); 162static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
176static void rmw_work(struct btrfs_work *work); 163static void rmw_work(struct btrfs_work *work);
177static void read_rebuild_work(struct btrfs_work *work); 164static void read_rebuild_work(struct btrfs_work *work);
178static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
179static void async_read_rebuild(struct btrfs_raid_bio *rbio);
180static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio); 165static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
181static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed); 166static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
182static void __free_raid_bio(struct btrfs_raid_bio *rbio); 167static void __free_raid_bio(struct btrfs_raid_bio *rbio);
@@ -185,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
185 170
186static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 171static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
187 int need_check); 172 int need_check);
188static void async_scrub_parity(struct btrfs_raid_bio *rbio); 173static void scrub_parity_work(struct btrfs_work *work);
174
175static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
176{
177 btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
178 btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
179}
189 180
190/* 181/*
191 * the stripe hash table is used for locking, and to collect 182 * the stripe hash table is used for locking, and to collect
@@ -260,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
260 s = kmap(rbio->bio_pages[i]); 251 s = kmap(rbio->bio_pages[i]);
261 d = kmap(rbio->stripe_pages[i]); 252 d = kmap(rbio->stripe_pages[i]);
262 253
263 memcpy(d, s, PAGE_SIZE); 254 copy_page(d, s);
264 255
265 kunmap(rbio->bio_pages[i]); 256 kunmap(rbio->bio_pages[i]);
266 kunmap(rbio->stripe_pages[i]); 257 kunmap(rbio->stripe_pages[i]);
@@ -516,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
516} 507}
517 508
518/* 509/*
519 * returns true if the bio list inside this rbio 510 * Returns true if the bio list inside this rbio covers an entire stripe (no
520 * covers an entire stripe (no rmw required). 511 * rmw required).
521 * Must be called with the bio list lock held, or
522 * at a time when you know it is impossible to add
523 * new bios into the list
524 */ 512 */
525static int __rbio_is_full(struct btrfs_raid_bio *rbio) 513static int rbio_is_full(struct btrfs_raid_bio *rbio)
526{ 514{
515 unsigned long flags;
527 unsigned long size = rbio->bio_list_bytes; 516 unsigned long size = rbio->bio_list_bytes;
528 int ret = 1; 517 int ret = 1;
529 518
519 spin_lock_irqsave(&rbio->bio_list_lock, flags);
530 if (size != rbio->nr_data * rbio->stripe_len) 520 if (size != rbio->nr_data * rbio->stripe_len)
531 ret = 0; 521 ret = 0;
532
533 BUG_ON(size > rbio->nr_data * rbio->stripe_len); 522 BUG_ON(size > rbio->nr_data * rbio->stripe_len);
534 return ret;
535}
536
537static int rbio_is_full(struct btrfs_raid_bio *rbio)
538{
539 unsigned long flags;
540 int ret;
541
542 spin_lock_irqsave(&rbio->bio_list_lock, flags);
543 ret = __rbio_is_full(rbio);
544 spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 523 spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
524
545 return ret; 525 return ret;
546} 526}
547 527
@@ -812,16 +792,16 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
812 spin_unlock_irqrestore(&h->lock, flags); 792 spin_unlock_irqrestore(&h->lock, flags);
813 793
814 if (next->operation == BTRFS_RBIO_READ_REBUILD) 794 if (next->operation == BTRFS_RBIO_READ_REBUILD)
815 async_read_rebuild(next); 795 start_async_work(next, read_rebuild_work);
816 else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) { 796 else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
817 steal_rbio(rbio, next); 797 steal_rbio(rbio, next);
818 async_read_rebuild(next); 798 start_async_work(next, read_rebuild_work);
819 } else if (next->operation == BTRFS_RBIO_WRITE) { 799 } else if (next->operation == BTRFS_RBIO_WRITE) {
820 steal_rbio(rbio, next); 800 steal_rbio(rbio, next);
821 async_rmw_stripe(next); 801 start_async_work(next, rmw_work);
822 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { 802 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
823 steal_rbio(rbio, next); 803 steal_rbio(rbio, next);
824 async_scrub_parity(next); 804 start_async_work(next, scrub_parity_work);
825 } 805 }
826 806
827 goto done_nolock; 807 goto done_nolock;
@@ -1275,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1275 pointers); 1255 pointers);
1276 } else { 1256 } else {
1277 /* raid5 */ 1257 /* raid5 */
1278 memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); 1258 copy_page(pointers[nr_data], pointers[0]);
1279 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 1259 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
1280 } 1260 }
1281 1261
@@ -1343,7 +1323,7 @@ write_data:
1343 1323
1344 bio->bi_private = rbio; 1324 bio->bi_private = rbio;
1345 bio->bi_end_io = raid_write_end_io; 1325 bio->bi_end_io = raid_write_end_io;
1346 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1326 bio->bi_opf = REQ_OP_WRITE;
1347 1327
1348 submit_bio(bio); 1328 submit_bio(bio);
1349 } 1329 }
@@ -1508,20 +1488,6 @@ cleanup:
1508 rbio_orig_end_io(rbio, BLK_STS_IOERR); 1488 rbio_orig_end_io(rbio, BLK_STS_IOERR);
1509} 1489}
1510 1490
1511static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1512{
1513 btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
1514 btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
1515}
1516
1517static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1518{
1519 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1520 read_rebuild_work, NULL, NULL);
1521
1522 btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
1523}
1524
1525/* 1491/*
1526 * the stripe must be locked by the caller. It will 1492 * the stripe must be locked by the caller. It will
1527 * unlock after all the writes are done 1493 * unlock after all the writes are done
@@ -1599,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1599 1565
1600 bio->bi_private = rbio; 1566 bio->bi_private = rbio;
1601 bio->bi_end_io = raid_rmw_end_io; 1567 bio->bi_end_io = raid_rmw_end_io;
1602 bio_set_op_attrs(bio, REQ_OP_READ, 0); 1568 bio->bi_opf = REQ_OP_READ;
1603 1569
1604 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 1570 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
1605 1571
@@ -1652,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio)
1652 1618
1653 ret = lock_stripe_add(rbio); 1619 ret = lock_stripe_add(rbio);
1654 if (ret == 0) 1620 if (ret == 0)
1655 async_rmw_stripe(rbio); 1621 start_async_work(rbio, rmw_work);
1656 return 0; 1622 return 0;
1657} 1623}
1658 1624
@@ -1720,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug)
1720 list_del_init(&cur->plug_list); 1686 list_del_init(&cur->plug_list);
1721 1687
1722 if (rbio_is_full(cur)) { 1688 if (rbio_is_full(cur)) {
1689 int ret;
1690
1723 /* we have a full stripe, send it down */ 1691 /* we have a full stripe, send it down */
1724 full_stripe_write(cur); 1692 ret = full_stripe_write(cur);
1693 BUG_ON(ret);
1725 continue; 1694 continue;
1726 } 1695 }
1727 if (last) { 1696 if (last) {
@@ -1941,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1941 BUG_ON(failb != -1); 1910 BUG_ON(failb != -1);
1942pstripe: 1911pstripe:
1943 /* Copy parity block into failed block to start with */ 1912 /* Copy parity block into failed block to start with */
1944 memcpy(pointers[faila], 1913 copy_page(pointers[faila], pointers[rbio->nr_data]);
1945 pointers[rbio->nr_data],
1946 PAGE_SIZE);
1947 1914
1948 /* rearrange the pointer array */ 1915 /* rearrange the pointer array */
1949 p = pointers[faila]; 1916 p = pointers[faila];
@@ -2145,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
2145 2112
2146 bio->bi_private = rbio; 2113 bio->bi_private = rbio;
2147 bio->bi_end_io = raid_recover_end_io; 2114 bio->bi_end_io = raid_recover_end_io;
2148 bio_set_op_attrs(bio, REQ_OP_READ, 0); 2115 bio->bi_opf = REQ_OP_READ;
2149 2116
2150 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 2117 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
2151 2118
@@ -2448,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
2448 pointers); 2415 pointers);
2449 } else { 2416 } else {
2450 /* raid5 */ 2417 /* raid5 */
2451 memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); 2418 copy_page(pointers[nr_data], pointers[0]);
2452 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 2419 run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
2453 } 2420 }
2454 2421
@@ -2456,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
2456 p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 2423 p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2457 parity = kmap(p); 2424 parity = kmap(p);
2458 if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) 2425 if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
2459 memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); 2426 copy_page(parity, pointers[rbio->scrubp]);
2460 else 2427 else
2461 /* Parity is right, needn't writeback */ 2428 /* Parity is right, needn't writeback */
2462 bitmap_clear(rbio->dbitmap, pagenr, 1); 2429 bitmap_clear(rbio->dbitmap, pagenr, 1);
@@ -2517,7 +2484,7 @@ submit_write:
2517 2484
2518 bio->bi_private = rbio; 2485 bio->bi_private = rbio;
2519 bio->bi_end_io = raid_write_end_io; 2486 bio->bi_end_io = raid_write_end_io;
2520 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 2487 bio->bi_opf = REQ_OP_WRITE;
2521 2488
2522 submit_bio(bio); 2489 submit_bio(bio);
2523 } 2490 }
@@ -2699,7 +2666,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
2699 2666
2700 bio->bi_private = rbio; 2667 bio->bi_private = rbio;
2701 bio->bi_end_io = raid56_parity_scrub_end_io; 2668 bio->bi_end_io = raid56_parity_scrub_end_io;
2702 bio_set_op_attrs(bio, REQ_OP_READ, 0); 2669 bio->bi_opf = REQ_OP_READ;
2703 2670
2704 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 2671 btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
2705 2672
@@ -2728,18 +2695,10 @@ static void scrub_parity_work(struct btrfs_work *work)
2728 raid56_parity_scrub_stripe(rbio); 2695 raid56_parity_scrub_stripe(rbio);
2729} 2696}
2730 2697
2731static void async_scrub_parity(struct btrfs_raid_bio *rbio)
2732{
2733 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
2734 scrub_parity_work, NULL, NULL);
2735
2736 btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
2737}
2738
2739void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) 2698void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
2740{ 2699{
2741 if (!lock_stripe_add(rbio)) 2700 if (!lock_stripe_add(rbio))
2742 async_scrub_parity(rbio); 2701 start_async_work(rbio, scrub_parity_work);
2743} 2702}
2744 2703
2745/* The following code is used for dev replace of a missing RAID 5/6 device. */ 2704/* The following code is used for dev replace of a missing RAID 5/6 device. */
@@ -2781,5 +2740,5 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
2781void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) 2740void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
2782{ 2741{
2783 if (!lock_stripe_add(rbio)) 2742 if (!lock_stripe_add(rbio))
2784 async_read_rebuild(rbio); 2743 start_async_work(rbio, read_rebuild_work);
2785} 2744}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 40f1bcef394d..dec14b739b10 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -7,7 +7,6 @@
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/writeback.h> 8#include <linux/writeback.h>
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include <linux/rbtree.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/workqueue.h> 11#include <linux/workqueue.h>
13#include "ctree.h" 12#include "ctree.h"
@@ -355,7 +354,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
355 dev = bbio->stripes[nzones].dev; 354 dev = bbio->stripes[nzones].dev;
356 355
357 /* cannot read ahead on missing device. */ 356 /* cannot read ahead on missing device. */
358 if (!dev->bdev) 357 if (!dev->bdev)
359 continue; 358 continue;
360 359
361 zone = reada_find_zone(dev, logical, bbio); 360 zone = reada_find_zone(dev, logical, bbio);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 879b76fa881a..8783a1776540 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -586,29 +586,6 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
586 return btrfs_get_fs_root(fs_info, &key, false); 586 return btrfs_get_fs_root(fs_info, &key, false);
587} 587}
588 588
589#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
590static noinline_for_stack
591struct btrfs_root *find_tree_root(struct reloc_control *rc,
592 struct extent_buffer *leaf,
593 struct btrfs_extent_ref_v0 *ref0)
594{
595 struct btrfs_root *root;
596 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
597 u64 generation = btrfs_ref_generation_v0(leaf, ref0);
598
599 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
600
601 root = read_fs_root(rc->extent_root->fs_info, root_objectid);
602 BUG_ON(IS_ERR(root));
603
604 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
605 generation != btrfs_root_generation(&root->root_item))
606 return NULL;
607
608 return root;
609}
610#endif
611
612static noinline_for_stack 589static noinline_for_stack
613int find_inline_backref(struct extent_buffer *leaf, int slot, 590int find_inline_backref(struct extent_buffer *leaf, int slot,
614 unsigned long *ptr, unsigned long *end) 591 unsigned long *ptr, unsigned long *end)
@@ -621,12 +598,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
621 btrfs_item_key_to_cpu(leaf, &key, slot); 598 btrfs_item_key_to_cpu(leaf, &key, slot);
622 599
623 item_size = btrfs_item_size_nr(leaf, slot); 600 item_size = btrfs_item_size_nr(leaf, slot);
624#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
625 if (item_size < sizeof(*ei)) { 601 if (item_size < sizeof(*ei)) {
626 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 602 btrfs_print_v0_err(leaf->fs_info);
603 btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
627 return 1; 604 return 1;
628 } 605 }
629#endif
630 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 606 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
631 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 607 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
632 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 608 BTRFS_EXTENT_FLAG_TREE_BLOCK));
@@ -792,7 +768,7 @@ again:
792 type = btrfs_get_extent_inline_ref_type(eb, iref, 768 type = btrfs_get_extent_inline_ref_type(eb, iref,
793 BTRFS_REF_TYPE_BLOCK); 769 BTRFS_REF_TYPE_BLOCK);
794 if (type == BTRFS_REF_TYPE_INVALID) { 770 if (type == BTRFS_REF_TYPE_INVALID) {
795 err = -EINVAL; 771 err = -EUCLEAN;
796 goto out; 772 goto out;
797 } 773 }
798 key.type = type; 774 key.type = type;
@@ -811,29 +787,7 @@ again:
811 goto next; 787 goto next;
812 } 788 }
813 789
814#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
815 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
816 key.type == BTRFS_EXTENT_REF_V0_KEY) {
817 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
818 struct btrfs_extent_ref_v0 *ref0;
819 ref0 = btrfs_item_ptr(eb, path1->slots[0],
820 struct btrfs_extent_ref_v0);
821 if (key.objectid == key.offset) {
822 root = find_tree_root(rc, eb, ref0);
823 if (root && !should_ignore_root(root))
824 cur->root = root;
825 else
826 list_add(&cur->list, &useless);
827 break;
828 }
829 if (is_cowonly_root(btrfs_ref_root_v0(eb,
830 ref0)))
831 cur->cowonly = 1;
832 }
833#else
834 ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
835 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 790 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
836#endif
837 if (key.objectid == key.offset) { 791 if (key.objectid == key.offset) {
838 /* 792 /*
839 * only root blocks of reloc trees use 793 * only root blocks of reloc trees use
@@ -876,6 +830,12 @@ again:
876 edge->node[UPPER] = upper; 830 edge->node[UPPER] = upper;
877 831
878 goto next; 832 goto next;
833 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
834 err = -EINVAL;
835 btrfs_print_v0_err(rc->extent_root->fs_info);
836 btrfs_handle_fs_error(rc->extent_root->fs_info, err,
837 NULL);
838 goto out;
879 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 839 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
880 goto next; 840 goto next;
881 } 841 }
@@ -1321,18 +1281,19 @@ static void __del_reloc_root(struct btrfs_root *root)
1321 struct mapping_node *node = NULL; 1281 struct mapping_node *node = NULL;
1322 struct reloc_control *rc = fs_info->reloc_ctl; 1282 struct reloc_control *rc = fs_info->reloc_ctl;
1323 1283
1324 spin_lock(&rc->reloc_root_tree.lock); 1284 if (rc) {
1325 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1285 spin_lock(&rc->reloc_root_tree.lock);
1326 root->node->start); 1286 rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1327 if (rb_node) { 1287 root->node->start);
1328 node = rb_entry(rb_node, struct mapping_node, rb_node); 1288 if (rb_node) {
1329 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1289 node = rb_entry(rb_node, struct mapping_node, rb_node);
1290 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1291 }
1292 spin_unlock(&rc->reloc_root_tree.lock);
1293 if (!node)
1294 return;
1295 BUG_ON((struct btrfs_root *)node->data != root);
1330 } 1296 }
1331 spin_unlock(&rc->reloc_root_tree.lock);
1332
1333 if (!node)
1334 return;
1335 BUG_ON((struct btrfs_root *)node->data != root);
1336 1297
1337 spin_lock(&fs_info->trans_lock); 1298 spin_lock(&fs_info->trans_lock);
1338 list_del_init(&root->root_list); 1299 list_del_init(&root->root_list);
@@ -1918,13 +1879,12 @@ again:
1918 * and tree block numbers, if current trans doesn't free 1879 * and tree block numbers, if current trans doesn't free
1919 * data reloc tree inode. 1880 * data reloc tree inode.
1920 */ 1881 */
1921 ret = btrfs_qgroup_trace_subtree(trans, src, parent, 1882 ret = btrfs_qgroup_trace_subtree(trans, parent,
1922 btrfs_header_generation(parent), 1883 btrfs_header_generation(parent),
1923 btrfs_header_level(parent)); 1884 btrfs_header_level(parent));
1924 if (ret < 0) 1885 if (ret < 0)
1925 break; 1886 break;
1926 ret = btrfs_qgroup_trace_subtree(trans, dest, 1887 ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
1927 path->nodes[level],
1928 btrfs_header_generation(path->nodes[level]), 1888 btrfs_header_generation(path->nodes[level]),
1929 btrfs_header_level(path->nodes[level])); 1889 btrfs_header_level(path->nodes[level]));
1930 if (ret < 0) 1890 if (ret < 0)
@@ -3333,48 +3293,6 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3333 return 0; 3293 return 0;
3334} 3294}
3335 3295
3336#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3337static int get_ref_objectid_v0(struct reloc_control *rc,
3338 struct btrfs_path *path,
3339 struct btrfs_key *extent_key,
3340 u64 *ref_objectid, int *path_change)
3341{
3342 struct btrfs_key key;
3343 struct extent_buffer *leaf;
3344 struct btrfs_extent_ref_v0 *ref0;
3345 int ret;
3346 int slot;
3347
3348 leaf = path->nodes[0];
3349 slot = path->slots[0];
3350 while (1) {
3351 if (slot >= btrfs_header_nritems(leaf)) {
3352 ret = btrfs_next_leaf(rc->extent_root, path);
3353 if (ret < 0)
3354 return ret;
3355 BUG_ON(ret > 0);
3356 leaf = path->nodes[0];
3357 slot = path->slots[0];
3358 if (path_change)
3359 *path_change = 1;
3360 }
3361 btrfs_item_key_to_cpu(leaf, &key, slot);
3362 if (key.objectid != extent_key->objectid)
3363 return -ENOENT;
3364
3365 if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
3366 slot++;
3367 continue;
3368 }
3369 ref0 = btrfs_item_ptr(leaf, slot,
3370 struct btrfs_extent_ref_v0);
3371 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
3372 break;
3373 }
3374 return 0;
3375}
3376#endif
3377
3378/* 3296/*
3379 * helper to add a tree block to the list. 3297 * helper to add a tree block to the list.
3380 * the major work is getting the generation and level of the block 3298 * the major work is getting the generation and level of the block
@@ -3407,23 +3325,12 @@ static int add_tree_block(struct reloc_control *rc,
3407 level = (int)extent_key->offset; 3325 level = (int)extent_key->offset;
3408 } 3326 }
3409 generation = btrfs_extent_generation(eb, ei); 3327 generation = btrfs_extent_generation(eb, ei);
3328 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3329 btrfs_print_v0_err(eb->fs_info);
3330 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3331 return -EINVAL;
3410 } else { 3332 } else {
3411#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3412 u64 ref_owner;
3413 int ret;
3414
3415 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3416 ret = get_ref_objectid_v0(rc, path, extent_key,
3417 &ref_owner, NULL);
3418 if (ret < 0)
3419 return ret;
3420 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3421 level = (int)ref_owner;
3422 /* FIXME: get real generation */
3423 generation = 0;
3424#else
3425 BUG(); 3333 BUG();
3426#endif
3427 } 3334 }
3428 3335
3429 btrfs_release_path(path); 3336 btrfs_release_path(path);
@@ -3563,11 +3470,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3563 key.offset = 0; 3470 key.offset = 0;
3564 3471
3565 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3472 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3566 if (IS_ERR(inode) || is_bad_inode(inode)) { 3473 if (IS_ERR(inode))
3567 if (!IS_ERR(inode))
3568 iput(inode);
3569 return -ENOENT; 3474 return -ENOENT;
3570 }
3571 3475
3572truncate: 3476truncate:
3573 ret = btrfs_check_trunc_cache_free_space(fs_info, 3477 ret = btrfs_check_trunc_cache_free_space(fs_info,
@@ -3781,12 +3685,7 @@ int add_data_references(struct reloc_control *rc,
3781 eb = path->nodes[0]; 3685 eb = path->nodes[0];
3782 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3686 ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3783 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3687 end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3784#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3688 ptr += sizeof(struct btrfs_extent_item);
3785 if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3786 ptr = end;
3787 else
3788#endif
3789 ptr += sizeof(struct btrfs_extent_item);
3790 3689
3791 while (ptr < end) { 3690 while (ptr < end) {
3792 iref = (struct btrfs_extent_inline_ref *)ptr; 3691 iref = (struct btrfs_extent_inline_ref *)ptr;
@@ -3801,7 +3700,7 @@ int add_data_references(struct reloc_control *rc,
3801 ret = find_data_references(rc, extent_key, 3700 ret = find_data_references(rc, extent_key,
3802 eb, dref, blocks); 3701 eb, dref, blocks);
3803 } else { 3702 } else {
3804 ret = -EINVAL; 3703 ret = -EUCLEAN;
3805 btrfs_err(rc->extent_root->fs_info, 3704 btrfs_err(rc->extent_root->fs_info,
3806 "extent %llu slot %d has an invalid inline ref type", 3705 "extent %llu slot %d has an invalid inline ref type",
3807 eb->start, path->slots[0]); 3706 eb->start, path->slots[0]);
@@ -3832,13 +3731,7 @@ int add_data_references(struct reloc_control *rc,
3832 if (key.objectid != extent_key->objectid) 3731 if (key.objectid != extent_key->objectid)
3833 break; 3732 break;
3834 3733
3835#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3836 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3837 key.type == BTRFS_EXTENT_REF_V0_KEY) {
3838#else
3839 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3840 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3734 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3841#endif
3842 ret = __add_tree_block(rc, key.offset, blocksize, 3735 ret = __add_tree_block(rc, key.offset, blocksize,
3843 blocks); 3736 blocks);
3844 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3737 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -3846,6 +3739,10 @@ int add_data_references(struct reloc_control *rc,
3846 struct btrfs_extent_data_ref); 3739 struct btrfs_extent_data_ref);
3847 ret = find_data_references(rc, extent_key, 3740 ret = find_data_references(rc, extent_key,
3848 eb, dref, blocks); 3741 eb, dref, blocks);
3742 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
3743 btrfs_print_v0_err(eb->fs_info);
3744 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3745 ret = -EINVAL;
3849 } else { 3746 } else {
3850 ret = 0; 3747 ret = 0;
3851 } 3748 }
@@ -4084,41 +3981,13 @@ restart:
4084 flags = btrfs_extent_flags(path->nodes[0], ei); 3981 flags = btrfs_extent_flags(path->nodes[0], ei);
4085 ret = check_extent_flags(flags); 3982 ret = check_extent_flags(flags);
4086 BUG_ON(ret); 3983 BUG_ON(ret);
4087 3984 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3985 err = -EINVAL;
3986 btrfs_print_v0_err(trans->fs_info);
3987 btrfs_abort_transaction(trans, err);
3988 break;
4088 } else { 3989 } else {
4089#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4090 u64 ref_owner;
4091 int path_change = 0;
4092
4093 BUG_ON(item_size !=
4094 sizeof(struct btrfs_extent_item_v0));
4095 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
4096 &path_change);
4097 if (ret < 0) {
4098 err = ret;
4099 break;
4100 }
4101 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
4102 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
4103 else
4104 flags = BTRFS_EXTENT_FLAG_DATA;
4105
4106 if (path_change) {
4107 btrfs_release_path(path);
4108
4109 path->search_commit_root = 1;
4110 path->skip_locking = 1;
4111 ret = btrfs_search_slot(NULL, rc->extent_root,
4112 &key, path, 0, 0);
4113 if (ret < 0) {
4114 err = ret;
4115 break;
4116 }
4117 BUG_ON(ret > 0);
4118 }
4119#else
4120 BUG(); 3990 BUG();
4121#endif
4122 } 3991 }
4123 3992
4124 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3993 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
@@ -4169,8 +4038,7 @@ restart:
4169 } 4038 }
4170 } 4039 }
4171 if (trans && progress && err == -ENOSPC) { 4040 if (trans && progress && err == -ENOSPC) {
4172 ret = btrfs_force_chunk_alloc(trans, fs_info, 4041 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
4173 rc->block_group->flags);
4174 if (ret == 1) { 4042 if (ret == 1) {
4175 err = 0; 4043 err = 0;
4176 progress = 0; 4044 progress = 0;
@@ -4284,7 +4152,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
4284 key.type = BTRFS_INODE_ITEM_KEY; 4152 key.type = BTRFS_INODE_ITEM_KEY;
4285 key.offset = 0; 4153 key.offset = 0;
4286 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 4154 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4287 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 4155 BUG_ON(IS_ERR(inode));
4288 BTRFS_I(inode)->index_cnt = group->key.objectid; 4156 BTRFS_I(inode)->index_cnt = group->key.objectid;
4289 4157
4290 err = btrfs_orphan_add(trans, BTRFS_I(inode)); 4158 err = btrfs_orphan_add(trans, BTRFS_I(inode));
@@ -4375,7 +4243,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
4375 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 4243 rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
4376 BUG_ON(!rc->block_group); 4244 BUG_ON(!rc->block_group);
4377 4245
4378 ret = btrfs_inc_block_group_ro(fs_info, rc->block_group); 4246 ret = btrfs_inc_block_group_ro(rc->block_group);
4379 if (ret) { 4247 if (ret) {
4380 err = ret; 4248 err = ret;
4381 goto out; 4249 goto out;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index c451285976ac..65bda0682928 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -320,9 +320,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
320 320
321/* drop the root item for 'key' from the tree root */ 321/* drop the root item for 'key' from the tree root */
322int btrfs_del_root(struct btrfs_trans_handle *trans, 322int btrfs_del_root(struct btrfs_trans_handle *trans,
323 struct btrfs_fs_info *fs_info, const struct btrfs_key *key) 323 const struct btrfs_key *key)
324{ 324{
325 struct btrfs_root *root = fs_info->tree_root; 325 struct btrfs_root *root = trans->fs_info->tree_root;
326 struct btrfs_path *path; 326 struct btrfs_path *path;
327 int ret; 327 int ret;
328 328
@@ -341,13 +341,12 @@ out:
341 return ret; 341 return ret;
342} 342}
343 343
344int btrfs_del_root_ref(struct btrfs_trans_handle *trans, 344int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
345 struct btrfs_fs_info *fs_info, 345 u64 ref_id, u64 dirid, u64 *sequence, const char *name,
346 u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, 346 int name_len)
347 const char *name, int name_len)
348 347
349{ 348{
350 struct btrfs_root *tree_root = fs_info->tree_root; 349 struct btrfs_root *tree_root = trans->fs_info->tree_root;
351 struct btrfs_path *path; 350 struct btrfs_path *path;
352 struct btrfs_root_ref *ref; 351 struct btrfs_root_ref *ref;
353 struct extent_buffer *leaf; 352 struct extent_buffer *leaf;
@@ -413,12 +412,11 @@ out:
413 * 412 *
414 * Will return 0, -ENOMEM, or anything from the CoW path 413 * Will return 0, -ENOMEM, or anything from the CoW path
415 */ 414 */
416int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 415int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
417 struct btrfs_fs_info *fs_info, 416 u64 ref_id, u64 dirid, u64 sequence, const char *name,
418 u64 root_id, u64 ref_id, u64 dirid, u64 sequence, 417 int name_len)
419 const char *name, int name_len)
420{ 418{
421 struct btrfs_root *tree_root = fs_info->tree_root; 419 struct btrfs_root *tree_root = trans->fs_info->tree_root;
422 struct btrfs_key key; 420 struct btrfs_key key;
423 int ret; 421 int ret;
424 struct btrfs_path *path; 422 struct btrfs_path *path;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6702896cdb8f..3be1456b5116 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,32 +188,6 @@ struct scrub_ctx {
188 refcount_t refs; 188 refcount_t refs;
189}; 189};
190 190
191struct scrub_fixup_nodatasum {
192 struct scrub_ctx *sctx;
193 struct btrfs_device *dev;
194 u64 logical;
195 struct btrfs_root *root;
196 struct btrfs_work work;
197 int mirror_num;
198};
199
200struct scrub_nocow_inode {
201 u64 inum;
202 u64 offset;
203 u64 root;
204 struct list_head list;
205};
206
207struct scrub_copy_nocow_ctx {
208 struct scrub_ctx *sctx;
209 u64 logical;
210 u64 len;
211 int mirror_num;
212 u64 physical_for_dev_replace;
213 struct list_head inodes;
214 struct btrfs_work work;
215};
216
217struct scrub_warning { 191struct scrub_warning {
218 struct btrfs_path *path; 192 struct btrfs_path *path;
219 u64 extent_item_size; 193 u64 extent_item_size;
@@ -232,8 +206,6 @@ struct full_stripe_lock {
232 206
233static void scrub_pending_bio_inc(struct scrub_ctx *sctx); 207static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
234static void scrub_pending_bio_dec(struct scrub_ctx *sctx); 208static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
235static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
236static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
237static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); 209static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
238static int scrub_setup_recheck_block(struct scrub_block *original_sblock, 210static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
239 struct scrub_block *sblocks_for_recheck); 211 struct scrub_block *sblocks_for_recheck);
@@ -277,13 +249,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
277static void scrub_wr_submit(struct scrub_ctx *sctx); 249static void scrub_wr_submit(struct scrub_ctx *sctx);
278static void scrub_wr_bio_end_io(struct bio *bio); 250static void scrub_wr_bio_end_io(struct bio *bio);
279static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); 251static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
280static int write_page_nocow(struct scrub_ctx *sctx,
281 u64 physical_for_dev_replace, struct page *page);
282static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
283 struct scrub_copy_nocow_ctx *ctx);
284static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
285 int mirror_num, u64 physical_for_dev_replace);
286static void copy_nocow_pages_worker(struct btrfs_work *work);
287static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 252static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
288static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 253static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
289static void scrub_put_ctx(struct scrub_ctx *sctx); 254static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -555,60 +520,6 @@ out:
555 return ret; 520 return ret;
556} 521}
557 522
558/*
559 * used for workers that require transaction commits (i.e., for the
560 * NOCOW case)
561 */
562static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
563{
564 struct btrfs_fs_info *fs_info = sctx->fs_info;
565
566 refcount_inc(&sctx->refs);
567 /*
568 * increment scrubs_running to prevent cancel requests from
569 * completing as long as a worker is running. we must also
570 * increment scrubs_paused to prevent deadlocking on pause
571 * requests used for transactions commits (as the worker uses a
572 * transaction context). it is safe to regard the worker
573 * as paused for all matters practical. effectively, we only
574 * avoid cancellation requests from completing.
575 */
576 mutex_lock(&fs_info->scrub_lock);
577 atomic_inc(&fs_info->scrubs_running);
578 atomic_inc(&fs_info->scrubs_paused);
579 mutex_unlock(&fs_info->scrub_lock);
580
581 /*
582 * check if @scrubs_running=@scrubs_paused condition
583 * inside wait_event() is not an atomic operation.
584 * which means we may inc/dec @scrub_running/paused
585 * at any time. Let's wake up @scrub_pause_wait as
586 * much as we can to let commit transaction blocked less.
587 */
588 wake_up(&fs_info->scrub_pause_wait);
589
590 atomic_inc(&sctx->workers_pending);
591}
592
593/* used for workers that require transaction commits */
594static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
595{
596 struct btrfs_fs_info *fs_info = sctx->fs_info;
597
598 /*
599 * see scrub_pending_trans_workers_inc() why we're pretending
600 * to be paused in the scrub counters
601 */
602 mutex_lock(&fs_info->scrub_lock);
603 atomic_dec(&fs_info->scrubs_running);
604 atomic_dec(&fs_info->scrubs_paused);
605 mutex_unlock(&fs_info->scrub_lock);
606 atomic_dec(&sctx->workers_pending);
607 wake_up(&fs_info->scrub_pause_wait);
608 wake_up(&sctx->list_wait);
609 scrub_put_ctx(sctx);
610}
611
612static void scrub_free_csums(struct scrub_ctx *sctx) 523static void scrub_free_csums(struct scrub_ctx *sctx)
613{ 524{
614 while (!list_empty(&sctx->csum_list)) { 525 while (!list_empty(&sctx->csum_list)) {
@@ -882,194 +793,6 @@ out:
882 btrfs_free_path(path); 793 btrfs_free_path(path);
883} 794}
884 795
885static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
886{
887 struct page *page = NULL;
888 unsigned long index;
889 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
890 int ret;
891 int corrected = 0;
892 struct btrfs_key key;
893 struct inode *inode = NULL;
894 struct btrfs_fs_info *fs_info;
895 u64 end = offset + PAGE_SIZE - 1;
896 struct btrfs_root *local_root;
897 int srcu_index;
898
899 key.objectid = root;
900 key.type = BTRFS_ROOT_ITEM_KEY;
901 key.offset = (u64)-1;
902
903 fs_info = fixup->root->fs_info;
904 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
905
906 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
907 if (IS_ERR(local_root)) {
908 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
909 return PTR_ERR(local_root);
910 }
911
912 key.type = BTRFS_INODE_ITEM_KEY;
913 key.objectid = inum;
914 key.offset = 0;
915 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
916 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
917 if (IS_ERR(inode))
918 return PTR_ERR(inode);
919
920 index = offset >> PAGE_SHIFT;
921
922 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
923 if (!page) {
924 ret = -ENOMEM;
925 goto out;
926 }
927
928 if (PageUptodate(page)) {
929 if (PageDirty(page)) {
930 /*
931 * we need to write the data to the defect sector. the
932 * data that was in that sector is not in memory,
933 * because the page was modified. we must not write the
934 * modified page to that sector.
935 *
936 * TODO: what could be done here: wait for the delalloc
937 * runner to write out that page (might involve
938 * COW) and see whether the sector is still
939 * referenced afterwards.
940 *
941 * For the meantime, we'll treat this error
942 * incorrectable, although there is a chance that a
943 * later scrub will find the bad sector again and that
944 * there's no dirty page in memory, then.
945 */
946 ret = -EIO;
947 goto out;
948 }
949 ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
950 fixup->logical, page,
951 offset - page_offset(page),
952 fixup->mirror_num);
953 unlock_page(page);
954 corrected = !ret;
955 } else {
956 /*
957 * we need to get good data first. the general readpage path
958 * will call repair_io_failure for us, we just have to make
959 * sure we read the bad mirror.
960 */
961 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
962 EXTENT_DAMAGED);
963 if (ret) {
964 /* set_extent_bits should give proper error */
965 WARN_ON(ret > 0);
966 if (ret > 0)
967 ret = -EFAULT;
968 goto out;
969 }
970
971 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
972 btrfs_get_extent,
973 fixup->mirror_num);
974 wait_on_page_locked(page);
975
976 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
977 end, EXTENT_DAMAGED, 0, NULL);
978 if (!corrected)
979 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
980 EXTENT_DAMAGED);
981 }
982
983out:
984 if (page)
985 put_page(page);
986
987 iput(inode);
988
989 if (ret < 0)
990 return ret;
991
992 if (ret == 0 && corrected) {
993 /*
994 * we only need to call readpage for one of the inodes belonging
995 * to this extent. so make iterate_extent_inodes stop
996 */
997 return 1;
998 }
999
1000 return -EIO;
1001}
1002
1003static void scrub_fixup_nodatasum(struct btrfs_work *work)
1004{
1005 struct btrfs_fs_info *fs_info;
1006 int ret;
1007 struct scrub_fixup_nodatasum *fixup;
1008 struct scrub_ctx *sctx;
1009 struct btrfs_trans_handle *trans = NULL;
1010 struct btrfs_path *path;
1011 int uncorrectable = 0;
1012
1013 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
1014 sctx = fixup->sctx;
1015 fs_info = fixup->root->fs_info;
1016
1017 path = btrfs_alloc_path();
1018 if (!path) {
1019 spin_lock(&sctx->stat_lock);
1020 ++sctx->stat.malloc_errors;
1021 spin_unlock(&sctx->stat_lock);
1022 uncorrectable = 1;
1023 goto out;
1024 }
1025
1026 trans = btrfs_join_transaction(fixup->root);
1027 if (IS_ERR(trans)) {
1028 uncorrectable = 1;
1029 goto out;
1030 }
1031
1032 /*
1033 * the idea is to trigger a regular read through the standard path. we
1034 * read a page from the (failed) logical address by specifying the
1035 * corresponding copynum of the failed sector. thus, that readpage is
1036 * expected to fail.
1037 * that is the point where on-the-fly error correction will kick in
1038 * (once it's finished) and rewrite the failed sector if a good copy
1039 * can be found.
1040 */
1041 ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
1042 scrub_fixup_readpage, fixup, false);
1043 if (ret < 0) {
1044 uncorrectable = 1;
1045 goto out;
1046 }
1047 WARN_ON(ret != 1);
1048
1049 spin_lock(&sctx->stat_lock);
1050 ++sctx->stat.corrected_errors;
1051 spin_unlock(&sctx->stat_lock);
1052
1053out:
1054 if (trans && !IS_ERR(trans))
1055 btrfs_end_transaction(trans);
1056 if (uncorrectable) {
1057 spin_lock(&sctx->stat_lock);
1058 ++sctx->stat.uncorrectable_errors;
1059 spin_unlock(&sctx->stat_lock);
1060 btrfs_dev_replace_stats_inc(
1061 &fs_info->dev_replace.num_uncorrectable_read_errors);
1062 btrfs_err_rl_in_rcu(fs_info,
1063 "unable to fixup (nodatasum) error at logical %llu on dev %s",
1064 fixup->logical, rcu_str_deref(fixup->dev->name));
1065 }
1066
1067 btrfs_free_path(path);
1068 kfree(fixup);
1069
1070 scrub_pending_trans_workers_dec(sctx);
1071}
1072
1073static inline void scrub_get_recover(struct scrub_recover *recover) 796static inline void scrub_get_recover(struct scrub_recover *recover)
1074{ 797{
1075 refcount_inc(&recover->refs); 798 refcount_inc(&recover->refs);
@@ -1264,42 +987,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
1264 } 987 }
1265 988
1266 /* 989 /*
1267 * NOTE: Even for nodatasum case, it's still possible that it's a
1268 * compressed data extent, thus scrub_fixup_nodatasum(), which write
1269 * inode page cache onto disk, could cause serious data corruption.
1270 *
1271 * So here we could only read from disk, and hope our recovery could
1272 * reach disk before the newer write.
1273 */
1274 if (0 && !is_metadata && !have_csum) {
1275 struct scrub_fixup_nodatasum *fixup_nodatasum;
1276
1277 WARN_ON(sctx->is_dev_replace);
1278
1279 /*
1280 * !is_metadata and !have_csum, this means that the data
1281 * might not be COWed, that it might be modified
1282 * concurrently. The general strategy to work on the
1283 * commit root does not help in the case when COW is not
1284 * used.
1285 */
1286 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
1287 if (!fixup_nodatasum)
1288 goto did_not_correct_error;
1289 fixup_nodatasum->sctx = sctx;
1290 fixup_nodatasum->dev = dev;
1291 fixup_nodatasum->logical = logical;
1292 fixup_nodatasum->root = fs_info->extent_root;
1293 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1294 scrub_pending_trans_workers_inc(sctx);
1295 btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
1296 scrub_fixup_nodatasum, NULL, NULL);
1297 btrfs_queue_work(fs_info->scrub_workers,
1298 &fixup_nodatasum->work);
1299 goto out;
1300 }
1301
1302 /*
1303 * now build and submit the bios for the other mirrors, check 990 * now build and submit the bios for the other mirrors, check
1304 * checksums. 991 * checksums.
1305 * First try to pick the mirror which is completely without I/O 992 * First try to pick the mirror which is completely without I/O
@@ -1866,7 +1553,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1866 bio = btrfs_io_bio_alloc(1); 1553 bio = btrfs_io_bio_alloc(1);
1867 bio_set_dev(bio, page_bad->dev->bdev); 1554 bio_set_dev(bio, page_bad->dev->bdev);
1868 bio->bi_iter.bi_sector = page_bad->physical >> 9; 1555 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1869 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1556 bio->bi_opf = REQ_OP_WRITE;
1870 1557
1871 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); 1558 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1872 if (PAGE_SIZE != ret) { 1559 if (PAGE_SIZE != ret) {
@@ -1961,7 +1648,7 @@ again:
1961 bio->bi_end_io = scrub_wr_bio_end_io; 1648 bio->bi_end_io = scrub_wr_bio_end_io;
1962 bio_set_dev(bio, sbio->dev->bdev); 1649 bio_set_dev(bio, sbio->dev->bdev);
1963 bio->bi_iter.bi_sector = sbio->physical >> 9; 1650 bio->bi_iter.bi_sector = sbio->physical >> 9;
1964 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1651 bio->bi_opf = REQ_OP_WRITE;
1965 sbio->status = 0; 1652 sbio->status = 0;
1966 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1653 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1967 spage->physical_for_dev_replace || 1654 spage->physical_for_dev_replace ||
@@ -2361,7 +2048,7 @@ again:
2361 bio->bi_end_io = scrub_bio_end_io; 2048 bio->bi_end_io = scrub_bio_end_io;
2362 bio_set_dev(bio, sbio->dev->bdev); 2049 bio_set_dev(bio, sbio->dev->bdev);
2363 bio->bi_iter.bi_sector = sbio->physical >> 9; 2050 bio->bi_iter.bi_sector = sbio->physical >> 9;
2364 bio_set_op_attrs(bio, REQ_OP_READ, 0); 2051 bio->bi_opf = REQ_OP_READ;
2365 sbio->status = 0; 2052 sbio->status = 0;
2366 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 2053 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2367 spage->physical || 2054 spage->physical ||
@@ -2800,17 +2487,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2800 have_csum = scrub_find_csum(sctx, logical, csum); 2487 have_csum = scrub_find_csum(sctx, logical, csum);
2801 if (have_csum == 0) 2488 if (have_csum == 0)
2802 ++sctx->stat.no_csum; 2489 ++sctx->stat.no_csum;
2803 if (0 && sctx->is_dev_replace && !have_csum) {
2804 ret = copy_nocow_pages(sctx, logical, l,
2805 mirror_num,
2806 physical_for_dev_replace);
2807 goto behind_scrub_pages;
2808 }
2809 } 2490 }
2810 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, 2491 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2811 mirror_num, have_csum ? csum : NULL, 0, 2492 mirror_num, have_csum ? csum : NULL, 0,
2812 physical_for_dev_replace); 2493 physical_for_dev_replace);
2813behind_scrub_pages:
2814 if (ret) 2494 if (ret)
2815 return ret; 2495 return ret;
2816 len -= l; 2496 len -= l;
@@ -3863,7 +3543,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3863 * -> btrfs_scrub_pause() 3543 * -> btrfs_scrub_pause()
3864 */ 3544 */
3865 scrub_pause_on(fs_info); 3545 scrub_pause_on(fs_info);
3866 ret = btrfs_inc_block_group_ro(fs_info, cache); 3546 ret = btrfs_inc_block_group_ro(cache);
3867 if (!ret && is_dev_replace) { 3547 if (!ret && is_dev_replace) {
3868 /* 3548 /*
3869 * If we are doing a device replace wait for any tasks 3549 * If we are doing a device replace wait for any tasks
@@ -3982,14 +3662,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3982 if (!cache->removed && !cache->ro && cache->reserved == 0 && 3662 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3983 btrfs_block_group_used(&cache->item) == 0) { 3663 btrfs_block_group_used(&cache->item) == 0) {
3984 spin_unlock(&cache->lock); 3664 spin_unlock(&cache->lock);
3985 spin_lock(&fs_info->unused_bgs_lock); 3665 btrfs_mark_bg_unused(cache);
3986 if (list_empty(&cache->bg_list)) {
3987 btrfs_get_block_group(cache);
3988 trace_btrfs_add_unused_block_group(cache);
3989 list_add_tail(&cache->bg_list,
3990 &fs_info->unused_bgs);
3991 }
3992 spin_unlock(&fs_info->unused_bgs_lock);
3993 } else { 3666 } else {
3994 spin_unlock(&cache->lock); 3667 spin_unlock(&cache->lock);
3995 } 3668 }
@@ -4072,10 +3745,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
4072 if (!fs_info->scrub_wr_completion_workers) 3745 if (!fs_info->scrub_wr_completion_workers)
4073 goto fail_scrub_wr_completion_workers; 3746 goto fail_scrub_wr_completion_workers;
4074 3747
4075 fs_info->scrub_nocow_workers =
4076 btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
4077 if (!fs_info->scrub_nocow_workers)
4078 goto fail_scrub_nocow_workers;
4079 fs_info->scrub_parity_workers = 3748 fs_info->scrub_parity_workers =
4080 btrfs_alloc_workqueue(fs_info, "scrubparity", flags, 3749 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
4081 max_active, 2); 3750 max_active, 2);
@@ -4086,8 +3755,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
4086 return 0; 3755 return 0;
4087 3756
4088fail_scrub_parity_workers: 3757fail_scrub_parity_workers:
4089 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
4090fail_scrub_nocow_workers:
4091 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3758 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
4092fail_scrub_wr_completion_workers: 3759fail_scrub_wr_completion_workers:
4093 btrfs_destroy_workqueue(fs_info->scrub_workers); 3760 btrfs_destroy_workqueue(fs_info->scrub_workers);
@@ -4100,7 +3767,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
4100 if (--fs_info->scrub_workers_refcnt == 0) { 3767 if (--fs_info->scrub_workers_refcnt == 0) {
4101 btrfs_destroy_workqueue(fs_info->scrub_workers); 3768 btrfs_destroy_workqueue(fs_info->scrub_workers);
4102 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); 3769 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
4103 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
4104 btrfs_destroy_workqueue(fs_info->scrub_parity_workers); 3770 btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
4105 } 3771 }
4106 WARN_ON(fs_info->scrub_workers_refcnt < 0); 3772 WARN_ON(fs_info->scrub_workers_refcnt < 0);
@@ -4113,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
4113 struct scrub_ctx *sctx; 3779 struct scrub_ctx *sctx;
4114 int ret; 3780 int ret;
4115 struct btrfs_device *dev; 3781 struct btrfs_device *dev;
4116 struct rcu_string *name;
4117 3782
4118 if (btrfs_fs_closing(fs_info)) 3783 if (btrfs_fs_closing(fs_info))
4119 return -EINVAL; 3784 return -EINVAL;
@@ -4167,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
4167 if (!is_dev_replace && !readonly && 3832 if (!is_dev_replace && !readonly &&
4168 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { 3833 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
4169 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 3834 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4170 rcu_read_lock(); 3835 btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
4171 name = rcu_dereference(dev->name); 3836 rcu_str_deref(dev->name));
4172 btrfs_err(fs_info, "scrub: device %s is not writable",
4173 name->str);
4174 rcu_read_unlock();
4175 return -EROFS; 3837 return -EROFS;
4176 } 3838 }
4177 3839
@@ -4359,330 +4021,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4359 *extent_dev = bbio->stripes[0].dev; 4021 *extent_dev = bbio->stripes[0].dev;
4360 btrfs_put_bbio(bbio); 4022 btrfs_put_bbio(bbio);
4361} 4023}
4362
4363static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
4364 int mirror_num, u64 physical_for_dev_replace)
4365{
4366 struct scrub_copy_nocow_ctx *nocow_ctx;
4367 struct btrfs_fs_info *fs_info = sctx->fs_info;
4368
4369 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
4370 if (!nocow_ctx) {
4371 spin_lock(&sctx->stat_lock);
4372 sctx->stat.malloc_errors++;
4373 spin_unlock(&sctx->stat_lock);
4374 return -ENOMEM;
4375 }
4376
4377 scrub_pending_trans_workers_inc(sctx);
4378
4379 nocow_ctx->sctx = sctx;
4380 nocow_ctx->logical = logical;
4381 nocow_ctx->len = len;
4382 nocow_ctx->mirror_num = mirror_num;
4383 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
4384 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
4385 copy_nocow_pages_worker, NULL, NULL);
4386 INIT_LIST_HEAD(&nocow_ctx->inodes);
4387 btrfs_queue_work(fs_info->scrub_nocow_workers,
4388 &nocow_ctx->work);
4389
4390 return 0;
4391}
4392
4393static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
4394{
4395 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
4396 struct scrub_nocow_inode *nocow_inode;
4397
4398 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
4399 if (!nocow_inode)
4400 return -ENOMEM;
4401 nocow_inode->inum = inum;
4402 nocow_inode->offset = offset;
4403 nocow_inode->root = root;
4404 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
4405 return 0;
4406}
4407
4408#define COPY_COMPLETE 1
4409
4410static void copy_nocow_pages_worker(struct btrfs_work *work)
4411{
4412 struct scrub_copy_nocow_ctx *nocow_ctx =
4413 container_of(work, struct scrub_copy_nocow_ctx, work);
4414 struct scrub_ctx *sctx = nocow_ctx->sctx;
4415 struct btrfs_fs_info *fs_info = sctx->fs_info;
4416 struct btrfs_root *root = fs_info->extent_root;
4417 u64 logical = nocow_ctx->logical;
4418 u64 len = nocow_ctx->len;
4419 int mirror_num = nocow_ctx->mirror_num;
4420 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4421 int ret;
4422 struct btrfs_trans_handle *trans = NULL;
4423 struct btrfs_path *path;
4424 int not_written = 0;
4425
4426 path = btrfs_alloc_path();
4427 if (!path) {
4428 spin_lock(&sctx->stat_lock);
4429 sctx->stat.malloc_errors++;
4430 spin_unlock(&sctx->stat_lock);
4431 not_written = 1;
4432 goto out;
4433 }
4434
4435 trans = btrfs_join_transaction(root);
4436 if (IS_ERR(trans)) {
4437 not_written = 1;
4438 goto out;
4439 }
4440
4441 ret = iterate_inodes_from_logical(logical, fs_info, path,
4442 record_inode_for_nocow, nocow_ctx, false);
4443 if (ret != 0 && ret != -ENOENT) {
4444 btrfs_warn(fs_info,
4445 "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
4446 logical, physical_for_dev_replace, len, mirror_num,
4447 ret);
4448 not_written = 1;
4449 goto out;
4450 }
4451
4452 btrfs_end_transaction(trans);
4453 trans = NULL;
4454 while (!list_empty(&nocow_ctx->inodes)) {
4455 struct scrub_nocow_inode *entry;
4456 entry = list_first_entry(&nocow_ctx->inodes,
4457 struct scrub_nocow_inode,
4458 list);
4459 list_del_init(&entry->list);
4460 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
4461 entry->root, nocow_ctx);
4462 kfree(entry);
4463 if (ret == COPY_COMPLETE) {
4464 ret = 0;
4465 break;
4466 } else if (ret) {
4467 break;
4468 }
4469 }
4470out:
4471 while (!list_empty(&nocow_ctx->inodes)) {
4472 struct scrub_nocow_inode *entry;
4473 entry = list_first_entry(&nocow_ctx->inodes,
4474 struct scrub_nocow_inode,
4475 list);
4476 list_del_init(&entry->list);
4477 kfree(entry);
4478 }
4479 if (trans && !IS_ERR(trans))
4480 btrfs_end_transaction(trans);
4481 if (not_written)
4482 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
4483 num_uncorrectable_read_errors);
4484
4485 btrfs_free_path(path);
4486 kfree(nocow_ctx);
4487
4488 scrub_pending_trans_workers_dec(sctx);
4489}
4490
4491static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
4492 u64 logical)
4493{
4494 struct extent_state *cached_state = NULL;
4495 struct btrfs_ordered_extent *ordered;
4496 struct extent_io_tree *io_tree;
4497 struct extent_map *em;
4498 u64 lockstart = start, lockend = start + len - 1;
4499 int ret = 0;
4500
4501 io_tree = &inode->io_tree;
4502
4503 lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
4504 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
4505 if (ordered) {
4506 btrfs_put_ordered_extent(ordered);
4507 ret = 1;
4508 goto out_unlock;
4509 }
4510
4511 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
4512 if (IS_ERR(em)) {
4513 ret = PTR_ERR(em);
4514 goto out_unlock;
4515 }
4516
4517 /*
4518 * This extent does not actually cover the logical extent anymore,
4519 * move on to the next inode.
4520 */
4521 if (em->block_start > logical ||
4522 em->block_start + em->block_len < logical + len ||
4523 test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
4524 free_extent_map(em);
4525 ret = 1;
4526 goto out_unlock;
4527 }
4528 free_extent_map(em);
4529
4530out_unlock:
4531 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
4532 return ret;
4533}
4534
4535static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
4536 struct scrub_copy_nocow_ctx *nocow_ctx)
4537{
4538 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
4539 struct btrfs_key key;
4540 struct inode *inode;
4541 struct page *page;
4542 struct btrfs_root *local_root;
4543 struct extent_io_tree *io_tree;
4544 u64 physical_for_dev_replace;
4545 u64 nocow_ctx_logical;
4546 u64 len = nocow_ctx->len;
4547 unsigned long index;
4548 int srcu_index;
4549 int ret = 0;
4550 int err = 0;
4551
4552 key.objectid = root;
4553 key.type = BTRFS_ROOT_ITEM_KEY;
4554 key.offset = (u64)-1;
4555
4556 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
4557
4558 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
4559 if (IS_ERR(local_root)) {
4560 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4561 return PTR_ERR(local_root);
4562 }
4563
4564 key.type = BTRFS_INODE_ITEM_KEY;
4565 key.objectid = inum;
4566 key.offset = 0;
4567 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
4568 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4569 if (IS_ERR(inode))
4570 return PTR_ERR(inode);
4571
4572 /* Avoid truncate/dio/punch hole.. */
4573 inode_lock(inode);
4574 inode_dio_wait(inode);
4575
4576 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4577 io_tree = &BTRFS_I(inode)->io_tree;
4578 nocow_ctx_logical = nocow_ctx->logical;
4579
4580 ret = check_extent_to_block(BTRFS_I(inode), offset, len,
4581 nocow_ctx_logical);
4582 if (ret) {
4583 ret = ret > 0 ? 0 : ret;
4584 goto out;
4585 }
4586
4587 while (len >= PAGE_SIZE) {
4588 index = offset >> PAGE_SHIFT;
4589again:
4590 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4591 if (!page) {
4592 btrfs_err(fs_info, "find_or_create_page() failed");
4593 ret = -ENOMEM;
4594 goto out;
4595 }
4596
4597 if (PageUptodate(page)) {
4598 if (PageDirty(page))
4599 goto next_page;
4600 } else {
4601 ClearPageError(page);
4602 err = extent_read_full_page(io_tree, page,
4603 btrfs_get_extent,
4604 nocow_ctx->mirror_num);
4605 if (err) {
4606 ret = err;
4607 goto next_page;
4608 }
4609
4610 lock_page(page);
4611 /*
4612 * If the page has been remove from the page cache,
4613 * the data on it is meaningless, because it may be
4614 * old one, the new data may be written into the new
4615 * page in the page cache.
4616 */
4617 if (page->mapping != inode->i_mapping) {
4618 unlock_page(page);
4619 put_page(page);
4620 goto again;
4621 }
4622 if (!PageUptodate(page)) {
4623 ret = -EIO;
4624 goto next_page;
4625 }
4626 }
4627
4628 ret = check_extent_to_block(BTRFS_I(inode), offset, len,
4629 nocow_ctx_logical);
4630 if (ret) {
4631 ret = ret > 0 ? 0 : ret;
4632 goto next_page;
4633 }
4634
4635 err = write_page_nocow(nocow_ctx->sctx,
4636 physical_for_dev_replace, page);
4637 if (err)
4638 ret = err;
4639next_page:
4640 unlock_page(page);
4641 put_page(page);
4642
4643 if (ret)
4644 break;
4645
4646 offset += PAGE_SIZE;
4647 physical_for_dev_replace += PAGE_SIZE;
4648 nocow_ctx_logical += PAGE_SIZE;
4649 len -= PAGE_SIZE;
4650 }
4651 ret = COPY_COMPLETE;
4652out:
4653 inode_unlock(inode);
4654 iput(inode);
4655 return ret;
4656}
4657
4658static int write_page_nocow(struct scrub_ctx *sctx,
4659 u64 physical_for_dev_replace, struct page *page)
4660{
4661 struct bio *bio;
4662 struct btrfs_device *dev;
4663
4664 dev = sctx->wr_tgtdev;
4665 if (!dev)
4666 return -EIO;
4667 if (!dev->bdev) {
4668 btrfs_warn_rl(dev->fs_info,
4669 "scrub write_page_nocow(bdev == NULL) is unexpected");
4670 return -EIO;
4671 }
4672 bio = btrfs_io_bio_alloc(1);
4673 bio->bi_iter.bi_size = 0;
4674 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
4675 bio_set_dev(bio, dev->bdev);
4676 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
4677 /* bio_add_page won't fail on a freshly allocated bio */
4678 bio_add_page(bio, page, PAGE_SIZE, 0);
4679
4680 if (btrfsic_submit_bio_wait(bio)) {
4681 bio_put(bio);
4682 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
4683 return -EIO;
4684 }
4685
4686 bio_put(bio);
4687 return 0;
4688}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c47f62b19226..ba8950bfd9c7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
100 u64 cur_inode_rdev; 100 u64 cur_inode_rdev;
101 u64 cur_inode_last_extent; 101 u64 cur_inode_last_extent;
102 u64 cur_inode_next_write_offset; 102 u64 cur_inode_next_write_offset;
103 bool ignore_cur_inode;
103 104
104 u64 send_progress; 105 u64 send_progress;
105 106
@@ -1500,7 +1501,7 @@ static int read_symlink(struct btrfs_root *root,
1500 BUG_ON(compression); 1501 BUG_ON(compression);
1501 1502
1502 off = btrfs_file_extent_inline_start(ei); 1503 off = btrfs_file_extent_inline_start(ei);
1503 len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); 1504 len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
1504 1505
1505 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1506 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1506 1507
@@ -5006,6 +5007,15 @@ static int send_hole(struct send_ctx *sctx, u64 end)
5006 u64 len; 5007 u64 len;
5007 int ret = 0; 5008 int ret = 0;
5008 5009
5010 /*
5011 * A hole that starts at EOF or beyond it. Since we do not yet support
5012 * fallocate (for extent preallocation and hole punching), sending a
5013 * write of zeroes starting at EOF or beyond would later require issuing
5014 * a truncate operation which would undo the write and achieve nothing.
5015 */
5016 if (offset >= sctx->cur_inode_size)
5017 return 0;
5018
5009 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 5019 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5010 return send_update_extent(sctx, offset, end - offset); 5020 return send_update_extent(sctx, offset, end - offset);
5011 5021
@@ -5160,7 +5170,7 @@ static int clone_range(struct send_ctx *sctx,
5160 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 5170 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5161 type = btrfs_file_extent_type(leaf, ei); 5171 type = btrfs_file_extent_type(leaf, ei);
5162 if (type == BTRFS_FILE_EXTENT_INLINE) { 5172 if (type == BTRFS_FILE_EXTENT_INLINE) {
5163 ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); 5173 ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
5164 ext_len = PAGE_ALIGN(ext_len); 5174 ext_len = PAGE_ALIGN(ext_len);
5165 } else { 5175 } else {
5166 ext_len = btrfs_file_extent_num_bytes(leaf, ei); 5176 ext_len = btrfs_file_extent_num_bytes(leaf, ei);
@@ -5236,8 +5246,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
5236 struct btrfs_file_extent_item); 5246 struct btrfs_file_extent_item);
5237 type = btrfs_file_extent_type(path->nodes[0], ei); 5247 type = btrfs_file_extent_type(path->nodes[0], ei);
5238 if (type == BTRFS_FILE_EXTENT_INLINE) { 5248 if (type == BTRFS_FILE_EXTENT_INLINE) {
5239 len = btrfs_file_extent_inline_len(path->nodes[0], 5249 len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
5240 path->slots[0], ei);
5241 /* 5250 /*
5242 * it is possible the inline item won't cover the whole page, 5251 * it is possible the inline item won't cover the whole page,
5243 * but there may be items after this page. Make 5252 * but there may be items after this page. Make
@@ -5375,7 +5384,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
5375 } 5384 }
5376 5385
5377 if (right_type == BTRFS_FILE_EXTENT_INLINE) { 5386 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5378 right_len = btrfs_file_extent_inline_len(eb, slot, ei); 5387 right_len = btrfs_file_extent_ram_bytes(eb, ei);
5379 right_len = PAGE_ALIGN(right_len); 5388 right_len = PAGE_ALIGN(right_len);
5380 } else { 5389 } else {
5381 right_len = btrfs_file_extent_num_bytes(eb, ei); 5390 right_len = btrfs_file_extent_num_bytes(eb, ei);
@@ -5496,8 +5505,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
5496 struct btrfs_file_extent_item); 5505 struct btrfs_file_extent_item);
5497 type = btrfs_file_extent_type(path->nodes[0], fi); 5506 type = btrfs_file_extent_type(path->nodes[0], fi);
5498 if (type == BTRFS_FILE_EXTENT_INLINE) { 5507 if (type == BTRFS_FILE_EXTENT_INLINE) {
5499 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5508 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5500 path->slots[0], fi);
5501 extent_end = ALIGN(key.offset + size, 5509 extent_end = ALIGN(key.offset + size,
5502 sctx->send_root->fs_info->sectorsize); 5510 sctx->send_root->fs_info->sectorsize);
5503 } else { 5511 } else {
@@ -5560,7 +5568,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
5560 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 5568 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5561 if (btrfs_file_extent_type(leaf, fi) == 5569 if (btrfs_file_extent_type(leaf, fi) ==
5562 BTRFS_FILE_EXTENT_INLINE) { 5570 BTRFS_FILE_EXTENT_INLINE) {
5563 u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); 5571 u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
5564 5572
5565 extent_end = ALIGN(key.offset + size, 5573 extent_end = ALIGN(key.offset + size,
5566 root->fs_info->sectorsize); 5574 root->fs_info->sectorsize);
@@ -5606,8 +5614,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5606 struct btrfs_file_extent_item); 5614 struct btrfs_file_extent_item);
5607 type = btrfs_file_extent_type(path->nodes[0], fi); 5615 type = btrfs_file_extent_type(path->nodes[0], fi);
5608 if (type == BTRFS_FILE_EXTENT_INLINE) { 5616 if (type == BTRFS_FILE_EXTENT_INLINE) {
5609 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5617 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5610 path->slots[0], fi);
5611 extent_end = ALIGN(key->offset + size, 5618 extent_end = ALIGN(key->offset + size,
5612 sctx->send_root->fs_info->sectorsize); 5619 sctx->send_root->fs_info->sectorsize);
5613 } else { 5620 } else {
@@ -5799,6 +5806,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
5799 int pending_move = 0; 5806 int pending_move = 0;
5800 int refs_processed = 0; 5807 int refs_processed = 0;
5801 5808
5809 if (sctx->ignore_cur_inode)
5810 return 0;
5811
5802 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5812 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
5803 &refs_processed); 5813 &refs_processed);
5804 if (ret < 0) 5814 if (ret < 0)
@@ -5917,6 +5927,93 @@ out:
5917 return ret; 5927 return ret;
5918} 5928}
5919 5929
5930struct parent_paths_ctx {
5931 struct list_head *refs;
5932 struct send_ctx *sctx;
5933};
5934
5935static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
5936 void *ctx)
5937{
5938 struct parent_paths_ctx *ppctx = ctx;
5939
5940 return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
5941 ppctx->refs);
5942}
5943
5944/*
5945 * Issue unlink operations for all paths of the current inode found in the
5946 * parent snapshot.
5947 */
5948static int btrfs_unlink_all_paths(struct send_ctx *sctx)
5949{
5950 LIST_HEAD(deleted_refs);
5951 struct btrfs_path *path;
5952 struct btrfs_key key;
5953 struct parent_paths_ctx ctx;
5954 int ret;
5955
5956 path = alloc_path_for_send();
5957 if (!path)
5958 return -ENOMEM;
5959
5960 key.objectid = sctx->cur_ino;
5961 key.type = BTRFS_INODE_REF_KEY;
5962 key.offset = 0;
5963 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
5964 if (ret < 0)
5965 goto out;
5966
5967 ctx.refs = &deleted_refs;
5968 ctx.sctx = sctx;
5969
5970 while (true) {
5971 struct extent_buffer *eb = path->nodes[0];
5972 int slot = path->slots[0];
5973
5974 if (slot >= btrfs_header_nritems(eb)) {
5975 ret = btrfs_next_leaf(sctx->parent_root, path);
5976 if (ret < 0)
5977 goto out;
5978 else if (ret > 0)
5979 break;
5980 continue;
5981 }
5982
5983 btrfs_item_key_to_cpu(eb, &key, slot);
5984 if (key.objectid != sctx->cur_ino)
5985 break;
5986 if (key.type != BTRFS_INODE_REF_KEY &&
5987 key.type != BTRFS_INODE_EXTREF_KEY)
5988 break;
5989
5990 ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
5991 record_parent_ref, &ctx);
5992 if (ret < 0)
5993 goto out;
5994
5995 path->slots[0]++;
5996 }
5997
5998 while (!list_empty(&deleted_refs)) {
5999 struct recorded_ref *ref;
6000
6001 ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
6002 ret = send_unlink(sctx, ref->full_path);
6003 if (ret < 0)
6004 goto out;
6005 fs_path_free(ref->full_path);
6006 list_del(&ref->list);
6007 kfree(ref);
6008 }
6009 ret = 0;
6010out:
6011 btrfs_free_path(path);
6012 if (ret)
6013 __free_recorded_refs(&deleted_refs);
6014 return ret;
6015}
6016
5920static int changed_inode(struct send_ctx *sctx, 6017static int changed_inode(struct send_ctx *sctx,
5921 enum btrfs_compare_tree_result result) 6018 enum btrfs_compare_tree_result result)
5922{ 6019{
@@ -5931,6 +6028,7 @@ static int changed_inode(struct send_ctx *sctx,
5931 sctx->cur_inode_new_gen = 0; 6028 sctx->cur_inode_new_gen = 0;
5932 sctx->cur_inode_last_extent = (u64)-1; 6029 sctx->cur_inode_last_extent = (u64)-1;
5933 sctx->cur_inode_next_write_offset = 0; 6030 sctx->cur_inode_next_write_offset = 0;
6031 sctx->ignore_cur_inode = false;
5934 6032
5935 /* 6033 /*
5936 * Set send_progress to current inode. This will tell all get_cur_xxx 6034 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5971,6 +6069,33 @@ static int changed_inode(struct send_ctx *sctx,
5971 sctx->cur_inode_new_gen = 1; 6069 sctx->cur_inode_new_gen = 1;
5972 } 6070 }
5973 6071
6072 /*
6073 * Normally we do not find inodes with a link count of zero (orphans)
6074 * because the most common case is to create a snapshot and use it
6075 * for a send operation. However other less common use cases involve
6076 * using a subvolume and send it after turning it to RO mode just
6077 * after deleting all hard links of a file while holding an open
6078 * file descriptor against it or turning a RO snapshot into RW mode,
6079 * keep an open file descriptor against a file, delete it and then
6080 * turn the snapshot back to RO mode before using it for a send
6081 * operation. So if we find such cases, ignore the inode and all its
6082 * items completely if it's a new inode, or if it's a changed inode
6083 * make sure all its previous paths (from the parent snapshot) are all
6084 * unlinked and all other the inode items are ignored.
6085 */
6086 if (result == BTRFS_COMPARE_TREE_NEW ||
6087 result == BTRFS_COMPARE_TREE_CHANGED) {
6088 u32 nlinks;
6089
6090 nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
6091 if (nlinks == 0) {
6092 sctx->ignore_cur_inode = true;
6093 if (result == BTRFS_COMPARE_TREE_CHANGED)
6094 ret = btrfs_unlink_all_paths(sctx);
6095 goto out;
6096 }
6097 }
6098
5974 if (result == BTRFS_COMPARE_TREE_NEW) { 6099 if (result == BTRFS_COMPARE_TREE_NEW) {
5975 sctx->cur_inode_gen = left_gen; 6100 sctx->cur_inode_gen = left_gen;
5976 sctx->cur_inode_new = 1; 6101 sctx->cur_inode_new = 1;
@@ -6309,15 +6434,17 @@ static int changed_cb(struct btrfs_path *left_path,
6309 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 6434 key->objectid == BTRFS_FREE_SPACE_OBJECTID)
6310 goto out; 6435 goto out;
6311 6436
6312 if (key->type == BTRFS_INODE_ITEM_KEY) 6437 if (key->type == BTRFS_INODE_ITEM_KEY) {
6313 ret = changed_inode(sctx, result); 6438 ret = changed_inode(sctx, result);
6314 else if (key->type == BTRFS_INODE_REF_KEY || 6439 } else if (!sctx->ignore_cur_inode) {
6315 key->type == BTRFS_INODE_EXTREF_KEY) 6440 if (key->type == BTRFS_INODE_REF_KEY ||
6316 ret = changed_ref(sctx, result); 6441 key->type == BTRFS_INODE_EXTREF_KEY)
6317 else if (key->type == BTRFS_XATTR_ITEM_KEY) 6442 ret = changed_ref(sctx, result);
6318 ret = changed_xattr(sctx, result); 6443 else if (key->type == BTRFS_XATTR_ITEM_KEY)
6319 else if (key->type == BTRFS_EXTENT_DATA_KEY) 6444 ret = changed_xattr(sctx, result);
6320 ret = changed_extent(sctx, result); 6445 else if (key->type == BTRFS_EXTENT_DATA_KEY)
6446 ret = changed_extent(sctx, result);
6447 }
6321 6448
6322out: 6449out:
6323 return ret; 6450 return ret;
@@ -6328,7 +6455,6 @@ static int full_send_tree(struct send_ctx *sctx)
6328 int ret; 6455 int ret;
6329 struct btrfs_root *send_root = sctx->send_root; 6456 struct btrfs_root *send_root = sctx->send_root;
6330 struct btrfs_key key; 6457 struct btrfs_key key;
6331 struct btrfs_key found_key;
6332 struct btrfs_path *path; 6458 struct btrfs_path *path;
6333 struct extent_buffer *eb; 6459 struct extent_buffer *eb;
6334 int slot; 6460 int slot;
@@ -6350,17 +6476,13 @@ static int full_send_tree(struct send_ctx *sctx)
6350 while (1) { 6476 while (1) {
6351 eb = path->nodes[0]; 6477 eb = path->nodes[0];
6352 slot = path->slots[0]; 6478 slot = path->slots[0];
6353 btrfs_item_key_to_cpu(eb, &found_key, slot); 6479 btrfs_item_key_to_cpu(eb, &key, slot);
6354 6480
6355 ret = changed_cb(path, NULL, &found_key, 6481 ret = changed_cb(path, NULL, &key,
6356 BTRFS_COMPARE_TREE_NEW, sctx); 6482 BTRFS_COMPARE_TREE_NEW, sctx);
6357 if (ret < 0) 6483 if (ret < 0)
6358 goto out; 6484 goto out;
6359 6485
6360 key.objectid = found_key.objectid;
6361 key.type = found_key.type;
6362 key.offset = found_key.offset + 1;
6363
6364 ret = btrfs_next_item(send_root, path); 6486 ret = btrfs_next_item(send_root, path);
6365 if (ret < 0) 6487 if (ret < 0)
6366 goto out; 6488 goto out;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b7b4acb12833..4c13b737f568 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 2007 Oracle. All rights reserved. 3 * Copyright (C) 2007 Oracle. All rights reserved.
4 */ 4 */
5 5
6#include <linux/highmem.h>
7#include <asm/unaligned.h> 6#include <asm/unaligned.h>
8 7
9#include "ctree.h" 8#include "ctree.h"
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..6601c9aa5e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -5,7 +5,6 @@
5 5
6#include <linux/blkdev.h> 6#include <linux/blkdev.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/buffer_head.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
10#include <linux/pagemap.h> 9#include <linux/pagemap.h>
11#include <linux/highmem.h> 10#include <linux/highmem.h>
@@ -15,8 +14,6 @@
15#include <linux/string.h> 14#include <linux/string.h>
16#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
17#include <linux/mount.h> 16#include <linux/mount.h>
18#include <linux/mpage.h>
19#include <linux/swap.h>
20#include <linux/writeback.h> 17#include <linux/writeback.h>
21#include <linux/statfs.h> 18#include <linux/statfs.h>
22#include <linux/compat.h> 19#include <linux/compat.h>
@@ -468,9 +465,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
468 case Opt_subvolrootid: 465 case Opt_subvolrootid:
469 case Opt_device: 466 case Opt_device:
470 /* 467 /*
471 * These are parsed by btrfs_parse_subvol_options 468 * These are parsed by btrfs_parse_subvol_options or
472 * and btrfs_parse_early_options 469 * btrfs_parse_device_options and can be ignored here.
473 * and can be happily ignored here.
474 */ 470 */
475 break; 471 break;
476 case Opt_nodatasum: 472 case Opt_nodatasum:
@@ -760,6 +756,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
760 case Opt_recovery: 756 case Opt_recovery:
761 btrfs_warn(info, 757 btrfs_warn(info,
762 "'recovery' is deprecated, use 'usebackuproot' instead"); 758 "'recovery' is deprecated, use 'usebackuproot' instead");
759 /* fall through */
763 case Opt_usebackuproot: 760 case Opt_usebackuproot:
764 btrfs_info(info, 761 btrfs_info(info,
765 "trying to use backup root at mount time"); 762 "trying to use backup root at mount time");
@@ -885,13 +882,16 @@ out:
885 * All other options will be parsed on much later in the mount process and 882 * All other options will be parsed on much later in the mount process and
886 * only when we need to allocate a new super block. 883 * only when we need to allocate a new super block.
887 */ 884 */
888static int btrfs_parse_early_options(const char *options, fmode_t flags, 885static int btrfs_parse_device_options(const char *options, fmode_t flags,
889 void *holder, struct btrfs_fs_devices **fs_devices) 886 void *holder)
890{ 887{
891 substring_t args[MAX_OPT_ARGS]; 888 substring_t args[MAX_OPT_ARGS];
892 char *device_name, *opts, *orig, *p; 889 char *device_name, *opts, *orig, *p;
890 struct btrfs_device *device = NULL;
893 int error = 0; 891 int error = 0;
894 892
893 lockdep_assert_held(&uuid_mutex);
894
895 if (!options) 895 if (!options)
896 return 0; 896 return 0;
897 897
@@ -917,11 +917,13 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
917 error = -ENOMEM; 917 error = -ENOMEM;
918 goto out; 918 goto out;
919 } 919 }
920 error = btrfs_scan_one_device(device_name, 920 device = btrfs_scan_one_device(device_name, flags,
921 flags, holder, fs_devices); 921 holder);
922 kfree(device_name); 922 kfree(device_name);
923 if (error) 923 if (IS_ERR(device)) {
924 error = PTR_ERR(device);
924 goto out; 925 goto out;
926 }
925 } 927 }
926 } 928 }
927 929
@@ -935,8 +937,8 @@ out:
935 * 937 *
936 * The value is later passed to mount_subvol() 938 * The value is later passed to mount_subvol()
937 */ 939 */
938static int btrfs_parse_subvol_options(const char *options, fmode_t flags, 940static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
939 char **subvol_name, u64 *subvol_objectid) 941 u64 *subvol_objectid)
940{ 942{
941 substring_t args[MAX_OPT_ARGS]; 943 substring_t args[MAX_OPT_ARGS];
942 char *opts, *orig, *p; 944 char *opts, *orig, *p;
@@ -948,7 +950,7 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
948 950
949 /* 951 /*
950 * strsep changes the string, duplicate it because 952 * strsep changes the string, duplicate it because
951 * btrfs_parse_early_options gets called later 953 * btrfs_parse_device_options gets called later
952 */ 954 */
953 opts = kstrdup(options, GFP_KERNEL); 955 opts = kstrdup(options, GFP_KERNEL);
954 if (!opts) 956 if (!opts)
@@ -1517,6 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1517{ 1519{
1518 struct block_device *bdev = NULL; 1520 struct block_device *bdev = NULL;
1519 struct super_block *s; 1521 struct super_block *s;
1522 struct btrfs_device *device = NULL;
1520 struct btrfs_fs_devices *fs_devices = NULL; 1523 struct btrfs_fs_devices *fs_devices = NULL;
1521 struct btrfs_fs_info *fs_info = NULL; 1524 struct btrfs_fs_info *fs_info = NULL;
1522 struct security_mnt_opts new_sec_opts; 1525 struct security_mnt_opts new_sec_opts;
@@ -1526,12 +1529,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1526 if (!(flags & SB_RDONLY)) 1529 if (!(flags & SB_RDONLY))
1527 mode |= FMODE_WRITE; 1530 mode |= FMODE_WRITE;
1528 1531
1529 error = btrfs_parse_early_options(data, mode, fs_type,
1530 &fs_devices);
1531 if (error) {
1532 return ERR_PTR(error);
1533 }
1534
1535 security_init_mnt_opts(&new_sec_opts); 1532 security_init_mnt_opts(&new_sec_opts);
1536 if (data) { 1533 if (data) {
1537 error = parse_security_options(data, &new_sec_opts); 1534 error = parse_security_options(data, &new_sec_opts);
@@ -1539,10 +1536,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1539 return ERR_PTR(error); 1536 return ERR_PTR(error);
1540 } 1537 }
1541 1538
1542 error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
1543 if (error)
1544 goto error_sec_opts;
1545
1546 /* 1539 /*
1547 * Setup a dummy root and fs_info for test/set super. This is because 1540 * Setup a dummy root and fs_info for test/set super. This is because
1548 * we don't actually fill this stuff out until open_ctree, but we need 1541 * we don't actually fill this stuff out until open_ctree, but we need
@@ -1555,8 +1548,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1555 goto error_sec_opts; 1548 goto error_sec_opts;
1556 } 1549 }
1557 1550
1558 fs_info->fs_devices = fs_devices;
1559
1560 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1551 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1561 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1552 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1562 security_init_mnt_opts(&fs_info->security_opts); 1553 security_init_mnt_opts(&fs_info->security_opts);
@@ -1565,7 +1556,25 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1565 goto error_fs_info; 1556 goto error_fs_info;
1566 } 1557 }
1567 1558
1559 mutex_lock(&uuid_mutex);
1560 error = btrfs_parse_device_options(data, mode, fs_type);
1561 if (error) {
1562 mutex_unlock(&uuid_mutex);
1563 goto error_fs_info;
1564 }
1565
1566 device = btrfs_scan_one_device(device_name, mode, fs_type);
1567 if (IS_ERR(device)) {
1568 mutex_unlock(&uuid_mutex);
1569 error = PTR_ERR(device);
1570 goto error_fs_info;
1571 }
1572
1573 fs_devices = device->fs_devices;
1574 fs_info->fs_devices = fs_devices;
1575
1568 error = btrfs_open_devices(fs_devices, mode, fs_type); 1576 error = btrfs_open_devices(fs_devices, mode, fs_type);
1577 mutex_unlock(&uuid_mutex);
1569 if (error) 1578 if (error)
1570 goto error_fs_info; 1579 goto error_fs_info;
1571 1580
@@ -1650,8 +1659,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1650 if (!(flags & SB_RDONLY)) 1659 if (!(flags & SB_RDONLY))
1651 mode |= FMODE_WRITE; 1660 mode |= FMODE_WRITE;
1652 1661
1653 error = btrfs_parse_subvol_options(data, mode, 1662 error = btrfs_parse_subvol_options(data, &subvol_name,
1654 &subvol_name, &subvol_objectid); 1663 &subvol_objectid);
1655 if (error) { 1664 if (error) {
1656 kfree(subvol_name); 1665 kfree(subvol_name);
1657 return ERR_PTR(error); 1666 return ERR_PTR(error);
@@ -2098,14 +2107,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2098 btrfs_account_ro_block_groups_free_space(found); 2107 btrfs_account_ro_block_groups_free_space(found);
2099 2108
2100 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 2109 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2101 if (!list_empty(&found->block_groups[i])) { 2110 if (!list_empty(&found->block_groups[i]))
2102 switch (i) { 2111 factor = btrfs_bg_type_to_factor(
2103 case BTRFS_RAID_DUP: 2112 btrfs_raid_array[i].bg_flag);
2104 case BTRFS_RAID_RAID1:
2105 case BTRFS_RAID_RAID10:
2106 factor = 2;
2107 }
2108 }
2109 } 2113 }
2110 } 2114 }
2111 2115
@@ -2222,7 +2226,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2222 unsigned long arg) 2226 unsigned long arg)
2223{ 2227{
2224 struct btrfs_ioctl_vol_args *vol; 2228 struct btrfs_ioctl_vol_args *vol;
2225 struct btrfs_fs_devices *fs_devices; 2229 struct btrfs_device *device = NULL;
2226 int ret = -ENOTTY; 2230 int ret = -ENOTTY;
2227 2231
2228 if (!capable(CAP_SYS_ADMIN)) 2232 if (!capable(CAP_SYS_ADMIN))
@@ -2234,15 +2238,24 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2234 2238
2235 switch (cmd) { 2239 switch (cmd) {
2236 case BTRFS_IOC_SCAN_DEV: 2240 case BTRFS_IOC_SCAN_DEV:
2237 ret = btrfs_scan_one_device(vol->name, FMODE_READ, 2241 mutex_lock(&uuid_mutex);
2238 &btrfs_root_fs_type, &fs_devices); 2242 device = btrfs_scan_one_device(vol->name, FMODE_READ,
2243 &btrfs_root_fs_type);
2244 ret = PTR_ERR_OR_ZERO(device);
2245 mutex_unlock(&uuid_mutex);
2239 break; 2246 break;
2240 case BTRFS_IOC_DEVICES_READY: 2247 case BTRFS_IOC_DEVICES_READY:
2241 ret = btrfs_scan_one_device(vol->name, FMODE_READ, 2248 mutex_lock(&uuid_mutex);
2242 &btrfs_root_fs_type, &fs_devices); 2249 device = btrfs_scan_one_device(vol->name, FMODE_READ,
2243 if (ret) 2250 &btrfs_root_fs_type);
2251 if (IS_ERR(device)) {
2252 mutex_unlock(&uuid_mutex);
2253 ret = PTR_ERR(device);
2244 break; 2254 break;
2245 ret = !(fs_devices->num_devices == fs_devices->total_devices); 2255 }
2256 ret = !(device->fs_devices->num_devices ==
2257 device->fs_devices->total_devices);
2258 mutex_unlock(&uuid_mutex);
2246 break; 2259 break;
2247 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 2260 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2248 ret = btrfs_ioctl_get_supported_features((void __user*)arg); 2261 ret = btrfs_ioctl_get_supported_features((void __user*)arg);
@@ -2290,7 +2303,6 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2290 struct btrfs_fs_devices *cur_devices; 2303 struct btrfs_fs_devices *cur_devices;
2291 struct btrfs_device *dev, *first_dev = NULL; 2304 struct btrfs_device *dev, *first_dev = NULL;
2292 struct list_head *head; 2305 struct list_head *head;
2293 struct rcu_string *name;
2294 2306
2295 /* 2307 /*
2296 * Lightweight locking of the devices. We should not need 2308 * Lightweight locking of the devices. We should not need
@@ -2314,12 +2326,10 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2314 cur_devices = cur_devices->seed; 2326 cur_devices = cur_devices->seed;
2315 } 2327 }
2316 2328
2317 if (first_dev) { 2329 if (first_dev)
2318 name = rcu_dereference(first_dev->name); 2330 seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
2319 seq_escape(m, name->str, " \t\n\\"); 2331 else
2320 } else {
2321 WARN_ON(1); 2332 WARN_ON(1);
2322 }
2323 rcu_read_unlock(); 2333 rcu_read_unlock();
2324 return 0; 2334 return 0;
2325} 2335}
@@ -2331,7 +2341,6 @@ static const struct super_operations btrfs_super_ops = {
2331 .sync_fs = btrfs_sync_fs, 2341 .sync_fs = btrfs_sync_fs,
2332 .show_options = btrfs_show_options, 2342 .show_options = btrfs_show_options,
2333 .show_devname = btrfs_show_devname, 2343 .show_devname = btrfs_show_devname,
2334 .write_inode = btrfs_write_inode,
2335 .alloc_inode = btrfs_alloc_inode, 2344 .alloc_inode = btrfs_alloc_inode,
2336 .destroy_inode = btrfs_destroy_inode, 2345 .destroy_inode = btrfs_destroy_inode,
2337 .statfs = btrfs_statfs, 2346 .statfs = btrfs_statfs,
@@ -2369,7 +2378,7 @@ static __cold void btrfs_interface_exit(void)
2369 2378
2370static void __init btrfs_print_mod_info(void) 2379static void __init btrfs_print_mod_info(void)
2371{ 2380{
2372 pr_info("Btrfs loaded, crc32c=%s" 2381 static const char options[] = ""
2373#ifdef CONFIG_BTRFS_DEBUG 2382#ifdef CONFIG_BTRFS_DEBUG
2374 ", debug=on" 2383 ", debug=on"
2375#endif 2384#endif
@@ -2382,8 +2391,8 @@ static void __init btrfs_print_mod_info(void)
2382#ifdef CONFIG_BTRFS_FS_REF_VERIFY 2391#ifdef CONFIG_BTRFS_FS_REF_VERIFY
2383 ", ref-verify=on" 2392 ", ref-verify=on"
2384#endif 2393#endif
2385 "\n", 2394 ;
2386 crc32c_impl()); 2395 pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
2387} 2396}
2388 2397
2389static int __init init_btrfs_fs(void) 2398static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4a4e960c7c66..3717c864ba23 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -7,10 +7,8 @@
7#include <linux/slab.h> 7#include <linux/slab.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/completion.h> 9#include <linux/completion.h>
10#include <linux/buffer_head.h>
11#include <linux/kobject.h> 10#include <linux/kobject.h>
12#include <linux/bug.h> 11#include <linux/bug.h>
13#include <linux/genhd.h>
14#include <linux/debugfs.h> 12#include <linux/debugfs.h>
15 13
16#include "ctree.h" 14#include "ctree.h"
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ace94db09d29..412b910b04cc 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -216,7 +216,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
216 btrfs_init_dummy_trans(&trans, fs_info); 216 btrfs_init_dummy_trans(&trans, fs_info);
217 217
218 test_msg("qgroup basic add"); 218 test_msg("qgroup basic add");
219 ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID); 219 ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
220 if (ret) { 220 if (ret) {
221 test_err("couldn't create a qgroup %d", ret); 221 test_err("couldn't create a qgroup %d", ret);
222 return ret; 222 return ret;
@@ -249,8 +249,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
249 return ret; 249 return ret;
250 } 250 }
251 251
252 ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 252 ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
253 nodesize, old_roots, new_roots); 253 new_roots);
254 if (ret) { 254 if (ret) {
255 test_err("couldn't account space for a qgroup %d", ret); 255 test_err("couldn't account space for a qgroup %d", ret);
256 return ret; 256 return ret;
@@ -285,8 +285,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
285 return ret; 285 return ret;
286 } 286 }
287 287
288 ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 288 ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
289 nodesize, old_roots, new_roots); 289 new_roots);
290 if (ret) { 290 if (ret) {
291 test_err("couldn't account space for a qgroup %d", ret); 291 test_err("couldn't account space for a qgroup %d", ret);
292 return -EINVAL; 292 return -EINVAL;
@@ -322,7 +322,7 @@ static int test_multiple_refs(struct btrfs_root *root,
322 * We have BTRFS_FS_TREE_OBJECTID created already from the 322 * We have BTRFS_FS_TREE_OBJECTID created already from the
323 * previous test. 323 * previous test.
324 */ 324 */
325 ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID); 325 ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID);
326 if (ret) { 326 if (ret) {
327 test_err("couldn't create a qgroup %d", ret); 327 test_err("couldn't create a qgroup %d", ret);
328 return ret; 328 return ret;
@@ -350,8 +350,8 @@ static int test_multiple_refs(struct btrfs_root *root,
350 return ret; 350 return ret;
351 } 351 }
352 352
353 ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 353 ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
354 nodesize, old_roots, new_roots); 354 new_roots);
355 if (ret) { 355 if (ret) {
356 test_err("couldn't account space for a qgroup %d", ret); 356 test_err("couldn't account space for a qgroup %d", ret);
357 return ret; 357 return ret;
@@ -385,8 +385,8 @@ static int test_multiple_refs(struct btrfs_root *root,
385 return ret; 385 return ret;
386 } 386 }
387 387
388 ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 388 ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
389 nodesize, old_roots, new_roots); 389 new_roots);
390 if (ret) { 390 if (ret) {
391 test_err("couldn't account space for a qgroup %d", ret); 391 test_err("couldn't account space for a qgroup %d", ret);
392 return ret; 392 return ret;
@@ -426,8 +426,8 @@ static int test_multiple_refs(struct btrfs_root *root,
426 return ret; 426 return ret;
427 } 427 }
428 428
429 ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize, 429 ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
430 nodesize, old_roots, new_roots); 430 new_roots);
431 if (ret) { 431 if (ret) {
432 test_err("couldn't account space for a qgroup %d", ret); 432 test_err("couldn't account space for a qgroup %d", ret);
433 return ret; 433 return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ff5f6c719976..3b84f5015029 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -241,7 +241,7 @@ loop:
241 refcount_set(&cur_trans->use_count, 2); 241 refcount_set(&cur_trans->use_count, 2);
242 atomic_set(&cur_trans->pending_ordered, 0); 242 atomic_set(&cur_trans->pending_ordered, 0);
243 cur_trans->flags = 0; 243 cur_trans->flags = 0;
244 cur_trans->start_time = get_seconds(); 244 cur_trans->start_time = ktime_get_seconds();
245 245
246 memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); 246 memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
247 247
@@ -680,7 +680,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
680 680
681 trans = start_transaction(root, 0, TRANS_ATTACH, 681 trans = start_transaction(root, 0, TRANS_ATTACH,
682 BTRFS_RESERVE_NO_FLUSH, true); 682 BTRFS_RESERVE_NO_FLUSH, true);
683 if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) 683 if (trans == ERR_PTR(-ENOENT))
684 btrfs_wait_for_commit(root->fs_info, 0); 684 btrfs_wait_for_commit(root->fs_info, 0);
685 685
686 return trans; 686 return trans;
@@ -1152,7 +1152,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
1152 ret = btrfs_run_dev_replace(trans, fs_info); 1152 ret = btrfs_run_dev_replace(trans, fs_info);
1153 if (ret) 1153 if (ret)
1154 return ret; 1154 return ret;
1155 ret = btrfs_run_qgroups(trans, fs_info); 1155 ret = btrfs_run_qgroups(trans);
1156 if (ret) 1156 if (ret)
1157 return ret; 1157 return ret;
1158 1158
@@ -1355,8 +1355,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
1355 goto out; 1355 goto out;
1356 1356
1357 /* Now qgroup are all updated, we can inherit it to new qgroups */ 1357 /* Now qgroup are all updated, we can inherit it to new qgroups */
1358 ret = btrfs_qgroup_inherit(trans, fs_info, 1358 ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
1359 src->root_key.objectid, dst_objectid,
1360 inherit); 1359 inherit);
1361 if (ret < 0) 1360 if (ret < 0)
1362 goto out; 1361 goto out;
@@ -1574,7 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1574 /* 1573 /*
1575 * insert root back/forward references 1574 * insert root back/forward references
1576 */ 1575 */
1577 ret = btrfs_add_root_ref(trans, fs_info, objectid, 1576 ret = btrfs_add_root_ref(trans, objectid,
1578 parent_root->root_key.objectid, 1577 parent_root->root_key.objectid,
1579 btrfs_ino(BTRFS_I(parent_inode)), index, 1578 btrfs_ino(BTRFS_I(parent_inode)), index,
1580 dentry->d_name.name, dentry->d_name.len); 1579 dentry->d_name.name, dentry->d_name.len);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94439482a0ec..4cbb1b55387d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -48,7 +48,7 @@ struct btrfs_transaction {
48 int aborted; 48 int aborted;
49 struct list_head list; 49 struct list_head list;
50 struct extent_io_tree dirty_pages; 50 struct extent_io_tree dirty_pages;
51 unsigned long start_time; 51 time64_t start_time;
52 wait_queue_head_t writer_wait; 52 wait_queue_head_t writer_wait;
53 wait_queue_head_t commit_wait; 53 wait_queue_head_t commit_wait;
54 wait_queue_head_t pending_wait; 54 wait_queue_head_t pending_wait;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8d40e7dd8c30..db835635372f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -19,6 +19,7 @@
19#include "tree-checker.h" 19#include "tree-checker.h"
20#include "disk-io.h" 20#include "disk-io.h"
21#include "compression.h" 21#include "compression.h"
22#include "volumes.h"
22 23
23/* 24/*
24 * Error message should follow the following format: 25 * Error message should follow the following format:
@@ -353,6 +354,102 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
353 return 0; 354 return 0;
354} 355}
355 356
357__printf(4, 5)
358__cold
359static void block_group_err(const struct btrfs_fs_info *fs_info,
360 const struct extent_buffer *eb, int slot,
361 const char *fmt, ...)
362{
363 struct btrfs_key key;
364 struct va_format vaf;
365 va_list args;
366
367 btrfs_item_key_to_cpu(eb, &key, slot);
368 va_start(args, fmt);
369
370 vaf.fmt = fmt;
371 vaf.va = &args;
372
373 btrfs_crit(fs_info,
374 "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
375 btrfs_header_level(eb) == 0 ? "leaf" : "node",
376 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
377 key.objectid, key.offset, &vaf);
378 va_end(args);
379}
380
381static int check_block_group_item(struct btrfs_fs_info *fs_info,
382 struct extent_buffer *leaf,
383 struct btrfs_key *key, int slot)
384{
385 struct btrfs_block_group_item bgi;
386 u32 item_size = btrfs_item_size_nr(leaf, slot);
387 u64 flags;
388 u64 type;
389
390 /*
391 * Here we don't really care about alignment since extent allocator can
392 * handle it. We care more about the size, as if one block group is
393 * larger than maximum size, it's must be some obvious corruption.
394 */
395 if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
396 block_group_err(fs_info, leaf, slot,
397 "invalid block group size, have %llu expect (0, %llu]",
398 key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
399 return -EUCLEAN;
400 }
401
402 if (item_size != sizeof(bgi)) {
403 block_group_err(fs_info, leaf, slot,
404 "invalid item size, have %u expect %zu",
405 item_size, sizeof(bgi));
406 return -EUCLEAN;
407 }
408
409 read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
410 sizeof(bgi));
411 if (btrfs_block_group_chunk_objectid(&bgi) !=
412 BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
413 block_group_err(fs_info, leaf, slot,
414 "invalid block group chunk objectid, have %llu expect %llu",
415 btrfs_block_group_chunk_objectid(&bgi),
416 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
417 return -EUCLEAN;
418 }
419
420 if (btrfs_block_group_used(&bgi) > key->offset) {
421 block_group_err(fs_info, leaf, slot,
422 "invalid block group used, have %llu expect [0, %llu)",
423 btrfs_block_group_used(&bgi), key->offset);
424 return -EUCLEAN;
425 }
426
427 flags = btrfs_block_group_flags(&bgi);
428 if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
429 block_group_err(fs_info, leaf, slot,
430"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
431 flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
432 hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
433 return -EUCLEAN;
434 }
435
436 type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
437 if (type != BTRFS_BLOCK_GROUP_DATA &&
438 type != BTRFS_BLOCK_GROUP_METADATA &&
439 type != BTRFS_BLOCK_GROUP_SYSTEM &&
440 type != (BTRFS_BLOCK_GROUP_METADATA |
441 BTRFS_BLOCK_GROUP_DATA)) {
442 block_group_err(fs_info, leaf, slot,
443"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
444 type, hweight64(type),
445 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
446 BTRFS_BLOCK_GROUP_SYSTEM,
447 BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
448 return -EUCLEAN;
449 }
450 return 0;
451}
452
356/* 453/*
357 * Common point to switch the item-specific validation. 454 * Common point to switch the item-specific validation.
358 */ 455 */
@@ -374,6 +471,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info,
374 case BTRFS_XATTR_ITEM_KEY: 471 case BTRFS_XATTR_ITEM_KEY:
375 ret = check_dir_item(fs_info, leaf, key, slot); 472 ret = check_dir_item(fs_info, leaf, key, slot);
376 break; 473 break;
474 case BTRFS_BLOCK_GROUP_ITEM_KEY:
475 ret = check_block_group_item(fs_info, leaf, key, slot);
476 break;
377 } 477 }
378 return ret; 478 return ret;
379} 479}
@@ -396,9 +496,22 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
396 * skip this check for relocation trees. 496 * skip this check for relocation trees.
397 */ 497 */
398 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { 498 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
499 u64 owner = btrfs_header_owner(leaf);
399 struct btrfs_root *check_root; 500 struct btrfs_root *check_root;
400 501
401 key.objectid = btrfs_header_owner(leaf); 502 /* These trees must never be empty */
503 if (owner == BTRFS_ROOT_TREE_OBJECTID ||
504 owner == BTRFS_CHUNK_TREE_OBJECTID ||
505 owner == BTRFS_EXTENT_TREE_OBJECTID ||
506 owner == BTRFS_DEV_TREE_OBJECTID ||
507 owner == BTRFS_FS_TREE_OBJECTID ||
508 owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
509 generic_err(fs_info, leaf, 0,
510 "invalid root, root %llu must never be empty",
511 owner);
512 return -EUCLEAN;
513 }
514 key.objectid = owner;
402 key.type = BTRFS_ROOT_ITEM_KEY; 515 key.type = BTRFS_ROOT_ITEM_KEY;
403 key.offset = (u64)-1; 516 key.offset = (u64)-1;
404 517
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f8220ec02036..1650dc44a5e3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
545 key.type = BTRFS_INODE_ITEM_KEY; 545 key.type = BTRFS_INODE_ITEM_KEY;
546 key.offset = 0; 546 key.offset = 0;
547 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 547 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
548 if (IS_ERR(inode)) { 548 if (IS_ERR(inode))
549 inode = NULL; 549 inode = NULL;
550 } else if (is_bad_inode(inode)) {
551 iput(inode);
552 inode = NULL;
553 }
554 return inode; 550 return inode;
555} 551}
556 552
@@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
597 if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 593 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
598 nbytes = 0; 594 nbytes = 0;
599 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 595 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
600 size = btrfs_file_extent_inline_len(eb, slot, item); 596 size = btrfs_file_extent_ram_bytes(eb, item);
601 nbytes = btrfs_file_extent_ram_bytes(eb, item); 597 nbytes = btrfs_file_extent_ram_bytes(eb, item);
602 extent_end = ALIGN(start + size, 598 extent_end = ALIGN(start + size,
603 fs_info->sectorsize); 599 fs_info->sectorsize);
@@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
685 * as the owner of the file extent changed from log tree 681 * as the owner of the file extent changed from log tree
686 * (doesn't affect qgroup) to fs/file tree(affects qgroup) 682 * (doesn't affect qgroup) to fs/file tree(affects qgroup)
687 */ 683 */
688 ret = btrfs_qgroup_trace_extent(trans, fs_info, 684 ret = btrfs_qgroup_trace_extent(trans,
689 btrfs_file_extent_disk_bytenr(eb, item), 685 btrfs_file_extent_disk_bytenr(eb, item),
690 btrfs_file_extent_disk_num_bytes(eb, item), 686 btrfs_file_extent_disk_num_bytes(eb, item),
691 GFP_NOFS); 687 GFP_NOFS);
@@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
715 * allocation tree 711 * allocation tree
716 */ 712 */
717 ret = btrfs_alloc_logged_file_extent(trans, 713 ret = btrfs_alloc_logged_file_extent(trans,
718 fs_info,
719 root->root_key.objectid, 714 root->root_key.objectid,
720 key->objectid, offset, &ins); 715 key->objectid, offset, &ins);
721 if (ret) 716 if (ret)
@@ -1291,6 +1286,46 @@ again:
1291 return ret; 1286 return ret;
1292} 1287}
1293 1288
1289static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
1290 const u8 ref_type, const char *name,
1291 const int namelen)
1292{
1293 struct btrfs_key key;
1294 struct btrfs_path *path;
1295 const u64 parent_id = btrfs_ino(BTRFS_I(dir));
1296 int ret;
1297
1298 path = btrfs_alloc_path();
1299 if (!path)
1300 return -ENOMEM;
1301
1302 key.objectid = btrfs_ino(BTRFS_I(inode));
1303 key.type = ref_type;
1304 if (key.type == BTRFS_INODE_REF_KEY)
1305 key.offset = parent_id;
1306 else
1307 key.offset = btrfs_extref_hash(parent_id, name, namelen);
1308
1309 ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
1310 if (ret < 0)
1311 goto out;
1312 if (ret > 0) {
1313 ret = 0;
1314 goto out;
1315 }
1316 if (key.type == BTRFS_INODE_EXTREF_KEY)
1317 ret = btrfs_find_name_in_ext_backref(path->nodes[0],
1318 path->slots[0], parent_id,
1319 name, namelen, NULL);
1320 else
1321 ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
1322 name, namelen, NULL);
1323
1324out:
1325 btrfs_free_path(path);
1326 return ret;
1327}
1328
1294/* 1329/*
1295 * replay one inode back reference item found in the log tree. 1330 * replay one inode back reference item found in the log tree.
1296 * eb, slot and key refer to the buffer and key found in the log tree. 1331 * eb, slot and key refer to the buffer and key found in the log tree.
@@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1400 } 1435 }
1401 } 1436 }
1402 1437
1438 /*
1439 * If a reference item already exists for this inode
1440 * with the same parent and name, but different index,
1441 * drop it and the corresponding directory index entries
1442 * from the parent before adding the new reference item
1443 * and dir index entries, otherwise we would fail with
1444 * -EEXIST returned from btrfs_add_link() below.
1445 */
1446 ret = btrfs_inode_ref_exists(inode, dir, key->type,
1447 name, namelen);
1448 if (ret > 0) {
1449 ret = btrfs_unlink_inode(trans, root,
1450 BTRFS_I(dir),
1451 BTRFS_I(inode),
1452 name, namelen);
1453 /*
1454 * If we dropped the link count to 0, bump it so
1455 * that later the iput() on the inode will not
1456 * free it. We will fixup the link count later.
1457 */
1458 if (!ret && inode->i_nlink == 0)
1459 inc_nlink(inode);
1460 }
1461 if (ret < 0)
1462 goto out;
1463
1403 /* insert our name */ 1464 /* insert our name */
1404 ret = btrfs_add_link(trans, BTRFS_I(dir), 1465 ret = btrfs_add_link(trans, BTRFS_I(dir),
1405 BTRFS_I(inode), 1466 BTRFS_I(inode),
@@ -2120,7 +2181,7 @@ again:
2120 dir_key->offset, 2181 dir_key->offset,
2121 name, name_len, 0); 2182 name, name_len, 0);
2122 } 2183 }
2123 if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { 2184 if (!log_di || log_di == ERR_PTR(-ENOENT)) {
2124 btrfs_dir_item_key_to_cpu(eb, di, &location); 2185 btrfs_dir_item_key_to_cpu(eb, di, &location);
2125 btrfs_release_path(path); 2186 btrfs_release_path(path);
2126 btrfs_release_path(log_path); 2187 btrfs_release_path(log_path);
@@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2933 /* bail out if we need to do a full commit */ 2994 /* bail out if we need to do a full commit */
2934 if (btrfs_need_log_full_commit(fs_info, trans)) { 2995 if (btrfs_need_log_full_commit(fs_info, trans)) {
2935 ret = -EAGAIN; 2996 ret = -EAGAIN;
2936 btrfs_free_logged_extents(log, log_transid);
2937 mutex_unlock(&root->log_mutex); 2997 mutex_unlock(&root->log_mutex);
2938 goto out; 2998 goto out;
2939 } 2999 }
@@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2951 if (ret) { 3011 if (ret) {
2952 blk_finish_plug(&plug); 3012 blk_finish_plug(&plug);
2953 btrfs_abort_transaction(trans, ret); 3013 btrfs_abort_transaction(trans, ret);
2954 btrfs_free_logged_extents(log, log_transid);
2955 btrfs_set_log_full_commit(fs_info, trans); 3014 btrfs_set_log_full_commit(fs_info, trans);
2956 mutex_unlock(&root->log_mutex); 3015 mutex_unlock(&root->log_mutex);
2957 goto out; 3016 goto out;
@@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3002 goto out; 3061 goto out;
3003 } 3062 }
3004 btrfs_wait_tree_log_extents(log, mark); 3063 btrfs_wait_tree_log_extents(log, mark);
3005 btrfs_free_logged_extents(log, log_transid);
3006 mutex_unlock(&log_root_tree->log_mutex); 3064 mutex_unlock(&log_root_tree->log_mutex);
3007 ret = -EAGAIN; 3065 ret = -EAGAIN;
3008 goto out; 3066 goto out;
@@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3020 if (atomic_read(&log_root_tree->log_commit[index2])) { 3078 if (atomic_read(&log_root_tree->log_commit[index2])) {
3021 blk_finish_plug(&plug); 3079 blk_finish_plug(&plug);
3022 ret = btrfs_wait_tree_log_extents(log, mark); 3080 ret = btrfs_wait_tree_log_extents(log, mark);
3023 btrfs_wait_logged_extents(trans, log, log_transid);
3024 wait_log_commit(log_root_tree, 3081 wait_log_commit(log_root_tree,
3025 root_log_ctx.log_transid); 3082 root_log_ctx.log_transid);
3026 mutex_unlock(&log_root_tree->log_mutex); 3083 mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3045 if (btrfs_need_log_full_commit(fs_info, trans)) { 3102 if (btrfs_need_log_full_commit(fs_info, trans)) {
3046 blk_finish_plug(&plug); 3103 blk_finish_plug(&plug);
3047 btrfs_wait_tree_log_extents(log, mark); 3104 btrfs_wait_tree_log_extents(log, mark);
3048 btrfs_free_logged_extents(log, log_transid);
3049 mutex_unlock(&log_root_tree->log_mutex); 3105 mutex_unlock(&log_root_tree->log_mutex);
3050 ret = -EAGAIN; 3106 ret = -EAGAIN;
3051 goto out_wake_log_root; 3107 goto out_wake_log_root;
@@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3058 if (ret) { 3114 if (ret) {
3059 btrfs_set_log_full_commit(fs_info, trans); 3115 btrfs_set_log_full_commit(fs_info, trans);
3060 btrfs_abort_transaction(trans, ret); 3116 btrfs_abort_transaction(trans, ret);
3061 btrfs_free_logged_extents(log, log_transid);
3062 mutex_unlock(&log_root_tree->log_mutex); 3117 mutex_unlock(&log_root_tree->log_mutex);
3063 goto out_wake_log_root; 3118 goto out_wake_log_root;
3064 } 3119 }
@@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3068 EXTENT_NEW | EXTENT_DIRTY); 3123 EXTENT_NEW | EXTENT_DIRTY);
3069 if (ret) { 3124 if (ret) {
3070 btrfs_set_log_full_commit(fs_info, trans); 3125 btrfs_set_log_full_commit(fs_info, trans);
3071 btrfs_free_logged_extents(log, log_transid);
3072 mutex_unlock(&log_root_tree->log_mutex); 3126 mutex_unlock(&log_root_tree->log_mutex);
3073 goto out_wake_log_root; 3127 goto out_wake_log_root;
3074 } 3128 }
3075 btrfs_wait_logged_extents(trans, log, log_transid);
3076 3129
3077 btrfs_set_super_log_root(fs_info->super_for_commit, 3130 btrfs_set_super_log_root(fs_info->super_for_commit,
3078 log_root_tree->node->start); 3131 log_root_tree->node->start);
@@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
3159 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); 3212 EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
3160 } 3213 }
3161 3214
3162 /*
3163 * We may have short-circuited the log tree with the full commit logic
3164 * and left ordered extents on our list, so clear these out to keep us
3165 * from leaking inodes and memory.
3166 */
3167 btrfs_free_logged_extents(log, 0);
3168 btrfs_free_logged_extents(log, 1);
3169
3170 free_extent_buffer(log->node); 3215 free_extent_buffer(log->node);
3171 kfree(log); 3216 kfree(log);
3172} 3217}
@@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3756 int start_slot, int nr, int inode_only, 3801 int start_slot, int nr, int inode_only,
3757 u64 logged_isize) 3802 u64 logged_isize)
3758{ 3803{
3759 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 3804 struct btrfs_fs_info *fs_info = trans->fs_info;
3760 unsigned long src_offset; 3805 unsigned long src_offset;
3761 unsigned long dst_offset; 3806 unsigned long dst_offset;
3762 struct btrfs_root *log = inode->root->log_root; 3807 struct btrfs_root *log = inode->root->log_root;
@@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3937 struct btrfs_file_extent_item); 3982 struct btrfs_file_extent_item);
3938 if (btrfs_file_extent_type(src, extent) == 3983 if (btrfs_file_extent_type(src, extent) ==
3939 BTRFS_FILE_EXTENT_INLINE) { 3984 BTRFS_FILE_EXTENT_INLINE) {
3940 len = btrfs_file_extent_inline_len(src, 3985 len = btrfs_file_extent_ram_bytes(src, extent);
3941 src_path->slots[0],
3942 extent);
3943 *last_extent = ALIGN(key.offset + len, 3986 *last_extent = ALIGN(key.offset + len,
3944 fs_info->sectorsize); 3987 fs_info->sectorsize);
3945 } else { 3988 } else {
@@ -4004,7 +4047,7 @@ fill_holes:
4004 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); 4047 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
4005 if (btrfs_file_extent_type(src, extent) == 4048 if (btrfs_file_extent_type(src, extent) ==
4006 BTRFS_FILE_EXTENT_INLINE) { 4049 BTRFS_FILE_EXTENT_INLINE) {
4007 len = btrfs_file_extent_inline_len(src, i, extent); 4050 len = btrfs_file_extent_ram_bytes(src, extent);
4008 extent_end = ALIGN(key.offset + len, 4051 extent_end = ALIGN(key.offset + len,
4009 fs_info->sectorsize); 4052 fs_info->sectorsize);
4010 } else { 4053 } else {
@@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
4078 return 0; 4121 return 0;
4079} 4122}
4080 4123
4081static int wait_ordered_extents(struct btrfs_trans_handle *trans, 4124static int log_extent_csums(struct btrfs_trans_handle *trans,
4082 struct inode *inode, 4125 struct btrfs_inode *inode,
4083 struct btrfs_root *root, 4126 struct btrfs_root *log_root,
4084 const struct extent_map *em, 4127 const struct extent_map *em)
4085 const struct list_head *logged_list,
4086 bool *ordered_io_error)
4087{ 4128{
4088 struct btrfs_fs_info *fs_info = root->fs_info;
4089 struct btrfs_ordered_extent *ordered;
4090 struct btrfs_root *log = root->log_root;
4091 u64 mod_start = em->mod_start;
4092 u64 mod_len = em->mod_len;
4093 const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
4094 u64 csum_offset; 4129 u64 csum_offset;
4095 u64 csum_len; 4130 u64 csum_len;
4096 LIST_HEAD(ordered_sums); 4131 LIST_HEAD(ordered_sums);
4097 int ret = 0; 4132 int ret = 0;
4098 4133
4099 *ordered_io_error = false; 4134 if (inode->flags & BTRFS_INODE_NODATASUM ||
4100 4135 test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
4101 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
4102 em->block_start == EXTENT_MAP_HOLE) 4136 em->block_start == EXTENT_MAP_HOLE)
4103 return 0; 4137 return 0;
4104 4138
4105 /* 4139 /* If we're compressed we have to save the entire range of csums. */
4106 * Wait far any ordered extent that covers our extent map. If it
4107 * finishes without an error, first check and see if our csums are on
4108 * our outstanding ordered extents.
4109 */
4110 list_for_each_entry(ordered, logged_list, log_list) {
4111 struct btrfs_ordered_sum *sum;
4112
4113 if (!mod_len)
4114 break;
4115
4116 if (ordered->file_offset + ordered->len <= mod_start ||
4117 mod_start + mod_len <= ordered->file_offset)
4118 continue;
4119
4120 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
4121 !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
4122 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
4123 const u64 start = ordered->file_offset;
4124 const u64 end = ordered->file_offset + ordered->len - 1;
4125
4126 WARN_ON(ordered->inode != inode);
4127 filemap_fdatawrite_range(inode->i_mapping, start, end);
4128 }
4129
4130 wait_event(ordered->wait,
4131 (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) ||
4132 test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)));
4133
4134 if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
4135 /*
4136 * Clear the AS_EIO/AS_ENOSPC flags from the inode's
4137 * i_mapping flags, so that the next fsync won't get
4138 * an outdated io error too.
4139 */
4140 filemap_check_errors(inode->i_mapping);
4141 *ordered_io_error = true;
4142 break;
4143 }
4144 /*
4145 * We are going to copy all the csums on this ordered extent, so
4146 * go ahead and adjust mod_start and mod_len in case this
4147 * ordered extent has already been logged.
4148 */
4149 if (ordered->file_offset > mod_start) {
4150 if (ordered->file_offset + ordered->len >=
4151 mod_start + mod_len)
4152 mod_len = ordered->file_offset - mod_start;
4153 /*
4154 * If we have this case
4155 *
4156 * |--------- logged extent ---------|
4157 * |----- ordered extent ----|
4158 *
4159 * Just don't mess with mod_start and mod_len, we'll
4160 * just end up logging more csums than we need and it
4161 * will be ok.
4162 */
4163 } else {
4164 if (ordered->file_offset + ordered->len <
4165 mod_start + mod_len) {
4166 mod_len = (mod_start + mod_len) -
4167 (ordered->file_offset + ordered->len);
4168 mod_start = ordered->file_offset +
4169 ordered->len;
4170 } else {
4171 mod_len = 0;
4172 }
4173 }
4174
4175 if (skip_csum)
4176 continue;
4177
4178 /*
4179 * To keep us from looping for the above case of an ordered
4180 * extent that falls inside of the logged extent.
4181 */
4182 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
4183 &ordered->flags))
4184 continue;
4185
4186 list_for_each_entry(sum, &ordered->list, list) {
4187 ret = btrfs_csum_file_blocks(trans, log, sum);
4188 if (ret)
4189 break;
4190 }
4191 }
4192
4193 if (*ordered_io_error || !mod_len || ret || skip_csum)
4194 return ret;
4195
4196 if (em->compress_type) { 4140 if (em->compress_type) {
4197 csum_offset = 0; 4141 csum_offset = 0;
4198 csum_len = max(em->block_len, em->orig_block_len); 4142 csum_len = max(em->block_len, em->orig_block_len);
4199 } else { 4143 } else {
4200 csum_offset = mod_start - em->start; 4144 csum_offset = em->mod_start - em->start;
4201 csum_len = mod_len; 4145 csum_len = em->mod_len;
4202 } 4146 }
4203 4147
4204 /* block start is already adjusted for the file extent offset. */ 4148 /* block start is already adjusted for the file extent offset. */
4205 ret = btrfs_lookup_csums_range(fs_info->csum_root, 4149 ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
4206 em->block_start + csum_offset, 4150 em->block_start + csum_offset,
4207 em->block_start + csum_offset + 4151 em->block_start + csum_offset +
4208 csum_len - 1, &ordered_sums, 0); 4152 csum_len - 1, &ordered_sums, 0);
@@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
4214 struct btrfs_ordered_sum, 4158 struct btrfs_ordered_sum,
4215 list); 4159 list);
4216 if (!ret) 4160 if (!ret)
4217 ret = btrfs_csum_file_blocks(trans, log, sums); 4161 ret = btrfs_csum_file_blocks(trans, log_root, sums);
4218 list_del(&sums->list); 4162 list_del(&sums->list);
4219 kfree(sums); 4163 kfree(sums);
4220 } 4164 }
@@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
4226 struct btrfs_inode *inode, struct btrfs_root *root, 4170 struct btrfs_inode *inode, struct btrfs_root *root,
4227 const struct extent_map *em, 4171 const struct extent_map *em,
4228 struct btrfs_path *path, 4172 struct btrfs_path *path,
4229 const struct list_head *logged_list,
4230 struct btrfs_log_ctx *ctx) 4173 struct btrfs_log_ctx *ctx)
4231{ 4174{
4232 struct btrfs_root *log = root->log_root; 4175 struct btrfs_root *log = root->log_root;
@@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
4238 u64 block_len; 4181 u64 block_len;
4239 int ret; 4182 int ret;
4240 int extent_inserted = 0; 4183 int extent_inserted = 0;
4241 bool ordered_io_err = false;
4242 4184
4243 ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em, 4185 ret = log_extent_csums(trans, inode, log, em);
4244 logged_list, &ordered_io_err);
4245 if (ret) 4186 if (ret)
4246 return ret; 4187 return ret;
4247 4188
4248 if (ordered_io_err) {
4249 ctx->io_err = -EIO;
4250 return ctx->io_err;
4251 }
4252
4253 btrfs_init_map_token(&token); 4189 btrfs_init_map_token(&token);
4254 4190
4255 ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start, 4191 ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
@@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4424 struct btrfs_root *root, 4360 struct btrfs_root *root,
4425 struct btrfs_inode *inode, 4361 struct btrfs_inode *inode,
4426 struct btrfs_path *path, 4362 struct btrfs_path *path,
4427 struct list_head *logged_list,
4428 struct btrfs_log_ctx *ctx, 4363 struct btrfs_log_ctx *ctx,
4429 const u64 start, 4364 const u64 start,
4430 const u64 end) 4365 const u64 end)
@@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4480 } 4415 }
4481 4416
4482 list_sort(NULL, &extents, extent_cmp); 4417 list_sort(NULL, &extents, extent_cmp);
4483 btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
4484 /*
4485 * Some ordered extents started by fsync might have completed
4486 * before we could collect them into the list logged_list, which
4487 * means they're gone, not in our logged_list nor in the inode's
4488 * ordered tree. We want the application/user space to know an
4489 * error happened while attempting to persist file data so that
4490 * it can take proper action. If such error happened, we leave
4491 * without writing to the log tree and the fsync must report the
4492 * file data write error and not commit the current transaction.
4493 */
4494 ret = filemap_check_errors(inode->vfs_inode.i_mapping);
4495 if (ret)
4496 ctx->io_err = ret;
4497process: 4418process:
4498 while (!list_empty(&extents)) { 4419 while (!list_empty(&extents)) {
4499 em = list_entry(extents.next, struct extent_map, list); 4420 em = list_entry(extents.next, struct extent_map, list);
@@ -4512,8 +4433,7 @@ process:
4512 4433
4513 write_unlock(&tree->lock); 4434 write_unlock(&tree->lock);
4514 4435
4515 ret = log_one_extent(trans, inode, root, em, path, logged_list, 4436 ret = log_one_extent(trans, inode, root, em, path, ctx);
4516 ctx);
4517 write_lock(&tree->lock); 4437 write_lock(&tree->lock);
4518 clear_em_logging(tree, em); 4438 clear_em_logging(tree, em);
4519 free_extent_map(em); 4439 free_extent_map(em);
@@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
4712 4632
4713 if (btrfs_file_extent_type(leaf, extent) == 4633 if (btrfs_file_extent_type(leaf, extent) ==
4714 BTRFS_FILE_EXTENT_INLINE) { 4634 BTRFS_FILE_EXTENT_INLINE) {
4715 len = btrfs_file_extent_inline_len(leaf, 4635 len = btrfs_file_extent_ram_bytes(leaf, extent);
4716 path->slots[0],
4717 extent);
4718 ASSERT(len == i_size || 4636 ASSERT(len == i_size ||
4719 (len == fs_info->sectorsize && 4637 (len == fs_info->sectorsize &&
4720 btrfs_file_extent_compression(leaf, extent) != 4638 btrfs_file_extent_compression(leaf, extent) !=
@@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4898 struct btrfs_key min_key; 4816 struct btrfs_key min_key;
4899 struct btrfs_key max_key; 4817 struct btrfs_key max_key;
4900 struct btrfs_root *log = root->log_root; 4818 struct btrfs_root *log = root->log_root;
4901 LIST_HEAD(logged_list);
4902 u64 last_extent = 0; 4819 u64 last_extent = 0;
4903 int err = 0; 4820 int err = 0;
4904 int ret; 4821 int ret;
@@ -5094,8 +5011,7 @@ again:
5094 * we don't need to do more work nor fallback to 5011 * we don't need to do more work nor fallback to
5095 * a transaction commit. 5012 * a transaction commit.
5096 */ 5013 */
5097 if (IS_ERR(other_inode) && 5014 if (other_inode == ERR_PTR(-ENOENT)) {
5098 PTR_ERR(other_inode) == -ENOENT) {
5099 goto next_key; 5015 goto next_key;
5100 } else if (IS_ERR(other_inode)) { 5016 } else if (IS_ERR(other_inode)) {
5101 err = PTR_ERR(other_inode); 5017 err = PTR_ERR(other_inode);
@@ -5235,7 +5151,7 @@ log_extents:
5235 } 5151 }
5236 if (fast_search) { 5152 if (fast_search) {
5237 ret = btrfs_log_changed_extents(trans, root, inode, dst_path, 5153 ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
5238 &logged_list, ctx, start, end); 5154 ctx, start, end);
5239 if (ret) { 5155 if (ret) {
5240 err = ret; 5156 err = ret;
5241 goto out_unlock; 5157 goto out_unlock;
@@ -5286,10 +5202,6 @@ log_extents:
5286 inode->last_log_commit = inode->last_sub_trans; 5202 inode->last_log_commit = inode->last_sub_trans;
5287 spin_unlock(&inode->lock); 5203 spin_unlock(&inode->lock);
5288out_unlock: 5204out_unlock:
5289 if (unlikely(err))
5290 btrfs_put_logged_extents(&logged_list);
5291 else
5292 btrfs_submit_logged_extents(&logged_list, log);
5293 mutex_unlock(&inode->log_mutex); 5205 mutex_unlock(&inode->log_mutex);
5294 5206
5295 btrfs_free_path(path); 5207 btrfs_free_path(path);
@@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
5585 struct btrfs_inode *inode, 5497 struct btrfs_inode *inode,
5586 struct btrfs_log_ctx *ctx) 5498 struct btrfs_log_ctx *ctx)
5587{ 5499{
5588 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 5500 struct btrfs_fs_info *fs_info = trans->fs_info;
5589 int ret; 5501 int ret;
5590 struct btrfs_path *path; 5502 struct btrfs_path *path;
5591 struct btrfs_key key; 5503 struct btrfs_key key;
@@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
6120 struct btrfs_inode *inode, struct btrfs_inode *old_dir, 6032 struct btrfs_inode *inode, struct btrfs_inode *old_dir,
6121 struct dentry *parent) 6033 struct dentry *parent)
6122{ 6034{
6123 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); 6035 struct btrfs_fs_info *fs_info = trans->fs_info;
6124 6036
6125 /* 6037 /*
6126 * this will force the logging code to walk the dentry chain 6038 * this will force the logging code to walk the dentry chain
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1da162928d1a..da86706123ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8,15 +8,12 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/iocontext.h>
12#include <linux/capability.h>
13#include <linux/ratelimit.h> 11#include <linux/ratelimit.h>
14#include <linux/kthread.h> 12#include <linux/kthread.h>
15#include <linux/raid/pq.h> 13#include <linux/raid/pq.h>
16#include <linux/semaphore.h> 14#include <linux/semaphore.h>
17#include <linux/uuid.h> 15#include <linux/uuid.h>
18#include <linux/list_sort.h> 16#include <linux/list_sort.h>
19#include <asm/div64.h>
20#include "ctree.h" 17#include "ctree.h"
21#include "extent_map.h" 18#include "extent_map.h"
22#include "disk-io.h" 19#include "disk-io.h"
@@ -634,44 +631,48 @@ static void pending_bios_fn(struct btrfs_work *work)
634 * devices. 631 * devices.
635 */ 632 */
636static void btrfs_free_stale_devices(const char *path, 633static void btrfs_free_stale_devices(const char *path,
637 struct btrfs_device *skip_dev) 634 struct btrfs_device *skip_device)
638{ 635{
639 struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; 636 struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
640 struct btrfs_device *dev, *tmp_dev; 637 struct btrfs_device *device, *tmp_device;
641 638
642 list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) { 639 list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
643 640 mutex_lock(&fs_devices->device_list_mutex);
644 if (fs_devs->opened) 641 if (fs_devices->opened) {
642 mutex_unlock(&fs_devices->device_list_mutex);
645 continue; 643 continue;
644 }
646 645
647 list_for_each_entry_safe(dev, tmp_dev, 646 list_for_each_entry_safe(device, tmp_device,
648 &fs_devs->devices, dev_list) { 647 &fs_devices->devices, dev_list) {
649 int not_found = 0; 648 int not_found = 0;
650 649
651 if (skip_dev && skip_dev == dev) 650 if (skip_device && skip_device == device)
652 continue; 651 continue;
653 if (path && !dev->name) 652 if (path && !device->name)
654 continue; 653 continue;
655 654
656 rcu_read_lock(); 655 rcu_read_lock();
657 if (path) 656 if (path)
658 not_found = strcmp(rcu_str_deref(dev->name), 657 not_found = strcmp(rcu_str_deref(device->name),
659 path); 658 path);
660 rcu_read_unlock(); 659 rcu_read_unlock();
661 if (not_found) 660 if (not_found)
662 continue; 661 continue;
663 662
664 /* delete the stale device */ 663 /* delete the stale device */
665 if (fs_devs->num_devices == 1) { 664 fs_devices->num_devices--;
666 btrfs_sysfs_remove_fsid(fs_devs); 665 list_del(&device->dev_list);
667 list_del(&fs_devs->fs_list); 666 btrfs_free_device(device);
668 free_fs_devices(fs_devs); 667
668 if (fs_devices->num_devices == 0)
669 break; 669 break;
670 } else { 670 }
671 fs_devs->num_devices--; 671 mutex_unlock(&fs_devices->device_list_mutex);
672 list_del(&dev->dev_list); 672 if (fs_devices->num_devices == 0) {
673 btrfs_free_device(dev); 673 btrfs_sysfs_remove_fsid(fs_devices);
674 } 674 list_del(&fs_devices->fs_list);
675 free_fs_devices(fs_devices);
675 } 676 }
676 } 677 }
677} 678}
@@ -750,7 +751,8 @@ error_brelse:
750 * error pointer when failed 751 * error pointer when failed
751 */ 752 */
752static noinline struct btrfs_device *device_list_add(const char *path, 753static noinline struct btrfs_device *device_list_add(const char *path,
753 struct btrfs_super_block *disk_super) 754 struct btrfs_super_block *disk_super,
755 bool *new_device_added)
754{ 756{
755 struct btrfs_device *device; 757 struct btrfs_device *device;
756 struct btrfs_fs_devices *fs_devices; 758 struct btrfs_fs_devices *fs_devices;
@@ -764,21 +766,26 @@ static noinline struct btrfs_device *device_list_add(const char *path,
764 if (IS_ERR(fs_devices)) 766 if (IS_ERR(fs_devices))
765 return ERR_CAST(fs_devices); 767 return ERR_CAST(fs_devices);
766 768
769 mutex_lock(&fs_devices->device_list_mutex);
767 list_add(&fs_devices->fs_list, &fs_uuids); 770 list_add(&fs_devices->fs_list, &fs_uuids);
768 771
769 device = NULL; 772 device = NULL;
770 } else { 773 } else {
774 mutex_lock(&fs_devices->device_list_mutex);
771 device = find_device(fs_devices, devid, 775 device = find_device(fs_devices, devid,
772 disk_super->dev_item.uuid); 776 disk_super->dev_item.uuid);
773 } 777 }
774 778
775 if (!device) { 779 if (!device) {
776 if (fs_devices->opened) 780 if (fs_devices->opened) {
781 mutex_unlock(&fs_devices->device_list_mutex);
777 return ERR_PTR(-EBUSY); 782 return ERR_PTR(-EBUSY);
783 }
778 784
779 device = btrfs_alloc_device(NULL, &devid, 785 device = btrfs_alloc_device(NULL, &devid,
780 disk_super->dev_item.uuid); 786 disk_super->dev_item.uuid);
781 if (IS_ERR(device)) { 787 if (IS_ERR(device)) {
788 mutex_unlock(&fs_devices->device_list_mutex);
782 /* we can safely leave the fs_devices entry around */ 789 /* we can safely leave the fs_devices entry around */
783 return device; 790 return device;
784 } 791 }
@@ -786,17 +793,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
786 name = rcu_string_strdup(path, GFP_NOFS); 793 name = rcu_string_strdup(path, GFP_NOFS);
787 if (!name) { 794 if (!name) {
788 btrfs_free_device(device); 795 btrfs_free_device(device);
796 mutex_unlock(&fs_devices->device_list_mutex);
789 return ERR_PTR(-ENOMEM); 797 return ERR_PTR(-ENOMEM);
790 } 798 }
791 rcu_assign_pointer(device->name, name); 799 rcu_assign_pointer(device->name, name);
792 800
793 mutex_lock(&fs_devices->device_list_mutex);
794 list_add_rcu(&device->dev_list, &fs_devices->devices); 801 list_add_rcu(&device->dev_list, &fs_devices->devices);
795 fs_devices->num_devices++; 802 fs_devices->num_devices++;
796 mutex_unlock(&fs_devices->device_list_mutex);
797 803
798 device->fs_devices = fs_devices; 804 device->fs_devices = fs_devices;
799 btrfs_free_stale_devices(path, device); 805 *new_device_added = true;
800 806
801 if (disk_super->label[0]) 807 if (disk_super->label[0])
802 pr_info("BTRFS: device label %s devid %llu transid %llu %s\n", 808 pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
@@ -840,12 +846,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
840 * with larger generation number or the last-in if 846 * with larger generation number or the last-in if
841 * generation are equal. 847 * generation are equal.
842 */ 848 */
849 mutex_unlock(&fs_devices->device_list_mutex);
843 return ERR_PTR(-EEXIST); 850 return ERR_PTR(-EEXIST);
844 } 851 }
845 852
846 name = rcu_string_strdup(path, GFP_NOFS); 853 name = rcu_string_strdup(path, GFP_NOFS);
847 if (!name) 854 if (!name) {
855 mutex_unlock(&fs_devices->device_list_mutex);
848 return ERR_PTR(-ENOMEM); 856 return ERR_PTR(-ENOMEM);
857 }
849 rcu_string_free(device->name); 858 rcu_string_free(device->name);
850 rcu_assign_pointer(device->name, name); 859 rcu_assign_pointer(device->name, name);
851 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { 860 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
@@ -865,6 +874,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
865 874
866 fs_devices->total_devices = btrfs_super_num_devices(disk_super); 875 fs_devices->total_devices = btrfs_super_num_devices(disk_super);
867 876
877 mutex_unlock(&fs_devices->device_list_mutex);
868 return device; 878 return device;
869} 879}
870 880
@@ -1004,7 +1014,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
1004 blkdev_put(device->bdev, device->mode); 1014 blkdev_put(device->bdev, device->mode);
1005} 1015}
1006 1016
1007static void btrfs_prepare_close_one_device(struct btrfs_device *device) 1017static void btrfs_close_one_device(struct btrfs_device *device)
1008{ 1018{
1009 struct btrfs_fs_devices *fs_devices = device->fs_devices; 1019 struct btrfs_fs_devices *fs_devices = device->fs_devices;
1010 struct btrfs_device *new_device; 1020 struct btrfs_device *new_device;
@@ -1022,6 +1032,8 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
1022 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 1032 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
1023 fs_devices->missing_devices--; 1033 fs_devices->missing_devices--;
1024 1034
1035 btrfs_close_bdev(device);
1036
1025 new_device = btrfs_alloc_device(NULL, &device->devid, 1037 new_device = btrfs_alloc_device(NULL, &device->devid,
1026 device->uuid); 1038 device->uuid);
1027 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ 1039 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
@@ -1035,39 +1047,23 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
1035 1047
1036 list_replace_rcu(&device->dev_list, &new_device->dev_list); 1048 list_replace_rcu(&device->dev_list, &new_device->dev_list);
1037 new_device->fs_devices = device->fs_devices; 1049 new_device->fs_devices = device->fs_devices;
1050
1051 call_rcu(&device->rcu, free_device_rcu);
1038} 1052}
1039 1053
1040static int close_fs_devices(struct btrfs_fs_devices *fs_devices) 1054static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
1041{ 1055{
1042 struct btrfs_device *device, *tmp; 1056 struct btrfs_device *device, *tmp;
1043 struct list_head pending_put;
1044
1045 INIT_LIST_HEAD(&pending_put);
1046 1057
1047 if (--fs_devices->opened > 0) 1058 if (--fs_devices->opened > 0)
1048 return 0; 1059 return 0;
1049 1060
1050 mutex_lock(&fs_devices->device_list_mutex); 1061 mutex_lock(&fs_devices->device_list_mutex);
1051 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { 1062 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
1052 btrfs_prepare_close_one_device(device); 1063 btrfs_close_one_device(device);
1053 list_add(&device->dev_list, &pending_put);
1054 } 1064 }
1055 mutex_unlock(&fs_devices->device_list_mutex); 1065 mutex_unlock(&fs_devices->device_list_mutex);
1056 1066
1057 /*
1058 * btrfs_show_devname() is using the device_list_mutex,
1059 * sometimes call to blkdev_put() leads vfs calling
1060 * into this func. So do put outside of device_list_mutex,
1061 * as of now.
1062 */
1063 while (!list_empty(&pending_put)) {
1064 device = list_first_entry(&pending_put,
1065 struct btrfs_device, dev_list);
1066 list_del(&device->dev_list);
1067 btrfs_close_bdev(device);
1068 call_rcu(&device->rcu, free_device_rcu);
1069 }
1070
1071 WARN_ON(fs_devices->open_devices); 1067 WARN_ON(fs_devices->open_devices);
1072 WARN_ON(fs_devices->rw_devices); 1068 WARN_ON(fs_devices->rw_devices);
1073 fs_devices->opened = 0; 1069 fs_devices->opened = 0;
@@ -1146,7 +1142,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1146{ 1142{
1147 int ret; 1143 int ret;
1148 1144
1149 mutex_lock(&uuid_mutex); 1145 lockdep_assert_held(&uuid_mutex);
1146
1150 mutex_lock(&fs_devices->device_list_mutex); 1147 mutex_lock(&fs_devices->device_list_mutex);
1151 if (fs_devices->opened) { 1148 if (fs_devices->opened) {
1152 fs_devices->opened++; 1149 fs_devices->opened++;
@@ -1156,7 +1153,6 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1156 ret = open_fs_devices(fs_devices, flags, holder); 1153 ret = open_fs_devices(fs_devices, flags, holder);
1157 } 1154 }
1158 mutex_unlock(&fs_devices->device_list_mutex); 1155 mutex_unlock(&fs_devices->device_list_mutex);
1159 mutex_unlock(&uuid_mutex);
1160 1156
1161 return ret; 1157 return ret;
1162} 1158}
@@ -1217,16 +1213,18 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
1217 * and we are not allowed to call set_blocksize during the scan. The superblock 1213 * and we are not allowed to call set_blocksize during the scan. The superblock
1218 * is read via pagecache 1214 * is read via pagecache
1219 */ 1215 */
1220int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 1216struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
1221 struct btrfs_fs_devices **fs_devices_ret) 1217 void *holder)
1222{ 1218{
1223 struct btrfs_super_block *disk_super; 1219 struct btrfs_super_block *disk_super;
1224 struct btrfs_device *device; 1220 bool new_device_added = false;
1221 struct btrfs_device *device = NULL;
1225 struct block_device *bdev; 1222 struct block_device *bdev;
1226 struct page *page; 1223 struct page *page;
1227 int ret = 0;
1228 u64 bytenr; 1224 u64 bytenr;
1229 1225
1226 lockdep_assert_held(&uuid_mutex);
1227
1230 /* 1228 /*
1231 * we would like to check all the supers, but that would make 1229 * we would like to check all the supers, but that would make
1232 * a btrfs mount succeed after a mkfs from a different FS. 1230 * a btrfs mount succeed after a mkfs from a different FS.
@@ -1238,112 +1236,25 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
1238 1236
1239 bdev = blkdev_get_by_path(path, flags, holder); 1237 bdev = blkdev_get_by_path(path, flags, holder);
1240 if (IS_ERR(bdev)) 1238 if (IS_ERR(bdev))
1241 return PTR_ERR(bdev); 1239 return ERR_CAST(bdev);
1242 1240
1243 if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) { 1241 if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
1244 ret = -EINVAL; 1242 device = ERR_PTR(-EINVAL);
1245 goto error_bdev_put; 1243 goto error_bdev_put;
1246 } 1244 }
1247 1245
1248 mutex_lock(&uuid_mutex); 1246 device = device_list_add(path, disk_super, &new_device_added);
1249 device = device_list_add(path, disk_super); 1247 if (!IS_ERR(device)) {
1250 if (IS_ERR(device)) 1248 if (new_device_added)
1251 ret = PTR_ERR(device); 1249 btrfs_free_stale_devices(path, device);
1252 else 1250 }
1253 *fs_devices_ret = device->fs_devices;
1254 mutex_unlock(&uuid_mutex);
1255 1251
1256 btrfs_release_disk_super(page); 1252 btrfs_release_disk_super(page);
1257 1253
1258error_bdev_put: 1254error_bdev_put:
1259 blkdev_put(bdev, flags); 1255 blkdev_put(bdev, flags);
1260 1256
1261 return ret; 1257 return device;
1262}
1263
1264/* helper to account the used device space in the range */
1265int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
1266 u64 end, u64 *length)
1267{
1268 struct btrfs_key key;
1269 struct btrfs_root *root = device->fs_info->dev_root;
1270 struct btrfs_dev_extent *dev_extent;
1271 struct btrfs_path *path;
1272 u64 extent_end;
1273 int ret;
1274 int slot;
1275 struct extent_buffer *l;
1276
1277 *length = 0;
1278
1279 if (start >= device->total_bytes ||
1280 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1281 return 0;
1282
1283 path = btrfs_alloc_path();
1284 if (!path)
1285 return -ENOMEM;
1286 path->reada = READA_FORWARD;
1287
1288 key.objectid = device->devid;
1289 key.offset = start;
1290 key.type = BTRFS_DEV_EXTENT_KEY;
1291
1292 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1293 if (ret < 0)
1294 goto out;
1295 if (ret > 0) {
1296 ret = btrfs_previous_item(root, path, key.objectid, key.type);
1297 if (ret < 0)
1298 goto out;
1299 }
1300
1301 while (1) {
1302 l = path->nodes[0];
1303 slot = path->slots[0];
1304 if (slot >= btrfs_header_nritems(l)) {
1305 ret = btrfs_next_leaf(root, path);
1306 if (ret == 0)
1307 continue;
1308 if (ret < 0)
1309 goto out;
1310
1311 break;
1312 }
1313 btrfs_item_key_to_cpu(l, &key, slot);
1314
1315 if (key.objectid < device->devid)
1316 goto next;
1317
1318 if (key.objectid > device->devid)
1319 break;
1320
1321 if (key.type != BTRFS_DEV_EXTENT_KEY)
1322 goto next;
1323
1324 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1325 extent_end = key.offset + btrfs_dev_extent_length(l,
1326 dev_extent);
1327 if (key.offset <= start && extent_end > end) {
1328 *length = end - start + 1;
1329 break;
1330 } else if (key.offset <= start && extent_end > start)
1331 *length += extent_end - start;
1332 else if (key.offset > start && extent_end <= end)
1333 *length += extent_end - key.offset;
1334 else if (key.offset > start && key.offset <= end) {
1335 *length += end - key.offset + 1;
1336 break;
1337 } else if (key.offset > end)
1338 break;
1339
1340next:
1341 path->slots[0]++;
1342 }
1343 ret = 0;
1344out:
1345 btrfs_free_path(path);
1346 return ret;
1347} 1258}
1348 1259
1349static int contains_pending_extent(struct btrfs_transaction *transaction, 1260static int contains_pending_extent(struct btrfs_transaction *transaction,
@@ -1755,10 +1666,8 @@ error:
1755 * the btrfs_device struct should be fully filled in 1666 * the btrfs_device struct should be fully filled in
1756 */ 1667 */
1757static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, 1668static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1758 struct btrfs_fs_info *fs_info,
1759 struct btrfs_device *device) 1669 struct btrfs_device *device)
1760{ 1670{
1761 struct btrfs_root *root = fs_info->chunk_root;
1762 int ret; 1671 int ret;
1763 struct btrfs_path *path; 1672 struct btrfs_path *path;
1764 struct btrfs_dev_item *dev_item; 1673 struct btrfs_dev_item *dev_item;
@@ -1774,8 +1683,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1774 key.type = BTRFS_DEV_ITEM_KEY; 1683 key.type = BTRFS_DEV_ITEM_KEY;
1775 key.offset = device->devid; 1684 key.offset = device->devid;
1776 1685
1777 ret = btrfs_insert_empty_item(trans, root, path, &key, 1686 ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
1778 sizeof(*dev_item)); 1687 &key, sizeof(*dev_item));
1779 if (ret) 1688 if (ret)
1780 goto out; 1689 goto out;
1781 1690
@@ -1800,7 +1709,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1800 ptr = btrfs_device_uuid(dev_item); 1709 ptr = btrfs_device_uuid(dev_item);
1801 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 1710 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1802 ptr = btrfs_device_fsid(dev_item); 1711 ptr = btrfs_device_fsid(dev_item);
1803 write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE); 1712 write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE);
1804 btrfs_mark_buffer_dirty(leaf); 1713 btrfs_mark_buffer_dirty(leaf);
1805 1714
1806 ret = 0; 1715 ret = 0;
@@ -1924,9 +1833,10 @@ static struct btrfs_device * btrfs_find_next_active_device(
1924 * where this function called, there should be always be another device (or 1833 * where this function called, there should be always be another device (or
1925 * this_dev) which is active. 1834 * this_dev) which is active.
1926 */ 1835 */
1927void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, 1836void btrfs_assign_next_active_device(struct btrfs_device *device,
1928 struct btrfs_device *device, struct btrfs_device *this_dev) 1837 struct btrfs_device *this_dev)
1929{ 1838{
1839 struct btrfs_fs_info *fs_info = device->fs_info;
1930 struct btrfs_device *next_device; 1840 struct btrfs_device *next_device;
1931 1841
1932 if (this_dev) 1842 if (this_dev)
@@ -2029,11 +1939,14 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
2029 1939
2030 cur_devices->num_devices--; 1940 cur_devices->num_devices--;
2031 cur_devices->total_devices--; 1941 cur_devices->total_devices--;
1942 /* Update total_devices of the parent fs_devices if it's seed */
1943 if (cur_devices != fs_devices)
1944 fs_devices->total_devices--;
2032 1945
2033 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 1946 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
2034 cur_devices->missing_devices--; 1947 cur_devices->missing_devices--;
2035 1948
2036 btrfs_assign_next_active_device(fs_info, device, NULL); 1949 btrfs_assign_next_active_device(device, NULL);
2037 1950
2038 if (device->bdev) { 1951 if (device->bdev) {
2039 cur_devices->open_devices--; 1952 cur_devices->open_devices--;
@@ -2084,12 +1997,11 @@ error_undo:
2084 goto out; 1997 goto out;
2085} 1998}
2086 1999
2087void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, 2000void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
2088 struct btrfs_device *srcdev)
2089{ 2001{
2090 struct btrfs_fs_devices *fs_devices; 2002 struct btrfs_fs_devices *fs_devices;
2091 2003
2092 lockdep_assert_held(&fs_info->fs_devices->device_list_mutex); 2004 lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
2093 2005
2094 /* 2006 /*
2095 * in case of fs with no seed, srcdev->fs_devices will point 2007 * in case of fs with no seed, srcdev->fs_devices will point
@@ -2151,10 +2063,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2151 } 2063 }
2152} 2064}
2153 2065
2154void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, 2066void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
2155 struct btrfs_device *tgtdev)
2156{ 2067{
2157 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 2068 struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
2158 2069
2159 WARN_ON(!tgtdev); 2070 WARN_ON(!tgtdev);
2160 mutex_lock(&fs_devices->device_list_mutex); 2071 mutex_lock(&fs_devices->device_list_mutex);
@@ -2166,7 +2077,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2166 2077
2167 fs_devices->num_devices--; 2078 fs_devices->num_devices--;
2168 2079
2169 btrfs_assign_next_active_device(fs_info, tgtdev, NULL); 2080 btrfs_assign_next_active_device(tgtdev, NULL);
2170 2081
2171 list_del_rcu(&tgtdev->dev_list); 2082 list_del_rcu(&tgtdev->dev_list);
2172 2083
@@ -2297,7 +2208,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2297 INIT_LIST_HEAD(&seed_devices->alloc_list); 2208 INIT_LIST_HEAD(&seed_devices->alloc_list);
2298 mutex_init(&seed_devices->device_list_mutex); 2209 mutex_init(&seed_devices->device_list_mutex);
2299 2210
2300 mutex_lock(&fs_info->fs_devices->device_list_mutex); 2211 mutex_lock(&fs_devices->device_list_mutex);
2301 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, 2212 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
2302 synchronize_rcu); 2213 synchronize_rcu);
2303 list_for_each_entry(device, &seed_devices->devices, dev_list) 2214 list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2317,7 +2228,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2317 generate_random_uuid(fs_devices->fsid); 2228 generate_random_uuid(fs_devices->fsid);
2318 memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 2229 memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2319 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 2230 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2320 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2231 mutex_unlock(&fs_devices->device_list_mutex);
2321 2232
2322 super_flags = btrfs_super_flags(disk_super) & 2233 super_flags = btrfs_super_flags(disk_super) &
2323 ~BTRFS_SUPER_FLAG_SEEDING; 2234 ~BTRFS_SUPER_FLAG_SEEDING;
@@ -2407,15 +2318,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2407 struct btrfs_trans_handle *trans; 2318 struct btrfs_trans_handle *trans;
2408 struct btrfs_device *device; 2319 struct btrfs_device *device;
2409 struct block_device *bdev; 2320 struct block_device *bdev;
2410 struct list_head *devices;
2411 struct super_block *sb = fs_info->sb; 2321 struct super_block *sb = fs_info->sb;
2412 struct rcu_string *name; 2322 struct rcu_string *name;
2413 u64 tmp; 2323 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2324 u64 orig_super_total_bytes;
2325 u64 orig_super_num_devices;
2414 int seeding_dev = 0; 2326 int seeding_dev = 0;
2415 int ret = 0; 2327 int ret = 0;
2416 bool unlocked = false; 2328 bool unlocked = false;
2417 2329
2418 if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) 2330 if (sb_rdonly(sb) && !fs_devices->seeding)
2419 return -EROFS; 2331 return -EROFS;
2420 2332
2421 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, 2333 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
@@ -2423,7 +2335,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2423 if (IS_ERR(bdev)) 2335 if (IS_ERR(bdev))
2424 return PTR_ERR(bdev); 2336 return PTR_ERR(bdev);
2425 2337
2426 if (fs_info->fs_devices->seeding) { 2338 if (fs_devices->seeding) {
2427 seeding_dev = 1; 2339 seeding_dev = 1;
2428 down_write(&sb->s_umount); 2340 down_write(&sb->s_umount);
2429 mutex_lock(&uuid_mutex); 2341 mutex_lock(&uuid_mutex);
@@ -2431,18 +2343,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2431 2343
2432 filemap_write_and_wait(bdev->bd_inode->i_mapping); 2344 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2433 2345
2434 devices = &fs_info->fs_devices->devices; 2346 mutex_lock(&fs_devices->device_list_mutex);
2435 2347 list_for_each_entry(device, &fs_devices->devices, dev_list) {
2436 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2437 list_for_each_entry(device, devices, dev_list) {
2438 if (device->bdev == bdev) { 2348 if (device->bdev == bdev) {
2439 ret = -EEXIST; 2349 ret = -EEXIST;
2440 mutex_unlock( 2350 mutex_unlock(
2441 &fs_info->fs_devices->device_list_mutex); 2351 &fs_devices->device_list_mutex);
2442 goto error; 2352 goto error;
2443 } 2353 }
2444 } 2354 }
2445 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2355 mutex_unlock(&fs_devices->device_list_mutex);
2446 2356
2447 device = btrfs_alloc_device(fs_info, NULL, NULL); 2357 device = btrfs_alloc_device(fs_info, NULL, NULL);
2448 if (IS_ERR(device)) { 2358 if (IS_ERR(device)) {
@@ -2491,33 +2401,34 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2491 } 2401 }
2492 } 2402 }
2493 2403
2494 device->fs_devices = fs_info->fs_devices; 2404 device->fs_devices = fs_devices;
2495 2405
2496 mutex_lock(&fs_info->fs_devices->device_list_mutex); 2406 mutex_lock(&fs_devices->device_list_mutex);
2497 mutex_lock(&fs_info->chunk_mutex); 2407 mutex_lock(&fs_info->chunk_mutex);
2498 list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices); 2408 list_add_rcu(&device->dev_list, &fs_devices->devices);
2499 list_add(&device->dev_alloc_list, 2409 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2500 &fs_info->fs_devices->alloc_list); 2410 fs_devices->num_devices++;
2501 fs_info->fs_devices->num_devices++; 2411 fs_devices->open_devices++;
2502 fs_info->fs_devices->open_devices++; 2412 fs_devices->rw_devices++;
2503 fs_info->fs_devices->rw_devices++; 2413 fs_devices->total_devices++;
2504 fs_info->fs_devices->total_devices++; 2414 fs_devices->total_rw_bytes += device->total_bytes;
2505 fs_info->fs_devices->total_rw_bytes += device->total_bytes;
2506 2415
2507 atomic64_add(device->total_bytes, &fs_info->free_chunk_space); 2416 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2508 2417
2509 if (!blk_queue_nonrot(q)) 2418 if (!blk_queue_nonrot(q))
2510 fs_info->fs_devices->rotating = 1; 2419 fs_devices->rotating = 1;
2511 2420
2512 tmp = btrfs_super_total_bytes(fs_info->super_copy); 2421 orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
2513 btrfs_set_super_total_bytes(fs_info->super_copy, 2422 btrfs_set_super_total_bytes(fs_info->super_copy,
2514 round_down(tmp + device->total_bytes, fs_info->sectorsize)); 2423 round_down(orig_super_total_bytes + device->total_bytes,
2424 fs_info->sectorsize));
2515 2425
2516 tmp = btrfs_super_num_devices(fs_info->super_copy); 2426 orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
2517 btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1); 2427 btrfs_set_super_num_devices(fs_info->super_copy,
2428 orig_super_num_devices + 1);
2518 2429
2519 /* add sysfs device entry */ 2430 /* add sysfs device entry */
2520 btrfs_sysfs_add_device_link(fs_info->fs_devices, device); 2431 btrfs_sysfs_add_device_link(fs_devices, device);
2521 2432
2522 /* 2433 /*
2523 * we've got more storage, clear any full flags on the space 2434 * we've got more storage, clear any full flags on the space
@@ -2526,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2526 btrfs_clear_space_info_full(fs_info); 2437 btrfs_clear_space_info_full(fs_info);
2527 2438
2528 mutex_unlock(&fs_info->chunk_mutex); 2439 mutex_unlock(&fs_info->chunk_mutex);
2529 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2440 mutex_unlock(&fs_devices->device_list_mutex);
2530 2441
2531 if (seeding_dev) { 2442 if (seeding_dev) {
2532 mutex_lock(&fs_info->chunk_mutex); 2443 mutex_lock(&fs_info->chunk_mutex);
@@ -2538,7 +2449,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2538 } 2449 }
2539 } 2450 }
2540 2451
2541 ret = btrfs_add_dev_item(trans, fs_info, device); 2452 ret = btrfs_add_dev_item(trans, device);
2542 if (ret) { 2453 if (ret) {
2543 btrfs_abort_transaction(trans, ret); 2454 btrfs_abort_transaction(trans, ret);
2544 goto error_sysfs; 2455 goto error_sysfs;
@@ -2558,7 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2558 */ 2469 */
2559 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", 2470 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
2560 fs_info->fsid); 2471 fs_info->fsid);
2561 if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf)) 2472 if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf))
2562 btrfs_warn(fs_info, 2473 btrfs_warn(fs_info,
2563 "sysfs: failed to create fsid for sprout"); 2474 "sysfs: failed to create fsid for sprout");
2564 } 2475 }
@@ -2593,7 +2504,23 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
2593 return ret; 2504 return ret;
2594 2505
2595error_sysfs: 2506error_sysfs:
2596 btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); 2507 btrfs_sysfs_rm_device_link(fs_devices, device);
2508 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2509 mutex_lock(&fs_info->chunk_mutex);
2510 list_del_rcu(&device->dev_list);
2511 list_del(&device->dev_alloc_list);
2512 fs_info->fs_devices->num_devices--;
2513 fs_info->fs_devices->open_devices--;
2514 fs_info->fs_devices->rw_devices--;
2515 fs_info->fs_devices->total_devices--;
2516 fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
2517 atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
2518 btrfs_set_super_total_bytes(fs_info->super_copy,
2519 orig_super_total_bytes);
2520 btrfs_set_super_num_devices(fs_info->super_copy,
2521 orig_super_num_devices);
2522 mutex_unlock(&fs_info->chunk_mutex);
2523 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2597error_trans: 2524error_trans:
2598 if (seeding_dev) 2525 if (seeding_dev)
2599 sb->s_flags |= SB_RDONLY; 2526 sb->s_flags |= SB_RDONLY;
@@ -2697,9 +2624,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2697 return btrfs_update_device(trans, device); 2624 return btrfs_update_device(trans, device);
2698} 2625}
2699 2626
2700static int btrfs_free_chunk(struct btrfs_trans_handle *trans, 2627static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2701 struct btrfs_fs_info *fs_info, u64 chunk_offset)
2702{ 2628{
2629 struct btrfs_fs_info *fs_info = trans->fs_info;
2703 struct btrfs_root *root = fs_info->chunk_root; 2630 struct btrfs_root *root = fs_info->chunk_root;
2704 int ret; 2631 int ret;
2705 struct btrfs_path *path; 2632 struct btrfs_path *path;
@@ -2808,9 +2735,9 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
2808 return em; 2735 return em;
2809} 2736}
2810 2737
2811int btrfs_remove_chunk(struct btrfs_trans_handle *trans, 2738int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2812 struct btrfs_fs_info *fs_info, u64 chunk_offset)
2813{ 2739{
2740 struct btrfs_fs_info *fs_info = trans->fs_info;
2814 struct extent_map *em; 2741 struct extent_map *em;
2815 struct map_lookup *map; 2742 struct map_lookup *map;
2816 u64 dev_extent_len = 0; 2743 u64 dev_extent_len = 0;
@@ -2829,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2829 } 2756 }
2830 map = em->map_lookup; 2757 map = em->map_lookup;
2831 mutex_lock(&fs_info->chunk_mutex); 2758 mutex_lock(&fs_info->chunk_mutex);
2832 check_system_chunk(trans, fs_info, map->type); 2759 check_system_chunk(trans, map->type);
2833 mutex_unlock(&fs_info->chunk_mutex); 2760 mutex_unlock(&fs_info->chunk_mutex);
2834 2761
2835 /* 2762 /*
@@ -2869,7 +2796,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2869 } 2796 }
2870 mutex_unlock(&fs_devices->device_list_mutex); 2797 mutex_unlock(&fs_devices->device_list_mutex);
2871 2798
2872 ret = btrfs_free_chunk(trans, fs_info, chunk_offset); 2799 ret = btrfs_free_chunk(trans, chunk_offset);
2873 if (ret) { 2800 if (ret) {
2874 btrfs_abort_transaction(trans, ret); 2801 btrfs_abort_transaction(trans, ret);
2875 goto out; 2802 goto out;
@@ -2885,7 +2812,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2885 } 2812 }
2886 } 2813 }
2887 2814
2888 ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em); 2815 ret = btrfs_remove_block_group(trans, chunk_offset, em);
2889 if (ret) { 2816 if (ret) {
2890 btrfs_abort_transaction(trans, ret); 2817 btrfs_abort_transaction(trans, ret);
2891 goto out; 2818 goto out;
@@ -2950,7 +2877,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2950 * step two, delete the device extents and the 2877 * step two, delete the device extents and the
2951 * chunk tree entries 2878 * chunk tree entries
2952 */ 2879 */
2953 ret = btrfs_remove_chunk(trans, fs_info, chunk_offset); 2880 ret = btrfs_remove_chunk(trans, chunk_offset);
2954 btrfs_end_transaction(trans); 2881 btrfs_end_transaction(trans);
2955 return ret; 2882 return ret;
2956} 2883}
@@ -3059,7 +2986,7 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3059 if (IS_ERR(trans)) 2986 if (IS_ERR(trans))
3060 return PTR_ERR(trans); 2987 return PTR_ERR(trans);
3061 2988
3062 ret = btrfs_force_chunk_alloc(trans, fs_info, 2989 ret = btrfs_force_chunk_alloc(trans,
3063 BTRFS_BLOCK_GROUP_DATA); 2990 BTRFS_BLOCK_GROUP_DATA);
3064 btrfs_end_transaction(trans); 2991 btrfs_end_transaction(trans);
3065 if (ret < 0) 2992 if (ret < 0)
@@ -4692,7 +4619,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4692 4619
4693 if (type & BTRFS_BLOCK_GROUP_DATA) { 4620 if (type & BTRFS_BLOCK_GROUP_DATA) {
4694 max_stripe_size = SZ_1G; 4621 max_stripe_size = SZ_1G;
4695 max_chunk_size = 10 * max_stripe_size; 4622 max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
4696 if (!devs_max) 4623 if (!devs_max)
4697 devs_max = BTRFS_MAX_DEVS(info); 4624 devs_max = BTRFS_MAX_DEVS(info);
4698 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 4625 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -4900,7 +4827,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4900 refcount_inc(&em->refs); 4827 refcount_inc(&em->refs);
4901 write_unlock(&em_tree->lock); 4828 write_unlock(&em_tree->lock);
4902 4829
4903 ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); 4830 ret = btrfs_make_block_group(trans, 0, type, start, num_bytes);
4904 if (ret) 4831 if (ret)
4905 goto error_del_extent; 4832 goto error_del_extent;
4906 4833
@@ -4934,9 +4861,9 @@ error:
4934} 4861}
4935 4862
4936int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 4863int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
4937 struct btrfs_fs_info *fs_info, 4864 u64 chunk_offset, u64 chunk_size)
4938 u64 chunk_offset, u64 chunk_size)
4939{ 4865{
4866 struct btrfs_fs_info *fs_info = trans->fs_info;
4940 struct btrfs_root *extent_root = fs_info->extent_root; 4867 struct btrfs_root *extent_root = fs_info->extent_root;
4941 struct btrfs_root *chunk_root = fs_info->chunk_root; 4868 struct btrfs_root *chunk_root = fs_info->chunk_root;
4942 struct btrfs_key key; 4869 struct btrfs_key key;
@@ -5038,13 +4965,12 @@ out:
5038 * require modifying the chunk tree. This division is important for the 4965 * require modifying the chunk tree. This division is important for the
5039 * bootstrap process of adding storage to a seed btrfs. 4966 * bootstrap process of adding storage to a seed btrfs.
5040 */ 4967 */
5041int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 4968int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5042 struct btrfs_fs_info *fs_info, u64 type)
5043{ 4969{
5044 u64 chunk_offset; 4970 u64 chunk_offset;
5045 4971
5046 lockdep_assert_held(&fs_info->chunk_mutex); 4972 lockdep_assert_held(&trans->fs_info->chunk_mutex);
5047 chunk_offset = find_next_chunk(fs_info); 4973 chunk_offset = find_next_chunk(trans->fs_info);
5048 return __btrfs_alloc_chunk(trans, chunk_offset, type); 4974 return __btrfs_alloc_chunk(trans, chunk_offset, type);
5049} 4975}
5050 4976
@@ -5175,7 +5101,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5175 /* 5101 /*
5176 * There could be two corrupted data stripes, we need 5102 * There could be two corrupted data stripes, we need
5177 * to loop retry in order to rebuild the correct data. 5103 * to loop retry in order to rebuild the correct data.
5178 * 5104 *
5179 * Fail a stripe at a time on every retry except the 5105 * Fail a stripe at a time on every retry except the
5180 * stripe under reconstruction. 5106 * stripe under reconstruction.
5181 */ 5107 */
@@ -6187,21 +6113,11 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6187 btrfs_io_bio(bio)->stripe_index = dev_nr; 6113 btrfs_io_bio(bio)->stripe_index = dev_nr;
6188 bio->bi_end_io = btrfs_end_bio; 6114 bio->bi_end_io = btrfs_end_bio;
6189 bio->bi_iter.bi_sector = physical >> 9; 6115 bio->bi_iter.bi_sector = physical >> 9;
6190#ifdef DEBUG 6116 btrfs_debug_in_rcu(fs_info,
6191 { 6117 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6192 struct rcu_string *name; 6118 bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
6193 6119 (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
6194 rcu_read_lock(); 6120 bio->bi_iter.bi_size);
6195 name = rcu_dereference(dev->name);
6196 btrfs_debug(fs_info,
6197 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6198 bio_op(bio), bio->bi_opf,
6199 (u64)bio->bi_iter.bi_sector,
6200 (u_long)dev->bdev->bd_dev, name->str, dev->devid,
6201 bio->bi_iter.bi_size);
6202 rcu_read_unlock();
6203 }
6204#endif
6205 bio_set_dev(bio, dev->bdev); 6121 bio_set_dev(bio, dev->bdev);
6206 6122
6207 btrfs_bio_counter_inc_noblocked(fs_info); 6123 btrfs_bio_counter_inc_noblocked(fs_info);
@@ -6403,6 +6319,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
6403 u16 num_stripes; 6319 u16 num_stripes;
6404 u16 sub_stripes; 6320 u16 sub_stripes;
6405 u64 type; 6321 u64 type;
6322 u64 features;
6323 bool mixed = false;
6406 6324
6407 length = btrfs_chunk_length(leaf, chunk); 6325 length = btrfs_chunk_length(leaf, chunk);
6408 stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 6326 stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6441,6 +6359,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
6441 btrfs_chunk_type(leaf, chunk)); 6359 btrfs_chunk_type(leaf, chunk));
6442 return -EIO; 6360 return -EIO;
6443 } 6361 }
6362
6363 if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
6364 btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
6365 return -EIO;
6366 }
6367
6368 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
6369 (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
6370 btrfs_err(fs_info,
6371 "system chunk with data or metadata type: 0x%llx", type);
6372 return -EIO;
6373 }
6374
6375 features = btrfs_super_incompat_flags(fs_info->super_copy);
6376 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
6377 mixed = true;
6378
6379 if (!mixed) {
6380 if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
6381 (type & BTRFS_BLOCK_GROUP_DATA)) {
6382 btrfs_err(fs_info,
6383 "mixed chunk type in non-mixed mode: 0x%llx", type);
6384 return -EIO;
6385 }
6386 }
6387
6444 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || 6388 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
6445 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) || 6389 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
6446 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || 6390 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
@@ -6527,6 +6471,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
6527 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 6471 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6528 map->type = btrfs_chunk_type(leaf, chunk); 6472 map->type = btrfs_chunk_type(leaf, chunk);
6529 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 6473 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6474 map->verified_stripes = 0;
6530 for (i = 0; i < num_stripes; i++) { 6475 for (i = 0; i < num_stripes; i++) {
6531 map->stripes[i].physical = 6476 map->stripes[i].physical =
6532 btrfs_stripe_offset_nr(leaf, chunk, i); 6477 btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -6563,10 +6508,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
6563 write_lock(&map_tree->map_tree.lock); 6508 write_lock(&map_tree->map_tree.lock);
6564 ret = add_extent_mapping(&map_tree->map_tree, em, 0); 6509 ret = add_extent_mapping(&map_tree->map_tree, em, 0);
6565 write_unlock(&map_tree->map_tree.lock); 6510 write_unlock(&map_tree->map_tree.lock);
6566 BUG_ON(ret); /* Tree corruption */ 6511 if (ret < 0) {
6512 btrfs_err(fs_info,
6513 "failed to add chunk map, start=%llu len=%llu: %d",
6514 em->start, em->len, ret);
6515 }
6567 free_extent_map(em); 6516 free_extent_map(em);
6568 6517
6569 return 0; 6518 return ret;
6570} 6519}
6571 6520
6572static void fill_device_from_item(struct extent_buffer *leaf, 6521static void fill_device_from_item(struct extent_buffer *leaf,
@@ -7108,9 +7057,9 @@ out:
7108} 7057}
7109 7058
7110static int update_dev_stat_item(struct btrfs_trans_handle *trans, 7059static int update_dev_stat_item(struct btrfs_trans_handle *trans,
7111 struct btrfs_fs_info *fs_info,
7112 struct btrfs_device *device) 7060 struct btrfs_device *device)
7113{ 7061{
7062 struct btrfs_fs_info *fs_info = trans->fs_info;
7114 struct btrfs_root *dev_root = fs_info->dev_root; 7063 struct btrfs_root *dev_root = fs_info->dev_root;
7115 struct btrfs_path *path; 7064 struct btrfs_path *path;
7116 struct btrfs_key key; 7065 struct btrfs_key key;
@@ -7203,7 +7152,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
7203 */ 7152 */
7204 smp_rmb(); 7153 smp_rmb();
7205 7154
7206 ret = update_dev_stat_item(trans, fs_info, device); 7155 ret = update_dev_stat_item(trans, device);
7207 if (!ret) 7156 if (!ret)
7208 atomic_sub(stats_cnt, &device->dev_stats_ccnt); 7157 atomic_sub(stats_cnt, &device->dev_stats_ccnt);
7209 } 7158 }
@@ -7382,3 +7331,197 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
7382 fs_devices = fs_devices->seed; 7331 fs_devices = fs_devices->seed;
7383 } 7332 }
7384} 7333}
7334
7335/*
7336 * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
7337 */
7338int btrfs_bg_type_to_factor(u64 flags)
7339{
7340 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
7341 BTRFS_BLOCK_GROUP_RAID10))
7342 return 2;
7343 return 1;
7344}
7345
7346
7347static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
7348{
7349 int index = btrfs_bg_flags_to_raid_index(type);
7350 int ncopies = btrfs_raid_array[index].ncopies;
7351 int data_stripes;
7352
7353 switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
7354 case BTRFS_BLOCK_GROUP_RAID5:
7355 data_stripes = num_stripes - 1;
7356 break;
7357 case BTRFS_BLOCK_GROUP_RAID6:
7358 data_stripes = num_stripes - 2;
7359 break;
7360 default:
7361 data_stripes = num_stripes / ncopies;
7362 break;
7363 }
7364 return div_u64(chunk_len, data_stripes);
7365}
7366
7367static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7368 u64 chunk_offset, u64 devid,
7369 u64 physical_offset, u64 physical_len)
7370{
7371 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
7372 struct extent_map *em;
7373 struct map_lookup *map;
7374 u64 stripe_len;
7375 bool found = false;
7376 int ret = 0;
7377 int i;
7378
7379 read_lock(&em_tree->lock);
7380 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
7381 read_unlock(&em_tree->lock);
7382
7383 if (!em) {
7384 btrfs_err(fs_info,
7385"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
7386 physical_offset, devid);
7387 ret = -EUCLEAN;
7388 goto out;
7389 }
7390
7391 map = em->map_lookup;
7392 stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
7393 if (physical_len != stripe_len) {
7394 btrfs_err(fs_info,
7395"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
7396 physical_offset, devid, em->start, physical_len,
7397 stripe_len);
7398 ret = -EUCLEAN;
7399 goto out;
7400 }
7401
7402 for (i = 0; i < map->num_stripes; i++) {
7403 if (map->stripes[i].dev->devid == devid &&
7404 map->stripes[i].physical == physical_offset) {
7405 found = true;
7406 if (map->verified_stripes >= map->num_stripes) {
7407 btrfs_err(fs_info,
7408 "too many dev extents for chunk %llu found",
7409 em->start);
7410 ret = -EUCLEAN;
7411 goto out;
7412 }
7413 map->verified_stripes++;
7414 break;
7415 }
7416 }
7417 if (!found) {
7418 btrfs_err(fs_info,
7419 "dev extent physical offset %llu devid %llu has no corresponding chunk",
7420 physical_offset, devid);
7421 ret = -EUCLEAN;
7422 }
7423out:
7424 free_extent_map(em);
7425 return ret;
7426}
7427
7428static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
7429{
7430 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
7431 struct extent_map *em;
7432 struct rb_node *node;
7433 int ret = 0;
7434
7435 read_lock(&em_tree->lock);
7436 for (node = rb_first(&em_tree->map); node; node = rb_next(node)) {
7437 em = rb_entry(node, struct extent_map, rb_node);
7438 if (em->map_lookup->num_stripes !=
7439 em->map_lookup->verified_stripes) {
7440 btrfs_err(fs_info,
7441 "chunk %llu has missing dev extent, have %d expect %d",
7442 em->start, em->map_lookup->verified_stripes,
7443 em->map_lookup->num_stripes);
7444 ret = -EUCLEAN;
7445 goto out;
7446 }
7447 }
7448out:
7449 read_unlock(&em_tree->lock);
7450 return ret;
7451}
7452
7453/*
7454 * Ensure that all dev extents are mapped to correct chunk, otherwise
7455 * later chunk allocation/free would cause unexpected behavior.
7456 *
7457 * NOTE: This will iterate through the whole device tree, which should be of
7458 * the same size level as the chunk tree. This slightly increases mount time.
7459 */
7460int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
7461{
7462 struct btrfs_path *path;
7463 struct btrfs_root *root = fs_info->dev_root;
7464 struct btrfs_key key;
7465 int ret = 0;
7466
7467 key.objectid = 1;
7468 key.type = BTRFS_DEV_EXTENT_KEY;
7469 key.offset = 0;
7470
7471 path = btrfs_alloc_path();
7472 if (!path)
7473 return -ENOMEM;
7474
7475 path->reada = READA_FORWARD;
7476 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7477 if (ret < 0)
7478 goto out;
7479
7480 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7481 ret = btrfs_next_item(root, path);
7482 if (ret < 0)
7483 goto out;
7484 /* No dev extents at all? Not good */
7485 if (ret > 0) {
7486 ret = -EUCLEAN;
7487 goto out;
7488 }
7489 }
7490 while (1) {
7491 struct extent_buffer *leaf = path->nodes[0];
7492 struct btrfs_dev_extent *dext;
7493 int slot = path->slots[0];
7494 u64 chunk_offset;
7495 u64 physical_offset;
7496 u64 physical_len;
7497 u64 devid;
7498
7499 btrfs_item_key_to_cpu(leaf, &key, slot);
7500 if (key.type != BTRFS_DEV_EXTENT_KEY)
7501 break;
7502 devid = key.objectid;
7503 physical_offset = key.offset;
7504
7505 dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7506 chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
7507 physical_len = btrfs_dev_extent_length(leaf, dext);
7508
7509 ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
7510 physical_offset, physical_len);
7511 if (ret < 0)
7512 goto out;
7513 ret = btrfs_next_item(root, path);
7514 if (ret < 0)
7515 goto out;
7516 if (ret > 0) {
7517 ret = 0;
7518 break;
7519 }
7520 }
7521
7522 /* Ensure all chunks have corresponding dev extents */
7523 ret = verify_chunk_dev_extent_mapping(fs_info);
7524out:
7525 btrfs_free_path(path);
7526 return ret;
7527}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139ec8daf4c..23e9285d88de 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -11,6 +11,8 @@
11#include <linux/btrfs.h> 11#include <linux/btrfs.h>
12#include "async-thread.h" 12#include "async-thread.h"
13 13
14#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
15
14extern struct mutex uuid_mutex; 16extern struct mutex uuid_mutex;
15 17
16#define BTRFS_STRIPE_LEN SZ_64K 18#define BTRFS_STRIPE_LEN SZ_64K
@@ -343,6 +345,7 @@ struct map_lookup {
343 u64 stripe_len; 345 u64 stripe_len;
344 int num_stripes; 346 int num_stripes;
345 int sub_stripes; 347 int sub_stripes;
348 int verified_stripes; /* For mount time dev extent verification */
346 struct btrfs_bio_stripe stripes[]; 349 struct btrfs_bio_stripe stripes[];
347}; 350};
348 351
@@ -382,8 +385,6 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
382 } 385 }
383} 386}
384 387
385int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
386 u64 end, u64 *length);
387void btrfs_get_bbio(struct btrfs_bio *bbio); 388void btrfs_get_bbio(struct btrfs_bio *bbio);
388void btrfs_put_bbio(struct btrfs_bio *bbio); 389void btrfs_put_bbio(struct btrfs_bio *bbio);
389int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 390int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
@@ -396,20 +397,19 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
396 u64 physical, u64 **logical, int *naddrs, int *stripe_len); 397 u64 physical, u64 **logical, int *naddrs, int *stripe_len);
397int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); 398int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
398int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 399int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
399int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 400int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
400 struct btrfs_fs_info *fs_info, u64 type);
401void btrfs_mapping_init(struct btrfs_mapping_tree *tree); 401void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
402void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); 402void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
403blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 403blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
404 int mirror_num, int async_submit); 404 int mirror_num, int async_submit);
405int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 405int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
406 fmode_t flags, void *holder); 406 fmode_t flags, void *holder);
407int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 407struct btrfs_device *btrfs_scan_one_device(const char *path,
408 struct btrfs_fs_devices **fs_devices_ret); 408 fmode_t flags, void *holder);
409int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 409int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
410void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step); 410void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
411void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, 411void btrfs_assign_next_active_device(struct btrfs_device *device,
412 struct btrfs_device *device, struct btrfs_device *this_dev); 412 struct btrfs_device *this_dev);
413int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, 413int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
414 const char *device_path, 414 const char *device_path,
415 struct btrfs_device **device); 415 struct btrfs_device **device);
@@ -453,22 +453,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
453int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); 453int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
454int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, 454int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
455 struct btrfs_fs_info *fs_info); 455 struct btrfs_fs_info *fs_info);
456void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, 456void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
457 struct btrfs_device *srcdev);
458void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, 457void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
459 struct btrfs_device *srcdev); 458 struct btrfs_device *srcdev);
460void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, 459void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
461 struct btrfs_device *tgtdev);
462void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); 460void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
463int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 461int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
464 u64 logical, u64 len); 462 u64 logical, u64 len);
465unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 463unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
466 u64 logical); 464 u64 logical);
467int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 465int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
468 struct btrfs_fs_info *fs_info, 466 u64 chunk_offset, u64 chunk_size);
469 u64 chunk_offset, u64 chunk_size); 467int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
470int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
471 struct btrfs_fs_info *fs_info, u64 chunk_offset);
472 468
473static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, 469static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
474 int index) 470 int index)
@@ -560,4 +556,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
560bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, 556bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
561 struct btrfs_device *failing_dev); 557 struct btrfs_device *failing_dev);
562 558
559int btrfs_bg_type_to_factor(u64 flags);
560int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
561
563#endif 562#endif
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 39b94ec965be..b401c4e36394 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(
374 __entry->extent_type = btrfs_file_extent_type(l, fi); 374 __entry->extent_type = btrfs_file_extent_type(l, fi);
375 __entry->compression = btrfs_file_extent_compression(l, fi); 375 __entry->compression = btrfs_file_extent_compression(l, fi);
376 __entry->extent_start = start; 376 __entry->extent_start = start;
377 __entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi)); 377 __entry->extent_end = (start + btrfs_file_extent_ram_bytes(l, fi));
378 ), 378 ),
379 379
380 TP_printk_btrfs( 380 TP_printk_btrfs(
@@ -433,7 +433,6 @@ DEFINE_EVENT(
433 { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \ 433 { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \
434 { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \ 434 { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \
435 { (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \ 435 { (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \
436 { (1 << BTRFS_ORDERED_LOGGED_CSUM), "LOGGED_CSUM" }, \
437 { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" }) 436 { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" })
438 437
439 438