Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/backref.c      |   1
-rw-r--r--  fs/btrfs/ctree.h        |   5
-rw-r--r--  fs/btrfs/delayed-ref.c  |  34
-rw-r--r--  fs/btrfs/delayed-ref.h  |   3
-rw-r--r--  fs/btrfs/disk-io.c      |  56
-rw-r--r--  fs/btrfs/disk-io.h      |   2
-rw-r--r--  fs/btrfs/extent-tree.c  | 185
-rw-r--r--  fs/btrfs/extent_io.h    |   1
-rw-r--r--  fs/btrfs/file.c         |  36
-rw-r--r--  fs/btrfs/inode-map.c    |   3
-rw-r--r--  fs/btrfs/inode.c        |  83
-rw-r--r--  fs/btrfs/ioctl.c        |   2
-rw-r--r--  fs/btrfs/qgroup.c       |  62
-rw-r--r--  fs/btrfs/qgroup.h       |  36
-rw-r--r--  fs/btrfs/relocation.c   | 126
-rw-r--r--  fs/btrfs/root-tree.c    |  27
-rw-r--r--  fs/btrfs/send.c         | 173
-rw-r--r--  fs/btrfs/super.c        |  16
-rw-r--r--  fs/btrfs/transaction.c  |   7
-rw-r--r--  fs/btrfs/tree-log.c     | 106
-rw-r--r--  fs/btrfs/tree-log.h     |   5
-rw-r--r--  fs/btrfs/volumes.c      |  27
22 files changed, 756 insertions, 240 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2b88439c2ee8..455a6b2fd539 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
589 589
590 list_del(&ref2->list); 590 list_del(&ref2->list);
591 kmem_cache_free(btrfs_prelim_ref_cache, ref2); 591 kmem_cache_free(btrfs_prelim_ref_cache, ref2);
592 cond_resched();
592 } 593 }
593 594
594 } 595 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2fe8f89091a3..eff3993c77b3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1028,6 +1028,7 @@ struct btrfs_fs_info {
1028 struct btrfs_workqueue *qgroup_rescan_workers; 1028 struct btrfs_workqueue *qgroup_rescan_workers;
1029 struct completion qgroup_rescan_completion; 1029 struct completion qgroup_rescan_completion;
1030 struct btrfs_work qgroup_rescan_work; 1030 struct btrfs_work qgroup_rescan_work;
1031 bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
1031 1032
1032 /* filesystem state */ 1033 /* filesystem state */
1033 unsigned long fs_state; 1034 unsigned long fs_state;
@@ -1079,6 +1080,8 @@ struct btrfs_fs_info {
1079 struct list_head pinned_chunks; 1080 struct list_head pinned_chunks;
1080 1081
1081 int creating_free_space_tree; 1082 int creating_free_space_tree;
1083 /* Used to record internally whether fs has been frozen */
1084 int fs_frozen;
1082}; 1085};
1083 1086
1084struct btrfs_subvolume_writers { 1087struct btrfs_subvolume_writers {
@@ -2578,7 +2581,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
2578 struct btrfs_root *root, 2581 struct btrfs_root *root,
2579 u64 root_objectid, u64 owner, u64 offset, 2582 u64 root_objectid, u64 owner, u64 offset,
2580 struct btrfs_key *ins); 2583 struct btrfs_key *ins);
2581int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 2584int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
2582 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 2585 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
2583 struct btrfs_key *ins, int is_data, int delalloc); 2586 struct btrfs_key *ins, int is_data, int delalloc);
2584int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2587int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b6d210e7a993..ac02e041464b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
541 struct btrfs_delayed_ref_head *existing; 541 struct btrfs_delayed_ref_head *existing;
542 struct btrfs_delayed_ref_head *head_ref = NULL; 542 struct btrfs_delayed_ref_head *head_ref = NULL;
543 struct btrfs_delayed_ref_root *delayed_refs; 543 struct btrfs_delayed_ref_root *delayed_refs;
544 struct btrfs_qgroup_extent_record *qexisting;
545 int count_mod = 1; 544 int count_mod = 1;
546 int must_insert_reserved = 0; 545 int must_insert_reserved = 0;
547 546
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
606 qrecord->num_bytes = num_bytes; 605 qrecord->num_bytes = num_bytes;
607 qrecord->old_roots = NULL; 606 qrecord->old_roots = NULL;
608 607
609 qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, 608 if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
610 delayed_refs, 609 delayed_refs, qrecord))
611 qrecord);
612 if (qexisting)
613 kfree(qrecord); 610 kfree(qrecord);
614 } 611 }
615 612
@@ -862,33 +859,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
862 return 0; 859 return 0;
863} 860}
864 861
865int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
866 struct btrfs_trans_handle *trans,
867 u64 ref_root, u64 bytenr, u64 num_bytes)
868{
869 struct btrfs_delayed_ref_root *delayed_refs;
870 struct btrfs_delayed_ref_head *ref_head;
871 int ret = 0;
872
873 if (!fs_info->quota_enabled || !is_fstree(ref_root))
874 return 0;
875
876 delayed_refs = &trans->transaction->delayed_refs;
877
878 spin_lock(&delayed_refs->lock);
879 ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
880 if (!ref_head) {
881 ret = -ENOENT;
882 goto out;
883 }
884 WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
885 ref_head->qgroup_ref_root = ref_root;
886 ref_head->qgroup_reserved = num_bytes;
887out:
888 spin_unlock(&delayed_refs->lock);
889 return ret;
890}
891
892int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 862int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
893 struct btrfs_trans_handle *trans, 863 struct btrfs_trans_handle *trans,
894 u64 bytenr, u64 num_bytes, 864 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5fca9534a271..43f3629760e9 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
250 u64 parent, u64 ref_root, 250 u64 parent, u64 ref_root,
251 u64 owner, u64 offset, u64 reserved, int action, 251 u64 owner, u64 offset, u64 reserved, int action,
252 struct btrfs_delayed_extent_op *extent_op); 252 struct btrfs_delayed_extent_op *extent_op);
253int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
254 struct btrfs_trans_handle *trans,
255 u64 ref_root, u64 bytenr, u64 num_bytes);
256int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, 253int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
257 struct btrfs_trans_handle *trans, 254 struct btrfs_trans_handle *trans,
258 u64 bytenr, u64 num_bytes, 255 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59febfb8d04a..54bc8c7c6bcd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
559 u32 nritems = btrfs_header_nritems(leaf); 559 u32 nritems = btrfs_header_nritems(leaf);
560 int slot; 560 int slot;
561 561
562 if (nritems == 0) 562 if (nritems == 0) {
563 struct btrfs_root *check_root;
564
565 key.objectid = btrfs_header_owner(leaf);
566 key.type = BTRFS_ROOT_ITEM_KEY;
567 key.offset = (u64)-1;
568
569 check_root = btrfs_get_fs_root(root->fs_info, &key, false);
570 /*
571 * The only reason we also check NULL here is that during
572 * open_ctree() some roots has not yet been set up.
573 */
574 if (!IS_ERR_OR_NULL(check_root)) {
575 /* if leaf is the root, then it's fine */
576 if (leaf->start !=
577 btrfs_root_bytenr(&check_root->root_item)) {
578 CORRUPT("non-root leaf's nritems is 0",
579 leaf, root, 0);
580 return -EIO;
581 }
582 }
563 return 0; 583 return 0;
584 }
564 585
565 /* Check the 0 item */ 586 /* Check the 0 item */
566 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != 587 if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
612 return 0; 633 return 0;
613} 634}
614 635
636static int check_node(struct btrfs_root *root, struct extent_buffer *node)
637{
638 unsigned long nr = btrfs_header_nritems(node);
639
640 if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
641 btrfs_crit(root->fs_info,
642 "corrupt node: block %llu root %llu nritems %lu",
643 node->start, root->objectid, nr);
644 return -EIO;
645 }
646 return 0;
647}
648
615static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, 649static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
616 u64 phy_offset, struct page *page, 650 u64 phy_offset, struct page *page,
617 u64 start, u64 end, int mirror) 651 u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
682 ret = -EIO; 716 ret = -EIO;
683 } 717 }
684 718
719 if (found_level > 0 && check_node(root, eb))
720 ret = -EIO;
721
685 if (!ret) 722 if (!ret)
686 set_extent_buffer_uptodate(eb); 723 set_extent_buffer_uptodate(eb);
687err: 724err:
@@ -1618,8 +1655,8 @@ fail:
1618 return ret; 1655 return ret;
1619} 1656}
1620 1657
1621static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1658struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1622 u64 root_id) 1659 u64 root_id)
1623{ 1660{
1624 struct btrfs_root *root; 1661 struct btrfs_root *root;
1625 1662
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2298 fs_info->quota_enabled = 0; 2335 fs_info->quota_enabled = 0;
2299 fs_info->pending_quota_state = 0; 2336 fs_info->pending_quota_state = 0;
2300 fs_info->qgroup_ulist = NULL; 2337 fs_info->qgroup_ulist = NULL;
2338 fs_info->qgroup_rescan_running = false;
2301 mutex_init(&fs_info->qgroup_rescan_lock); 2339 mutex_init(&fs_info->qgroup_rescan_lock);
2302} 2340}
2303 2341
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
2624 atomic_set(&fs_info->qgroup_op_seq, 0); 2662 atomic_set(&fs_info->qgroup_op_seq, 0);
2625 atomic_set(&fs_info->reada_works_cnt, 0); 2663 atomic_set(&fs_info->reada_works_cnt, 0);
2626 atomic64_set(&fs_info->tree_mod_seq, 0); 2664 atomic64_set(&fs_info->tree_mod_seq, 0);
2665 fs_info->fs_frozen = 0;
2627 fs_info->sb = sb; 2666 fs_info->sb = sb;
2628 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2667 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
2629 fs_info->metadata_ratio = 0; 2668 fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3739 if (btrfs_root_refs(&root->root_item) == 0) 3778 if (btrfs_root_refs(&root->root_item) == 0)
3740 synchronize_srcu(&fs_info->subvol_srcu); 3779 synchronize_srcu(&fs_info->subvol_srcu);
3741 3780
3742 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3781 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
3743 btrfs_free_log(NULL, root); 3782 btrfs_free_log(NULL, root);
3783 if (root->reloc_root) {
3784 free_extent_buffer(root->reloc_root->node);
3785 free_extent_buffer(root->reloc_root->commit_root);
3786 btrfs_put_fs_root(root->reloc_root);
3787 root->reloc_root = NULL;
3788 }
3789 }
3744 3790
3745 if (root->free_ino_pinned) 3791 if (root->free_ino_pinned)
3746 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3792 __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
3851 smp_mb(); 3897 smp_mb();
3852 3898
3853 /* wait for the qgroup rescan worker to stop */ 3899 /* wait for the qgroup rescan worker to stop */
3854 btrfs_qgroup_wait_for_completion(fs_info); 3900 btrfs_qgroup_wait_for_completion(fs_info, false);
3855 3901
3856 /* wait for the uuid_scan task to finish */ 3902 /* wait for the uuid_scan task to finish */
3857 down(&fs_info->uuid_tree_rescan_sem); 3903 down(&fs_info->uuid_tree_rescan_sem);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b3207a0e09f7..f19a982f5a4f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
68struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, 68struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
69 struct btrfs_key *location); 69 struct btrfs_key *location);
70int btrfs_init_fs_root(struct btrfs_root *root); 70int btrfs_init_fs_root(struct btrfs_root *root);
71struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
72 u64 root_id);
71int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, 73int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
72 struct btrfs_root *root); 74 struct btrfs_root *root);
73void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); 75void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 61b494e8e604..0450dc410533 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@ enum {
60 CHUNK_ALLOC_FORCE = 2, 60 CHUNK_ALLOC_FORCE = 2,
61}; 61};
62 62
63/*
64 * Control how reservations are dealt with.
65 *
66 * RESERVE_FREE - freeing a reservation.
67 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
68 * ENOSPC accounting
69 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
70 * bytes_may_use as the ENOSPC accounting is done elsewhere
71 */
72enum {
73 RESERVE_FREE = 0,
74 RESERVE_ALLOC = 1,
75 RESERVE_ALLOC_NO_ACCOUNT = 2,
76};
77
78static int update_block_group(struct btrfs_trans_handle *trans, 63static int update_block_group(struct btrfs_trans_handle *trans,
79 struct btrfs_root *root, u64 bytenr, 64 struct btrfs_root *root, u64 bytenr,
80 u64 num_bytes, int alloc); 65 u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level,
104 struct btrfs_key *key); 89 struct btrfs_key *key);
105static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 90static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups); 91 int dump_block_groups);
107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 92static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
108 u64 num_bytes, int reserve, 93 u64 ram_bytes, u64 num_bytes, int delalloc);
109 int delalloc); 94static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
95 u64 num_bytes, int delalloc);
110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 96static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
111 u64 num_bytes); 97 u64 num_bytes);
112int btrfs_pin_extent(struct btrfs_root *root, 98int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@ again:
3501 dcs = BTRFS_DC_SETUP; 3487 dcs = BTRFS_DC_SETUP;
3502 else if (ret == -ENOSPC) 3488 else if (ret == -ENOSPC)
3503 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); 3489 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3504 btrfs_free_reserved_data_space(inode, 0, num_pages);
3505 3490
3506out_put: 3491out_put:
3507 iput(inode); 3492 iput(inode);
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
4472 } 4457 }
4473} 4458}
4474 4459
4460/*
4461 * If force is CHUNK_ALLOC_FORCE:
4462 * - return 1 if it successfully allocates a chunk,
4463 * - return errors including -ENOSPC otherwise.
4464 * If force is NOT CHUNK_ALLOC_FORCE:
4465 * - return 0 if it doesn't need to allocate a new chunk,
4466 * - return 1 if it successfully allocates a chunk,
4467 * - return errors including -ENOSPC otherwise.
4468 */
4475static int do_chunk_alloc(struct btrfs_trans_handle *trans, 4469static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4476 struct btrfs_root *extent_root, u64 flags, int force) 4470 struct btrfs_root *extent_root, u64 flags, int force)
4477{ 4471{
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root,
4882 btrfs_get_alloc_profile(root, 0), 4876 btrfs_get_alloc_profile(root, 0),
4883 CHUNK_ALLOC_NO_FORCE); 4877 CHUNK_ALLOC_NO_FORCE);
4884 btrfs_end_transaction(trans, root); 4878 btrfs_end_transaction(trans, root);
4885 if (ret == -ENOSPC) 4879 if (ret > 0 || ret == -ENOSPC)
4886 ret = 0; 4880 ret = 0;
4887 break; 4881 break;
4888 case COMMIT_TRANS: 4882 case COMMIT_TRANS:
@@ -6497,19 +6491,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6497} 6491}
6498 6492
6499/** 6493/**
6500 * btrfs_update_reserved_bytes - update the block_group and space info counters 6494 * btrfs_add_reserved_bytes - update the block_group and space info counters
6501 * @cache: The cache we are manipulating 6495 * @cache: The cache we are manipulating
6496 * @ram_bytes: The number of bytes of file content, and will be same to
6497 * @num_bytes except for the compress path.
6502 * @num_bytes: The number of bytes in question 6498 * @num_bytes: The number of bytes in question
6503 * @reserve: One of the reservation enums
6504 * @delalloc: The blocks are allocated for the delalloc write 6499 * @delalloc: The blocks are allocated for the delalloc write
6505 * 6500 *
6506 * This is called by the allocator when it reserves space, or by somebody who is 6501 * This is called by the allocator when it reserves space. Metadata
6507 * freeing space that was never actually used on disk. For example if you 6502 * reservations should be called with RESERVE_ALLOC so we do the proper
6508 * reserve some space for a new leaf in transaction A and before transaction A
6509 * commits you free that leaf, you call this with reserve set to 0 in order to
6510 * clear the reservation.
6511 *
6512 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
6513 * ENOSPC accounting. For data we handle the reservation through clearing the 6503 * ENOSPC accounting. For data we handle the reservation through clearing the
6514 * delalloc bits in the io_tree. We have to do this since we could end up 6504 * delalloc bits in the io_tree. We have to do this since we could end up
6515 * allocating less disk space for the amount of data we have reserved in the 6505 * allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6509,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6519 * make the reservation and return -EAGAIN, otherwise this function always 6509 * make the reservation and return -EAGAIN, otherwise this function always
6520 * succeeds. 6510 * succeeds.
6521 */ 6511 */
6522static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 6512static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6523 u64 num_bytes, int reserve, int delalloc) 6513 u64 ram_bytes, u64 num_bytes, int delalloc)
6524{ 6514{
6525 struct btrfs_space_info *space_info = cache->space_info; 6515 struct btrfs_space_info *space_info = cache->space_info;
6526 int ret = 0; 6516 int ret = 0;
6527 6517
6528 spin_lock(&space_info->lock); 6518 spin_lock(&space_info->lock);
6529 spin_lock(&cache->lock); 6519 spin_lock(&cache->lock);
6530 if (reserve != RESERVE_FREE) { 6520 if (cache->ro) {
6531 if (cache->ro) { 6521 ret = -EAGAIN;
6532 ret = -EAGAIN;
6533 } else {
6534 cache->reserved += num_bytes;
6535 space_info->bytes_reserved += num_bytes;
6536 if (reserve == RESERVE_ALLOC) {
6537 trace_btrfs_space_reservation(cache->fs_info,
6538 "space_info", space_info->flags,
6539 num_bytes, 0);
6540 space_info->bytes_may_use -= num_bytes;
6541 }
6542
6543 if (delalloc)
6544 cache->delalloc_bytes += num_bytes;
6545 }
6546 } else { 6522 } else {
6547 if (cache->ro) 6523 cache->reserved += num_bytes;
6548 space_info->bytes_readonly += num_bytes; 6524 space_info->bytes_reserved += num_bytes;
6549 cache->reserved -= num_bytes;
6550 space_info->bytes_reserved -= num_bytes;
6551 6525
6526 trace_btrfs_space_reservation(cache->fs_info,
6527 "space_info", space_info->flags,
6528 ram_bytes, 0);
6529 space_info->bytes_may_use -= ram_bytes;
6552 if (delalloc) 6530 if (delalloc)
6553 cache->delalloc_bytes -= num_bytes; 6531 cache->delalloc_bytes += num_bytes;
6554 } 6532 }
6555 spin_unlock(&cache->lock); 6533 spin_unlock(&cache->lock);
6556 spin_unlock(&space_info->lock); 6534 spin_unlock(&space_info->lock);
6557 return ret; 6535 return ret;
6558} 6536}
6559 6537
6538/**
6539 * btrfs_free_reserved_bytes - update the block_group and space info counters
6540 * @cache: The cache we are manipulating
6541 * @num_bytes: The number of bytes in question
6542 * @delalloc: The blocks are allocated for the delalloc write
6543 *
6544 * This is called by somebody who is freeing space that was never actually used
6545 * on disk. For example if you reserve some space for a new leaf in transaction
6546 * A and before transaction A commits you free that leaf, you call this with
6547 * reserve set to 0 in order to clear the reservation.
6548 */
6549
6550static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6551 u64 num_bytes, int delalloc)
6552{
6553 struct btrfs_space_info *space_info = cache->space_info;
6554 int ret = 0;
6555
6556 spin_lock(&space_info->lock);
6557 spin_lock(&cache->lock);
6558 if (cache->ro)
6559 space_info->bytes_readonly += num_bytes;
6560 cache->reserved -= num_bytes;
6561 space_info->bytes_reserved -= num_bytes;
6562
6563 if (delalloc)
6564 cache->delalloc_bytes -= num_bytes;
6565 spin_unlock(&cache->lock);
6566 spin_unlock(&space_info->lock);
6567 return ret;
6568}
6560void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 6569void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
6561 struct btrfs_root *root) 6570 struct btrfs_root *root)
6562{ 6571{
@@ -7191,7 +7200,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7191 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 7200 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7192 7201
7193 btrfs_add_free_space(cache, buf->start, buf->len); 7202 btrfs_add_free_space(cache, buf->start, buf->len);
7194 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); 7203 btrfs_free_reserved_bytes(cache, buf->len, 0);
7195 btrfs_put_block_group(cache); 7204 btrfs_put_block_group(cache);
7196 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 7205 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
7197 pin = 0; 7206 pin = 0;
@@ -7416,9 +7425,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7416 * the free space extent currently. 7425 * the free space extent currently.
7417 */ 7426 */
7418static noinline int find_free_extent(struct btrfs_root *orig_root, 7427static noinline int find_free_extent(struct btrfs_root *orig_root,
7419 u64 num_bytes, u64 empty_size, 7428 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7420 u64 hint_byte, struct btrfs_key *ins, 7429 u64 hint_byte, struct btrfs_key *ins,
7421 u64 flags, int delalloc) 7430 u64 flags, int delalloc)
7422{ 7431{
7423 int ret = 0; 7432 int ret = 0;
7424 struct btrfs_root *root = orig_root->fs_info->extent_root; 7433 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7439,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
7430 struct btrfs_space_info *space_info; 7439 struct btrfs_space_info *space_info;
7431 int loop = 0; 7440 int loop = 0;
7432 int index = __get_raid_index(flags); 7441 int index = __get_raid_index(flags);
7433 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
7434 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
7435 bool failed_cluster_refill = false; 7442 bool failed_cluster_refill = false;
7436 bool failed_alloc = false; 7443 bool failed_alloc = false;
7437 bool use_cluster = true; 7444 bool use_cluster = true;
@@ -7763,8 +7770,8 @@ checks:
7763 search_start - offset); 7770 search_start - offset);
7764 BUG_ON(offset > search_start); 7771 BUG_ON(offset > search_start);
7765 7772
7766 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 7773 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7767 alloc_type, delalloc); 7774 num_bytes, delalloc);
7768 if (ret == -EAGAIN) { 7775 if (ret == -EAGAIN) {
7769 btrfs_add_free_space(block_group, offset, num_bytes); 7776 btrfs_add_free_space(block_group, offset, num_bytes);
7770 goto loop; 7777 goto loop;
@@ -7936,7 +7943,7 @@ again:
7936 up_read(&info->groups_sem); 7943 up_read(&info->groups_sem);
7937} 7944}
7938 7945
7939int btrfs_reserve_extent(struct btrfs_root *root, 7946int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
7940 u64 num_bytes, u64 min_alloc_size, 7947 u64 num_bytes, u64 min_alloc_size,
7941 u64 empty_size, u64 hint_byte, 7948 u64 empty_size, u64 hint_byte,
7942 struct btrfs_key *ins, int is_data, int delalloc) 7949 struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7955,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
7948 flags = btrfs_get_alloc_profile(root, is_data); 7955 flags = btrfs_get_alloc_profile(root, is_data);
7949again: 7956again:
7950 WARN_ON(num_bytes < root->sectorsize); 7957 WARN_ON(num_bytes < root->sectorsize);
7951 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 7958 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
7952 flags, delalloc); 7959 hint_byte, ins, flags, delalloc);
7953 if (!ret && !is_data) { 7960 if (!ret && !is_data) {
7954 btrfs_dec_block_group_reservations(root->fs_info, 7961 btrfs_dec_block_group_reservations(root->fs_info,
7955 ins->objectid); 7962 ins->objectid);
@@ -7958,6 +7965,7 @@ again:
7958 num_bytes = min(num_bytes >> 1, ins->offset); 7965 num_bytes = min(num_bytes >> 1, ins->offset);
7959 num_bytes = round_down(num_bytes, root->sectorsize); 7966 num_bytes = round_down(num_bytes, root->sectorsize);
7960 num_bytes = max(num_bytes, min_alloc_size); 7967 num_bytes = max(num_bytes, min_alloc_size);
7968 ram_bytes = num_bytes;
7961 if (num_bytes == min_alloc_size) 7969 if (num_bytes == min_alloc_size)
7962 final_tried = true; 7970 final_tried = true;
7963 goto again; 7971 goto again;
@@ -7995,7 +8003,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
7995 if (btrfs_test_opt(root->fs_info, DISCARD)) 8003 if (btrfs_test_opt(root->fs_info, DISCARD))
7996 ret = btrfs_discard_extent(root, start, len, NULL); 8004 ret = btrfs_discard_extent(root, start, len, NULL);
7997 btrfs_add_free_space(cache, start, len); 8005 btrfs_add_free_space(cache, start, len);
7998 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 8006 btrfs_free_reserved_bytes(cache, len, delalloc);
7999 trace_btrfs_reserved_extent_free(root, start, len); 8007 trace_btrfs_reserved_extent_free(root, start, len);
8000 } 8008 }
8001 8009
@@ -8223,8 +8231,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8223 if (!block_group) 8231 if (!block_group)
8224 return -EINVAL; 8232 return -EINVAL;
8225 8233
8226 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 8234 ret = btrfs_add_reserved_bytes(block_group, ins->offset,
8227 RESERVE_ALLOC_NO_ACCOUNT, 0); 8235 ins->offset, 0);
8228 BUG_ON(ret); /* logic error */ 8236 BUG_ON(ret); /* logic error */
8229 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 8237 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
8230 0, owner, offset, ins, 1); 8238 0, owner, offset, ins, 1);
@@ -8368,7 +8376,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8368 if (IS_ERR(block_rsv)) 8376 if (IS_ERR(block_rsv))
8369 return ERR_CAST(block_rsv); 8377 return ERR_CAST(block_rsv);
8370 8378
8371 ret = btrfs_reserve_extent(root, blocksize, blocksize, 8379 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8372 empty_size, hint, &ins, 0, 0); 8380 empty_size, hint, &ins, 0, 0);
8373 if (ret) 8381 if (ret)
8374 goto out_unuse; 8382 goto out_unuse;
@@ -8521,35 +8529,6 @@ reada:
8521 wc->reada_slot = slot; 8529 wc->reada_slot = slot;
8522} 8530}
8523 8531
8524/*
8525 * These may not be seen by the usual inc/dec ref code so we have to
8526 * add them here.
8527 */
8528static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
8529 struct btrfs_root *root, u64 bytenr,
8530 u64 num_bytes)
8531{
8532 struct btrfs_qgroup_extent_record *qrecord;
8533 struct btrfs_delayed_ref_root *delayed_refs;
8534
8535 qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
8536 if (!qrecord)
8537 return -ENOMEM;
8538
8539 qrecord->bytenr = bytenr;
8540 qrecord->num_bytes = num_bytes;
8541 qrecord->old_roots = NULL;
8542
8543 delayed_refs = &trans->transaction->delayed_refs;
8544 spin_lock(&delayed_refs->lock);
8545 if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
8546 delayed_refs, qrecord))
8547 kfree(qrecord);
8548 spin_unlock(&delayed_refs->lock);
8549
8550 return 0;
8551}
8552
8553static int account_leaf_items(struct btrfs_trans_handle *trans, 8532static int account_leaf_items(struct btrfs_trans_handle *trans,
8554 struct btrfs_root *root, 8533 struct btrfs_root *root,
8555 struct extent_buffer *eb) 8534 struct extent_buffer *eb)
@@ -8583,7 +8562,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
8583 8562
8584 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 8563 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8585 8564
8586 ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); 8565 ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
8566 bytenr, num_bytes, GFP_NOFS);
8587 if (ret) 8567 if (ret)
8588 return ret; 8568 return ret;
8589 } 8569 }
@@ -8732,8 +8712,9 @@ walk_down:
8732 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 8712 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
8733 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 8713 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
8734 8714
8735 ret = record_one_subtree_extent(trans, root, child_bytenr, 8715 ret = btrfs_qgroup_insert_dirty_extent(trans,
8736 root->nodesize); 8716 root->fs_info, child_bytenr,
8717 root->nodesize, GFP_NOFS);
8737 if (ret) 8718 if (ret)
8738 goto out; 8719 goto out;
8739 } 8720 }
@@ -9906,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root,
9906 } else { 9887 } else {
9907 ret = 0; 9888 ret = 0;
9908 } 9889 }
9890 free_extent_map(em);
9909 goto out; 9891 goto out;
9910 } 9892 }
9911 path->slots[0]++; 9893 path->slots[0]++;
@@ -9942,6 +9924,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9942 block_group->iref = 0; 9924 block_group->iref = 0;
9943 block_group->inode = NULL; 9925 block_group->inode = NULL;
9944 spin_unlock(&block_group->lock); 9926 spin_unlock(&block_group->lock);
9927 ASSERT(block_group->io_ctl.inode == NULL);
9945 iput(inode); 9928 iput(inode);
9946 last = block_group->key.objectid + block_group->key.offset; 9929 last = block_group->key.objectid + block_group->key.offset;
9947 btrfs_put_block_group(block_group); 9930 btrfs_put_block_group(block_group);
@@ -9999,6 +9982,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
9999 free_excluded_extents(info->extent_root, block_group); 9982 free_excluded_extents(info->extent_root, block_group);
10000 9983
10001 btrfs_remove_free_space_cache(block_group); 9984 btrfs_remove_free_space_cache(block_group);
9985 ASSERT(list_empty(&block_group->dirty_list));
9986 ASSERT(list_empty(&block_group->io_list));
9987 ASSERT(list_empty(&block_group->bg_list));
9988 ASSERT(atomic_read(&block_group->count) == 1);
10002 btrfs_put_block_group(block_group); 9989 btrfs_put_block_group(block_group);
10003 9990
10004 spin_lock(&info->block_group_cache_lock); 9991 spin_lock(&info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc2729a7612d..28cd88fccc7e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,6 +20,7 @@
20#define EXTENT_DAMAGED (1U << 14) 20#define EXTENT_DAMAGED (1U << 14)
21#define EXTENT_NORESERVE (1U << 15) 21#define EXTENT_NORESERVE (1U << 15)
22#define EXTENT_QGROUP_RESERVED (1U << 16) 22#define EXTENT_QGROUP_RESERVED (1U << 16)
23#define EXTENT_CLEAR_DATA_RESV (1U << 17)
23#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 24#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
24#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 25#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
25 26
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9404121fd5f7..fea31a4a6e36 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2033 */ 2033 */
2034 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 2034 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2035 &BTRFS_I(inode)->runtime_flags); 2035 &BTRFS_I(inode)->runtime_flags);
2036 /*
2037 * An ordered extent might have started before and completed
2038 * already with io errors, in which case the inode was not
2039 * updated and we end up here. So check the inode's mapping
2040 * flags for any errors that might have happened while doing
2041 * writeback of file data.
2042 */
2043 ret = btrfs_inode_check_errors(inode);
2036 inode_unlock(inode); 2044 inode_unlock(inode);
2037 goto out; 2045 goto out;
2038 } 2046 }
@@ -2062,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
2062 } 2070 }
2063 trans->sync = true; 2071 trans->sync = true;
2064 2072
2065 btrfs_init_log_ctx(&ctx); 2073 btrfs_init_log_ctx(&ctx, inode);
2066 2074
2067 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); 2075 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
2068 if (ret < 0) { 2076 if (ret < 0) {
@@ -2667,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2667 2675
2668 alloc_start = round_down(offset, blocksize); 2676 alloc_start = round_down(offset, blocksize);
2669 alloc_end = round_up(offset + len, blocksize); 2677 alloc_end = round_up(offset + len, blocksize);
2678 cur_offset = alloc_start;
2670 2679
2671 /* Make sure we aren't being give some crap mode */ 2680 /* Make sure we aren't being give some crap mode */
2672 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 2681 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2759,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2759 2768
2760 /* First, check if we exceed the qgroup limit */ 2769 /* First, check if we exceed the qgroup limit */
2761 INIT_LIST_HEAD(&reserve_list); 2770 INIT_LIST_HEAD(&reserve_list);
2762 cur_offset = alloc_start;
2763 while (1) { 2771 while (1) {
2764 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 2772 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
2765 alloc_end - cur_offset, 0); 2773 alloc_end - cur_offset, 0);
@@ -2786,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
2786 last_byte - cur_offset); 2794 last_byte - cur_offset);
2787 if (ret < 0) 2795 if (ret < 0)
2788 break; 2796 break;
2797 } else {
2798 /*
2799 * Do not need to reserve unwritten extent for this
2800 * range, free reserved data space first, otherwise
2801 * it'll result in false ENOSPC error.
2802 */
2803 btrfs_free_reserved_data_space(inode, cur_offset,
2804 last_byte - cur_offset);
2789 } 2805 }
2790 free_extent_map(em); 2806 free_extent_map(em);
2791 cur_offset = last_byte; 2807 cur_offset = last_byte;
@@ -2803,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2803 range->start, 2819 range->start,
2804 range->len, 1 << inode->i_blkbits, 2820 range->len, 1 << inode->i_blkbits,
2805 offset + len, &alloc_hint); 2821 offset + len, &alloc_hint);
2822 else
2823 btrfs_free_reserved_data_space(inode, range->start,
2824 range->len);
2806 list_del(&range->list); 2825 list_del(&range->list);
2807 kfree(range); 2826 kfree(range);
2808 } 2827 }
@@ -2837,18 +2856,11 @@ out_unlock:
2837 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 2856 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
2838 &cached_state, GFP_KERNEL); 2857 &cached_state, GFP_KERNEL);
2839out: 2858out:
2840 /*
2841 * As we waited the extent range, the data_rsv_map must be empty
2842 * in the range, as written data range will be released from it.
2843 * And for prealloacted extent, it will also be released when
2844 * its metadata is written.
2845 * So this is completely used as cleanup.
2846 */
2847 btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
2848 inode_unlock(inode); 2859 inode_unlock(inode);
2849 /* Let go of our reservation. */ 2860 /* Let go of our reservation. */
2850 btrfs_free_reserved_data_space(inode, alloc_start, 2861 if (ret != 0)
2851 alloc_end - alloc_start); 2862 btrfs_free_reserved_data_space(inode, alloc_start,
2863 alloc_end - cur_offset);
2852 return ret; 2864 return ret;
2853} 2865}
2854 2866
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index aa6fabaee72e..359ee861b5a4 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -495,10 +495,9 @@ again:
495 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, 495 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
496 prealloc, prealloc, &alloc_hint); 496 prealloc, prealloc, &alloc_hint);
497 if (ret) { 497 if (ret) {
498 btrfs_delalloc_release_space(inode, 0, prealloc); 498 btrfs_delalloc_release_metadata(inode, prealloc);
499 goto out_put; 499 goto out_put;
500 } 500 }
501 btrfs_free_reserved_data_space(inode, 0, prealloc);
502 501
503 ret = btrfs_write_out_ino_cache(root, trans, path, inode); 502 ret = btrfs_write_out_ino_cache(root, trans, path, inode);
504out_put: 503out_put:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2f5975954ccf..e6811c42e41e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -566,6 +566,8 @@ cont:
566 PAGE_SET_WRITEBACK | 566 PAGE_SET_WRITEBACK |
567 page_error_op | 567 page_error_op |
568 PAGE_END_WRITEBACK); 568 PAGE_END_WRITEBACK);
569 btrfs_free_reserved_data_space_noquota(inode, start,
570 end - start + 1);
569 goto free_pages_out; 571 goto free_pages_out;
570 } 572 }
571 } 573 }
@@ -742,7 +744,7 @@ retry:
742 lock_extent(io_tree, async_extent->start, 744 lock_extent(io_tree, async_extent->start,
743 async_extent->start + async_extent->ram_size - 1); 745 async_extent->start + async_extent->ram_size - 1);
744 746
745 ret = btrfs_reserve_extent(root, 747 ret = btrfs_reserve_extent(root, async_extent->ram_size,
746 async_extent->compressed_size, 748 async_extent->compressed_size,
747 async_extent->compressed_size, 749 async_extent->compressed_size,
748 0, alloc_hint, &ins, 1, 1); 750 0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
969 EXTENT_DEFRAG, PAGE_UNLOCK | 971 EXTENT_DEFRAG, PAGE_UNLOCK |
970 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | 972 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
971 PAGE_END_WRITEBACK); 973 PAGE_END_WRITEBACK);
972 974 btrfs_free_reserved_data_space_noquota(inode, start,
975 end - start + 1);
973 *nr_written = *nr_written + 976 *nr_written = *nr_written +
974 (end - start + PAGE_SIZE) / PAGE_SIZE; 977 (end - start + PAGE_SIZE) / PAGE_SIZE;
975 *page_started = 1; 978 *page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
989 unsigned long op; 992 unsigned long op;
990 993
991 cur_alloc_size = disk_num_bytes; 994 cur_alloc_size = disk_num_bytes;
992 ret = btrfs_reserve_extent(root, cur_alloc_size, 995 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
993 root->sectorsize, 0, alloc_hint, 996 root->sectorsize, 0, alloc_hint,
994 &ins, 1, 1); 997 &ins, 1, 1);
995 if (ret < 0) 998 if (ret < 0)
@@ -1489,8 +1492,10 @@ out_check:
1489 extent_clear_unlock_delalloc(inode, cur_offset, 1492 extent_clear_unlock_delalloc(inode, cur_offset,
1490 cur_offset + num_bytes - 1, 1493 cur_offset + num_bytes - 1,
1491 locked_page, EXTENT_LOCKED | 1494 locked_page, EXTENT_LOCKED |
1492 EXTENT_DELALLOC, PAGE_UNLOCK | 1495 EXTENT_DELALLOC |
1493 PAGE_SET_PRIVATE2); 1496 EXTENT_CLEAR_DATA_RESV,
1497 PAGE_UNLOCK | PAGE_SET_PRIVATE2);
1498
1494 if (!nolock && nocow) 1499 if (!nolock && nocow)
1495 btrfs_end_write_no_snapshoting(root); 1500 btrfs_end_write_no_snapshoting(root);
1496 cur_offset = extent_end; 1501 cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1807 return; 1812 return;
1808 1813
1809 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1814 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1810 && do_list && !(state->state & EXTENT_NORESERVE)) 1815 && do_list && !(state->state & EXTENT_NORESERVE)
1816 && (*bits & (EXTENT_DO_ACCOUNTING |
1817 EXTENT_CLEAR_DATA_RESV)))
1811 btrfs_free_reserved_data_space_noquota(inode, 1818 btrfs_free_reserved_data_space_noquota(inode,
1812 state->start, len); 1819 state->start, len);
1813 1820
@@ -3435,10 +3442,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3435 found_key.offset = 0; 3442 found_key.offset = 0;
3436 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 3443 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
3437 ret = PTR_ERR_OR_ZERO(inode); 3444 ret = PTR_ERR_OR_ZERO(inode);
3438 if (ret && ret != -ESTALE) 3445 if (ret && ret != -ENOENT)
3439 goto out; 3446 goto out;
3440 3447
3441 if (ret == -ESTALE && root == root->fs_info->tree_root) { 3448 if (ret == -ENOENT && root == root->fs_info->tree_root) {
3442 struct btrfs_root *dead_root; 3449 struct btrfs_root *dead_root;
3443 struct btrfs_fs_info *fs_info = root->fs_info; 3450 struct btrfs_fs_info *fs_info = root->fs_info;
3444 int is_dead_root = 0; 3451 int is_dead_root = 0;
@@ -3474,7 +3481,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3474 * Inode is already gone but the orphan item is still there, 3481 * Inode is already gone but the orphan item is still there,
3475 * kill the orphan item. 3482 * kill the orphan item.
3476 */ 3483 */
3477 if (ret == -ESTALE) { 3484 if (ret == -ENOENT) {
3478 trans = btrfs_start_transaction(root, 1); 3485 trans = btrfs_start_transaction(root, 1);
3479 if (IS_ERR(trans)) { 3486 if (IS_ERR(trans)) {
3480 ret = PTR_ERR(trans); 3487 ret = PTR_ERR(trans);
@@ -3633,7 +3640,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3633/* 3640/*
3634 * read an inode from the btree into the in-memory inode 3641 * read an inode from the btree into the in-memory inode
3635 */ 3642 */
3636static void btrfs_read_locked_inode(struct inode *inode) 3643static int btrfs_read_locked_inode(struct inode *inode)
3637{ 3644{
3638 struct btrfs_path *path; 3645 struct btrfs_path *path;
3639 struct extent_buffer *leaf; 3646 struct extent_buffer *leaf;
@@ -3652,14 +3659,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
3652 filled = true; 3659 filled = true;
3653 3660
3654 path = btrfs_alloc_path(); 3661 path = btrfs_alloc_path();
3655 if (!path) 3662 if (!path) {
3663 ret = -ENOMEM;
3656 goto make_bad; 3664 goto make_bad;
3665 }
3657 3666
3658 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3667 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3659 3668
3660 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3669 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3661 if (ret) 3670 if (ret) {
3671 if (ret > 0)
3672 ret = -ENOENT;
3662 goto make_bad; 3673 goto make_bad;
3674 }
3663 3675
3664 leaf = path->nodes[0]; 3676 leaf = path->nodes[0];
3665 3677
@@ -3812,11 +3824,12 @@ cache_acl:
3812 } 3824 }
3813 3825
3814 btrfs_update_iflags(inode); 3826 btrfs_update_iflags(inode);
3815 return; 3827 return 0;
3816 3828
3817make_bad: 3829make_bad:
3818 btrfs_free_path(path); 3830 btrfs_free_path(path);
3819 make_bad_inode(inode); 3831 make_bad_inode(inode);
3832 return ret;
3820} 3833}
3821 3834
3822/* 3835/*
@@ -4204,6 +4217,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4204 int err = 0; 4217 int err = 0;
4205 struct btrfs_root *root = BTRFS_I(dir)->root; 4218 struct btrfs_root *root = BTRFS_I(dir)->root;
4206 struct btrfs_trans_handle *trans; 4219 struct btrfs_trans_handle *trans;
4220 u64 last_unlink_trans;
4207 4221
4208 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) 4222 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4209 return -ENOTEMPTY; 4223 return -ENOTEMPTY;
@@ -4226,11 +4240,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4226 if (err) 4240 if (err)
4227 goto out; 4241 goto out;
4228 4242
4243 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4244
4229 /* now the directory is empty */ 4245 /* now the directory is empty */
4230 err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry), 4246 err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
4231 dentry->d_name.name, dentry->d_name.len); 4247 dentry->d_name.name, dentry->d_name.len);
4232 if (!err) 4248 if (!err) {
4233 btrfs_i_size_write(inode, 0); 4249 btrfs_i_size_write(inode, 0);
4250 /*
4251 * Propagate the last_unlink_trans value of the deleted dir to
4252 * its parent directory. This is to prevent an unrecoverable
4253 * log tree in the case we do something like this:
4254 * 1) create dir foo
4255 * 2) create snapshot under dir foo
4256 * 3) delete the snapshot
4257 * 4) rmdir foo
4258 * 5) mkdir foo
4259 * 6) fsync foo or some file inside foo
4260 */
4261 if (last_unlink_trans >= trans->transid)
4262 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4263 }
4234out: 4264out:
4235 btrfs_end_transaction(trans, root); 4265 btrfs_end_transaction(trans, root);
4236 btrfs_btree_balance_dirty(root); 4266 btrfs_btree_balance_dirty(root);
@@ -5606,7 +5636,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5606 return ERR_PTR(-ENOMEM); 5636 return ERR_PTR(-ENOMEM);
5607 5637
5608 if (inode->i_state & I_NEW) { 5638 if (inode->i_state & I_NEW) {
5609 btrfs_read_locked_inode(inode); 5639 int ret;
5640
5641 ret = btrfs_read_locked_inode(inode);
5610 if (!is_bad_inode(inode)) { 5642 if (!is_bad_inode(inode)) {
5611 inode_tree_add(inode); 5643 inode_tree_add(inode);
5612 unlock_new_inode(inode); 5644 unlock_new_inode(inode);
@@ -5615,7 +5647,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5615 } else { 5647 } else {
5616 unlock_new_inode(inode); 5648 unlock_new_inode(inode);
5617 iput(inode); 5649 iput(inode);
5618 inode = ERR_PTR(-ESTALE); 5650 ASSERT(ret < 0);
5651 inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
5619 } 5652 }
5620 } 5653 }
5621 5654
@@ -7225,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7225 int ret; 7258 int ret;
7226 7259
7227 alloc_hint = get_extent_allocation_hint(inode, start, len); 7260 alloc_hint = get_extent_allocation_hint(inode, start, len);
7228 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 7261 ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
7229 alloc_hint, &ins, 1, 1); 7262 alloc_hint, &ins, 1, 1);
7230 if (ret) 7263 if (ret)
7231 return ERR_PTR(ret); 7264 return ERR_PTR(ret);
@@ -7725,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7725 ret = PTR_ERR(em2); 7758 ret = PTR_ERR(em2);
7726 goto unlock_err; 7759 goto unlock_err;
7727 } 7760 }
7761 /*
7762 * For inode marked NODATACOW or extent marked PREALLOC,
7763 * use the existing or preallocated extent, so does not
7764 * need to adjust btrfs_space_info's bytes_may_use.
7765 */
7766 btrfs_free_reserved_data_space_noquota(inode,
7767 start, len);
7728 goto unlock; 7768 goto unlock;
7729 } 7769 }
7730 } 7770 }
@@ -7759,7 +7799,6 @@ unlock:
7759 i_size_write(inode, start + len); 7799 i_size_write(inode, start + len);
7760 7800
7761 adjust_dio_outstanding_extents(inode, dio_data, len); 7801 adjust_dio_outstanding_extents(inode, dio_data, len);
7762 btrfs_free_reserved_data_space(inode, start, len);
7763 WARN_ON(dio_data->reserve < len); 7802 WARN_ON(dio_data->reserve < len);
7764 dio_data->reserve -= len; 7803 dio_data->reserve -= len;
7765 dio_data->unsubmitted_oe_range_end = start + len; 7804 dio_data->unsubmitted_oe_range_end = start + len;
@@ -10280,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
10280 u64 last_alloc = (u64)-1; 10319 u64 last_alloc = (u64)-1;
10281 int ret = 0; 10320 int ret = 0;
10282 bool own_trans = true; 10321 bool own_trans = true;
10322 u64 end = start + num_bytes - 1;
10283 10323
10284 if (trans) 10324 if (trans)
10285 own_trans = false; 10325 own_trans = false;
@@ -10301,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
10301 * sized chunks. 10341 * sized chunks.
10302 */ 10342 */
10303 cur_bytes = min(cur_bytes, last_alloc); 10343 cur_bytes = min(cur_bytes, last_alloc);
10304 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 10344 ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
10305 *alloc_hint, &ins, 1, 0); 10345 min_size, 0, *alloc_hint, &ins, 1, 0);
10306 if (ret) { 10346 if (ret) {
10307 if (own_trans) 10347 if (own_trans)
10308 btrfs_end_transaction(trans, root); 10348 btrfs_end_transaction(trans, root);
@@ -10388,6 +10428,9 @@ next:
10388 if (own_trans) 10428 if (own_trans)
10389 btrfs_end_transaction(trans, root); 10429 btrfs_end_transaction(trans, root);
10390 } 10430 }
10431 if (cur_offset < end)
10432 btrfs_free_reserved_data_space(inode, cur_offset,
10433 end - cur_offset + 1);
10391 return ret; 10434 return ret;
10392} 10435}
10393 10436
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 14ed1e9e6bc8..b2a2da5893af 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
5084 if (!capable(CAP_SYS_ADMIN)) 5084 if (!capable(CAP_SYS_ADMIN))
5085 return -EPERM; 5085 return -EPERM;
5086 5086
5087 return btrfs_qgroup_wait_for_completion(root->fs_info); 5087 return btrfs_qgroup_wait_for_completion(root->fs_info, true);
5088} 5088}
5089 5089
5090static long _btrfs_ioctl_set_received_subvol(struct file *file, 5090static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93ee1c18ef9d..8db2e29fdcf4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
995 goto out; 995 goto out;
996 fs_info->quota_enabled = 0; 996 fs_info->quota_enabled = 0;
997 fs_info->pending_quota_state = 0; 997 fs_info->pending_quota_state = 0;
998 btrfs_qgroup_wait_for_completion(fs_info); 998 btrfs_qgroup_wait_for_completion(fs_info, false);
999 spin_lock(&fs_info->qgroup_lock); 999 spin_lock(&fs_info->qgroup_lock);
1000 quota_root = fs_info->quota_root; 1000 quota_root = fs_info->quota_root;
1001 fs_info->quota_root = NULL; 1001 fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
1453 return ret; 1453 return ret;
1454} 1454}
1455 1455
1456struct btrfs_qgroup_extent_record * 1456int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
1457btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 1457 struct btrfs_delayed_ref_root *delayed_refs,
1458 struct btrfs_delayed_ref_root *delayed_refs, 1458 struct btrfs_qgroup_extent_record *record)
1459 struct btrfs_qgroup_extent_record *record)
1460{ 1459{
1461 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1460 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1462 struct rb_node *parent_node = NULL; 1461 struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
1475 else if (bytenr > entry->bytenr) 1474 else if (bytenr > entry->bytenr)
1476 p = &(*p)->rb_right; 1475 p = &(*p)->rb_right;
1477 else 1476 else
1478 return entry; 1477 return 1;
1479 } 1478 }
1480 1479
1481 rb_link_node(&record->node, parent_node, p); 1480 rb_link_node(&record->node, parent_node, p);
1482 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1481 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
1483 return NULL; 1482 return 0;
1483}
1484
1485int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
1486 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
1487 gfp_t gfp_flag)
1488{
1489 struct btrfs_qgroup_extent_record *record;
1490 struct btrfs_delayed_ref_root *delayed_refs;
1491 int ret;
1492
1493 if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
1494 return 0;
1495 if (WARN_ON(trans == NULL))
1496 return -EINVAL;
1497 record = kmalloc(sizeof(*record), gfp_flag);
1498 if (!record)
1499 return -ENOMEM;
1500
1501 delayed_refs = &trans->transaction->delayed_refs;
1502 record->bytenr = bytenr;
1503 record->num_bytes = num_bytes;
1504 record->old_roots = NULL;
1505
1506 spin_lock(&delayed_refs->lock);
1507 ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
1508 record);
1509 spin_unlock(&delayed_refs->lock);
1510 if (ret > 0)
1511 kfree(record);
1512 return 0;
1484} 1513}
1485 1514
1486#define UPDATE_NEW 0 1515#define UPDATE_NEW 0
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2303 int err = -ENOMEM; 2332 int err = -ENOMEM;
2304 int ret = 0; 2333 int ret = 0;
2305 2334
2335 mutex_lock(&fs_info->qgroup_rescan_lock);
2336 fs_info->qgroup_rescan_running = true;
2337 mutex_unlock(&fs_info->qgroup_rescan_lock);
2338
2306 path = btrfs_alloc_path(); 2339 path = btrfs_alloc_path();
2307 if (!path) 2340 if (!path)
2308 goto out; 2341 goto out;
@@ -2369,6 +2402,9 @@ out:
2369 } 2402 }
2370 2403
2371done: 2404done:
2405 mutex_lock(&fs_info->qgroup_rescan_lock);
2406 fs_info->qgroup_rescan_running = false;
2407 mutex_unlock(&fs_info->qgroup_rescan_lock);
2372 complete_all(&fs_info->qgroup_rescan_completion); 2408 complete_all(&fs_info->qgroup_rescan_completion);
2373} 2409}
2374 2410
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2487 return 0; 2523 return 0;
2488} 2524}
2489 2525
2490int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2526int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
2527 bool interruptible)
2491{ 2528{
2492 int running; 2529 int running;
2493 int ret = 0; 2530 int ret = 0;
2494 2531
2495 mutex_lock(&fs_info->qgroup_rescan_lock); 2532 mutex_lock(&fs_info->qgroup_rescan_lock);
2496 spin_lock(&fs_info->qgroup_lock); 2533 spin_lock(&fs_info->qgroup_lock);
2497 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2534 running = fs_info->qgroup_rescan_running;
2498 spin_unlock(&fs_info->qgroup_lock); 2535 spin_unlock(&fs_info->qgroup_lock);
2499 mutex_unlock(&fs_info->qgroup_rescan_lock); 2536 mutex_unlock(&fs_info->qgroup_rescan_lock);
2500 2537
2501 if (running) 2538 if (!running)
2539 return 0;
2540
2541 if (interruptible)
2502 ret = wait_for_completion_interruptible( 2542 ret = wait_for_completion_interruptible(
2503 &fs_info->qgroup_rescan_completion); 2543 &fs_info->qgroup_rescan_completion);
2544 else
2545 wait_for_completion(&fs_info->qgroup_rescan_completion);
2504 2546
2505 return ret; 2547 return ret;
2506} 2548}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 710887c06aaf..1bc64c864b62 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
46 struct btrfs_fs_info *fs_info); 46 struct btrfs_fs_info *fs_info);
47int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); 47int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
48void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); 48void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
49int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); 49int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
50 bool interruptible);
50int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 51int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
51 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 52 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
52int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 53int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
63struct btrfs_delayed_extent_op; 64struct btrfs_delayed_extent_op;
64int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 65int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
65 struct btrfs_fs_info *fs_info); 66 struct btrfs_fs_info *fs_info);
66struct btrfs_qgroup_extent_record * 67/*
67btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, 68 * Insert one dirty extent record into @delayed_refs, informing qgroup to
68 struct btrfs_delayed_ref_root *delayed_refs, 69 * account that extent at commit trans time.
69 struct btrfs_qgroup_extent_record *record); 70 *
71 * No lock version, caller must acquire delayed ref lock and allocate memory.
72 *
73 * Return 0 for success insert
74 * Return >0 for existing record, caller can free @record safely.
75 * Error is not possible
76 */
77int btrfs_qgroup_insert_dirty_extent_nolock(
78 struct btrfs_fs_info *fs_info,
79 struct btrfs_delayed_ref_root *delayed_refs,
80 struct btrfs_qgroup_extent_record *record);
81
82/*
83 * Insert one dirty extent record into @delayed_refs, informing qgroup to
84 * account that extent at commit trans time.
85 *
86 * Better encapsulated version.
87 *
88 * Return 0 if the operation is done.
89 * Return <0 for error, like memory allocation failure or invalid parameter
90 * (NULL trans)
91 */
92int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
94 gfp_t gfp_flag);
95
70int 96int
71btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 97btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
72 struct btrfs_fs_info *fs_info, 98 struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5aea41b4..8a2c2a07987b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h" 32#include "free-space-cache.h"
33#include "inode-map.h" 33#include "inode-map.h"
34#include "qgroup.h"
34 35
35/* 36/*
36 * backref_node, mapping_node and tree_block start with this 37 * backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
3037 u64 num_bytes; 3038 u64 num_bytes;
3038 int nr = 0; 3039 int nr = 0;
3039 int ret = 0; 3040 int ret = 0;
3041 u64 prealloc_start = cluster->start - offset;
3042 u64 prealloc_end = cluster->end - offset;
3043 u64 cur_offset;
3040 3044
3041 BUG_ON(cluster->start != cluster->boundary[0]); 3045 BUG_ON(cluster->start != cluster->boundary[0]);
3042 inode_lock(inode); 3046 inode_lock(inode);
3043 3047
3044 ret = btrfs_check_data_free_space(inode, cluster->start, 3048 ret = btrfs_check_data_free_space(inode, prealloc_start,
3045 cluster->end + 1 - cluster->start); 3049 prealloc_end + 1 - prealloc_start);
3046 if (ret) 3050 if (ret)
3047 goto out; 3051 goto out;
3048 3052
3053 cur_offset = prealloc_start;
3049 while (nr < cluster->nr) { 3054 while (nr < cluster->nr) {
3050 start = cluster->boundary[nr] - offset; 3055 start = cluster->boundary[nr] - offset;
3051 if (nr + 1 < cluster->nr) 3056 if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
3055 3060
3056 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3061 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
3057 num_bytes = end + 1 - start; 3062 num_bytes = end + 1 - start;
3063 if (cur_offset < start)
3064 btrfs_free_reserved_data_space(inode, cur_offset,
3065 start - cur_offset);
3058 ret = btrfs_prealloc_file_range(inode, 0, start, 3066 ret = btrfs_prealloc_file_range(inode, 0, start,
3059 num_bytes, num_bytes, 3067 num_bytes, num_bytes,
3060 end + 1, &alloc_hint); 3068 end + 1, &alloc_hint);
3069 cur_offset = end + 1;
3061 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3070 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
3062 if (ret) 3071 if (ret)
3063 break; 3072 break;
3064 nr++; 3073 nr++;
3065 } 3074 }
3066 btrfs_free_reserved_data_space(inode, cluster->start, 3075 if (cur_offset < prealloc_end)
3067 cluster->end + 1 - cluster->start); 3076 btrfs_free_reserved_data_space(inode, cur_offset,
3077 prealloc_end + 1 - cur_offset);
3068out: 3078out:
3069 inode_unlock(inode); 3079 inode_unlock(inode);
3070 return ret; 3080 return ret;
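
The prealloc_file_extent_cluster() hunk above reserves data space for the whole [prealloc_start, prealloc_end] range up front and then, while walking the cluster boundaries, immediately releases the reservation for any hole between extents, plus whatever is left past the last extent. The arithmetic is easy to get wrong, so here is a self-contained userspace model of the bookkeeping with made-up sizes; it should end with a leftover reservation of zero.

#include <stdio.h>

int main(void)
{
	unsigned long long boundary[] = { 0, 4096, 16384 };   /* extent starts */
	unsigned long long extent_len[] = { 4096, 8192, 4096 };
	unsigned long long prealloc_start = 0, prealloc_end = 24575;
	unsigned long long reserved = prealloc_end + 1 - prealloc_start;
	unsigned long long cur = prealloc_start;
	int nr;

	printf("reserved %llu bytes\n", reserved);
	for (nr = 0; nr < 3; nr++) {
		unsigned long long start = boundary[nr];
		unsigned long long end = start + extent_len[nr] - 1;

		if (cur < start) {              /* hole: give the space back */
			printf("free hole [%llu, %llu)\n", cur, start);
			reserved -= start - cur;
		}
		reserved -= end + 1 - start;    /* consumed by the preallocation */
		cur = end + 1;
	}
	if (cur < prealloc_end)                 /* tail past the last extent */
		reserved -= prealloc_end + 1 - cur;
	printf("leftover reservation: %llu (expect 0)\n", reserved);
	return 0;
}
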
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
3916 return 0; 3926 return 0;
3917} 3927}
3918 3928
3929/*
3930 * Qgroup fixer for data chunk relocation.
 3931 * The data relocation is done in the following steps:
 3932 * 1) Copy data extents into the data reloc tree
 3933 * 2) Create a tree reloc tree (a special snapshot) for the related subvolumes
 3934 * 3) Modify the file extents in the tree reloc tree
 3935 * 4) Merge the tree reloc tree with the original fs tree by swapping tree blocks
 3936 *
 3937 * The problem is that the data and tree reloc trees are not accounted to
 3938 * qgroup, and step 4) only informs qgroup about tree block changes, not about
 3939 * the file extents inside those tree blocks.
 3940 *
 3941 * The good news is that the related data extents are all in the data reloc
 3942 * tree, so we only need to inform qgroup to track all file extents in the
 3943 * data reloc tree before committing the transaction.
3944 */
3945static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
3946 struct reloc_control *rc)
3947{
3948 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3949 struct inode *inode = rc->data_inode;
3950 struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
3951 struct btrfs_path *path;
3952 struct btrfs_key key;
3953 int ret = 0;
3954
3955 if (!fs_info->quota_enabled)
3956 return 0;
3957
3958 /*
 3959	 * The qgroup fix is only valid for the stage where we update data
 3960	 * pointers (UPDATE_DATA_PTRS).
 3961	 * For the MOVING_DATA stage we would miss the point where the tree
 3962	 * blocks are swapped, so nothing can be fixed there.
3963 */
3964 if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
3965 return 0;
3966
3967 path = btrfs_alloc_path();
3968 if (!path)
3969 return -ENOMEM;
3970 key.objectid = btrfs_ino(inode);
3971 key.type = BTRFS_EXTENT_DATA_KEY;
3972 key.offset = 0;
3973
3974 ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
3975 if (ret < 0)
3976 goto out;
3977
3978 lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
3979 while (1) {
3980 struct btrfs_file_extent_item *fi;
3981
3982 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3983 if (key.objectid > btrfs_ino(inode))
3984 break;
3985 if (key.type != BTRFS_EXTENT_DATA_KEY)
3986 goto next;
3987 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3988 struct btrfs_file_extent_item);
3989 if (btrfs_file_extent_type(path->nodes[0], fi) !=
3990 BTRFS_FILE_EXTENT_REG)
3991 goto next;
3992 ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
3993 btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
3994 btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
3995 GFP_NOFS);
3996 if (ret < 0)
3997 break;
3998next:
3999 ret = btrfs_next_item(data_reloc_root, path);
4000 if (ret < 0)
4001 break;
4002 if (ret > 0) {
4003 ret = 0;
4004 break;
4005 }
4006 }
 4007	unlock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
4008out:
4009 btrfs_free_path(path);
4010 return ret;
4011}
4012
3919static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 4013static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3920{ 4014{
3921 struct rb_root blocks = RB_ROOT; 4015 struct rb_root blocks = RB_ROOT;
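
qgroup_fix_relocated_data_extents(), added above, walks the data reloc tree from key (ino, EXTENT_DATA, 0), stops once the objectid no longer matches, skips non-data keys and non-regular extents, and records every remaining extent as qgroup-dirty. A toy model of that key walk over a sorted array (nothing here is btrfs API, the key values are made up):

#include <stdio.h>

enum { KEY_INODE_ITEM = 1, KEY_EXTENT_DATA = 108 };
struct key { unsigned long long objectid; int type; unsigned long long offset; };

int main(void)
{
	const struct key items[] = {
		{ 257, KEY_INODE_ITEM, 0 },
		{ 257, KEY_EXTENT_DATA, 0 },
		{ 257, KEY_EXTENT_DATA, 8192 },
		{ 258, KEY_EXTENT_DATA, 0 },    /* different inode: stop here */
	};
	unsigned long long ino = 257;
	size_t i;

	for (i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
		if (items[i].objectid > ino)
			break;                  /* walked past our inode */
		if (items[i].type != KEY_EXTENT_DATA)
			continue;               /* "goto next" in the patch */
		printf("record dirty extent at file offset %llu\n",
		       items[i].offset);
	}
	return 0;
}
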
@@ -4102,10 +4196,16 @@ restart:
4102 4196
4103 /* get rid of pinned extents */ 4197 /* get rid of pinned extents */
4104 trans = btrfs_join_transaction(rc->extent_root); 4198 trans = btrfs_join_transaction(rc->extent_root);
4105 if (IS_ERR(trans)) 4199 if (IS_ERR(trans)) {
4106 err = PTR_ERR(trans); 4200 err = PTR_ERR(trans);
4107 else 4201 goto out_free;
4108 btrfs_commit_transaction(trans, rc->extent_root); 4202 }
4203 err = qgroup_fix_relocated_data_extents(trans, rc);
4204 if (err < 0) {
4205 btrfs_abort_transaction(trans, err);
4206 goto out_free;
4207 }
4208 btrfs_commit_transaction(trans, rc->extent_root);
4109out_free: 4209out_free:
4110 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4210 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
4111 btrfs_free_path(path); 4211 btrfs_free_path(path);
@@ -4468,10 +4568,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4468 unset_reloc_control(rc); 4568 unset_reloc_control(rc);
4469 4569
4470 trans = btrfs_join_transaction(rc->extent_root); 4570 trans = btrfs_join_transaction(rc->extent_root);
4471 if (IS_ERR(trans)) 4571 if (IS_ERR(trans)) {
4472 err = PTR_ERR(trans); 4572 err = PTR_ERR(trans);
4473 else 4573 goto out_free;
4474 err = btrfs_commit_transaction(trans, rc->extent_root); 4574 }
4575 err = qgroup_fix_relocated_data_extents(trans, rc);
4576 if (err < 0) {
4577 btrfs_abort_transaction(trans, err);
4578 goto out_free;
4579 }
4580 err = btrfs_commit_transaction(trans, rc->extent_root);
4475out_free: 4581out_free:
4476 kfree(rc); 4582 kfree(rc);
4477out: 4583out:
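
Both call sites above switch from "commit if the join succeeded" to "join, run the qgroup fixup, abort the transaction on failure, otherwise commit". A generic sketch of that goto-based error-handling shape, with stand-in functions rather than btrfs calls:

#include <stdio.h>

static int join_transaction(void)   { return 0; }
static int qgroup_fixup(void)       { return 0; }
static void abort_transaction(int err) { printf("abort: %d\n", err); }
static int commit_transaction(void) { return 0; }

static int relocate_finish(void)
{
	int err;

	err = join_transaction();
	if (err < 0)
		goto out_free;          /* could not even start: just clean up */

	err = qgroup_fixup();
	if (err < 0) {
		abort_transaction(err); /* leave the transaction aborted */
		goto out_free;
	}
	err = commit_transaction();
out_free:
	/* release block reservation, paths, ... */
	return err;
}

int main(void)
{
	return relocate_finish() < 0;
}
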
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7fd7e1830cfe..091296062456 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
272 root_key.objectid = key.offset; 272 root_key.objectid = key.offset;
273 key.offset++; 273 key.offset++;
274 274
275 /*
276 * The root might have been inserted already, as before we look
277 * for orphan roots, log replay might have happened, which
278 * triggers a transaction commit and qgroup accounting, which
279 * in turn reads and inserts fs roots while doing backref
280 * walking.
281 */
282 root = btrfs_lookup_fs_root(tree_root->fs_info,
283 root_key.objectid);
284 if (root) {
285 WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
286 &root->state));
287 if (btrfs_root_refs(&root->root_item) == 0)
288 btrfs_add_dead_root(root);
289 continue;
290 }
291
275 root = btrfs_read_fs_root(tree_root, &root_key); 292 root = btrfs_read_fs_root(tree_root, &root_key);
276 err = PTR_ERR_OR_ZERO(root); 293 err = PTR_ERR_OR_ZERO(root);
277 if (err && err != -ENOENT) { 294 if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
310 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); 327 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
311 328
312 err = btrfs_insert_fs_root(root->fs_info, root); 329 err = btrfs_insert_fs_root(root->fs_info, root);
313 /*
314 * The root might have been inserted already, as before we look
315 * for orphan roots, log replay might have happened, which
316 * triggers a transaction commit and qgroup accounting, which
317 * in turn reads and inserts fs roots while doing backref
318 * walking.
319 */
320 if (err == -EEXIST)
321 err = 0;
322 if (err) { 330 if (err) {
331 BUG_ON(err == -EEXIST);
323 btrfs_free_fs_root(root); 332 btrfs_free_fs_root(root);
324 break; 333 break;
325 } 334 }
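
The root-tree.c change moves the duplicate handling in front of the read/insert: the fs root is looked up first and handled if it is already cached (for example inserted earlier by log replay), so a later -EEXIST from the insert is treated as a bug rather than silently ignored. A minimal sketch of that lookup-before-insert ordering over a toy table:

#include <assert.h>
#include <stdio.h>

#define MAX_ROOTS 8
static unsigned long long table[MAX_ROOTS];
static int nr_roots;

static int lookup_root(unsigned long long id)
{
	int i;
	for (i = 0; i < nr_roots; i++)
		if (table[i] == id)
			return 1;
	return 0;
}

static void insert_root(unsigned long long id)
{
	assert(!lookup_root(id));   /* duplicates were filtered out earlier */
	table[nr_roots++] = id;
}

int main(void)
{
	unsigned long long orphans[] = { 260, 261, 260 };  /* 260 seen twice */
	size_t i;

	for (i = 0; i < sizeof(orphans) / sizeof(orphans[0]); i++) {
		if (lookup_root(orphans[i]))
			continue;           /* already present, e.g. from log replay */
		insert_root(orphans[i]);
	}
	printf("%d distinct orphan roots\n", nr_roots);
	return 0;
}
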
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd298385c..efe129fe2678 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@ struct pending_dir_move {
231 u64 parent_ino; 231 u64 parent_ino;
232 u64 ino; 232 u64 ino;
233 u64 gen; 233 u64 gen;
234 bool is_orphan;
235 struct list_head update_refs; 234 struct list_head update_refs;
236}; 235};
237 236
@@ -274,6 +273,39 @@ struct name_cache_entry {
274 char name[]; 273 char name[];
275}; 274};
276 275
276static void inconsistent_snapshot_error(struct send_ctx *sctx,
277 enum btrfs_compare_tree_result result,
278 const char *what)
279{
280 const char *result_string;
281
282 switch (result) {
283 case BTRFS_COMPARE_TREE_NEW:
284 result_string = "new";
285 break;
286 case BTRFS_COMPARE_TREE_DELETED:
287 result_string = "deleted";
288 break;
289 case BTRFS_COMPARE_TREE_CHANGED:
290 result_string = "updated";
291 break;
292 case BTRFS_COMPARE_TREE_SAME:
293 ASSERT(0);
294 result_string = "unchanged";
295 break;
296 default:
297 ASSERT(0);
298 result_string = "unexpected";
299 }
300
301 btrfs_err(sctx->send_root->fs_info,
302 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
303 result_string, what, sctx->cmp_key->objectid,
304 sctx->send_root->root_key.objectid,
305 (sctx->parent_root ?
306 sctx->parent_root->root_key.objectid : 0));
307}
308
277static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); 309static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
278 310
279static struct waiting_dir_move * 311static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1861 * was already unlinked/moved, so we can safely assume that we will not 1893 * was already unlinked/moved, so we can safely assume that we will not
1862 * overwrite anything at this point in time. 1894 * overwrite anything at this point in time.
1863 */ 1895 */
1864 if (other_inode > sctx->send_progress) { 1896 if (other_inode > sctx->send_progress ||
1897 is_waiting_for_move(sctx, other_inode)) {
1865 ret = get_inode_info(sctx->parent_root, other_inode, NULL, 1898 ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1866 who_gen, NULL, NULL, NULL, NULL); 1899 who_gen, NULL, NULL, NULL, NULL);
1867 if (ret < 0) 1900 if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
2502 key.type = BTRFS_INODE_ITEM_KEY; 2535 key.type = BTRFS_INODE_ITEM_KEY;
2503 key.offset = 0; 2536 key.offset = 0;
2504 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2537 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2538 if (ret > 0)
2539 ret = -ENOENT;
2505 if (ret < 0) 2540 if (ret < 0)
2506 goto out; 2541 goto out;
2507 2542
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2947 } 2982 }
2948 2983
2949 if (loc.objectid > send_progress) { 2984 if (loc.objectid > send_progress) {
2985 struct orphan_dir_info *odi;
2986
2987 odi = get_orphan_dir_info(sctx, dir);
2988 free_orphan_dir_info(sctx, odi);
2950 ret = 0; 2989 ret = 0;
2951 goto out; 2990 goto out;
2952 } 2991 }
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
3047 pm->parent_ino = parent_ino; 3086 pm->parent_ino = parent_ino;
3048 pm->ino = ino; 3087 pm->ino = ino;
3049 pm->gen = ino_gen; 3088 pm->gen = ino_gen;
3050 pm->is_orphan = is_orphan;
3051 INIT_LIST_HEAD(&pm->list); 3089 INIT_LIST_HEAD(&pm->list);
3052 INIT_LIST_HEAD(&pm->update_refs); 3090 INIT_LIST_HEAD(&pm->update_refs);
3053 RB_CLEAR_NODE(&pm->node); 3091 RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3113 return NULL; 3151 return NULL;
3114} 3152}
3115 3153
3154static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3155 u64 ino, u64 gen, u64 *ancestor_ino)
3156{
3157 int ret = 0;
3158 u64 parent_inode = 0;
3159 u64 parent_gen = 0;
3160 u64 start_ino = ino;
3161
3162 *ancestor_ino = 0;
3163 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3164 fs_path_reset(name);
3165
3166 if (is_waiting_for_rm(sctx, ino))
3167 break;
3168 if (is_waiting_for_move(sctx, ino)) {
3169 if (*ancestor_ino == 0)
3170 *ancestor_ino = ino;
3171 ret = get_first_ref(sctx->parent_root, ino,
3172 &parent_inode, &parent_gen, name);
3173 } else {
3174 ret = __get_cur_name_and_parent(sctx, ino, gen,
3175 &parent_inode,
3176 &parent_gen, name);
3177 if (ret > 0) {
3178 ret = 0;
3179 break;
3180 }
3181 }
3182 if (ret < 0)
3183 break;
3184 if (parent_inode == start_ino) {
3185 ret = 1;
3186 if (*ancestor_ino == 0)
3187 *ancestor_ino = ino;
3188 break;
3189 }
3190 ino = parent_inode;
3191 gen = parent_gen;
3192 }
3193 return ret;
3194}
3195
3116static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3196static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3117{ 3197{
3118 struct fs_path *from_path = NULL; 3198 struct fs_path *from_path = NULL;
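
path_loop(), added above, follows the chain of (possibly still pending) parent directories of an inode; if it arrives back at the inode it started from, applying the pending rename now would create a cycle, so the move has to be deferred to an ancestor. A toy version of the loop check over a static parent map, where array indices stand in for inode numbers:

#include <stdio.h>

#define ROOT 0

static int parent_of[] = { ROOT, ROOT, 1, 2, 3 };   /* 4 -> 3 -> 2 -> 1 -> root */

static int forms_loop(int start, int new_parent)
{
	int ino = new_parent;

	while (ino != ROOT) {
		if (ino == start)
			return 1;       /* we came back to where we started */
		ino = parent_of[ino];
	}
	return 0;
}

int main(void)
{
	/* moving 2 under 4 would make 2 an ancestor of itself: 4 -> 3 -> 2 */
	printf("2 under 4: loop=%d\n", forms_loop(2, 4));
	/* moving 4 under 1 is fine */
	printf("4 under 1: loop=%d\n", forms_loop(4, 1));
	return 0;
}
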
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3123 u64 parent_ino, parent_gen; 3203 u64 parent_ino, parent_gen;
3124 struct waiting_dir_move *dm = NULL; 3204 struct waiting_dir_move *dm = NULL;
3125 u64 rmdir_ino = 0; 3205 u64 rmdir_ino = 0;
3206 u64 ancestor;
3207 bool is_orphan;
3126 int ret; 3208 int ret;
3127 3209
3128 name = fs_path_alloc(); 3210 name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3135 dm = get_waiting_dir_move(sctx, pm->ino); 3217 dm = get_waiting_dir_move(sctx, pm->ino);
3136 ASSERT(dm); 3218 ASSERT(dm);
3137 rmdir_ino = dm->rmdir_ino; 3219 rmdir_ino = dm->rmdir_ino;
3220 is_orphan = dm->orphanized;
3138 free_waiting_dir_move(sctx, dm); 3221 free_waiting_dir_move(sctx, dm);
3139 3222
3140 if (pm->is_orphan) { 3223 if (is_orphan) {
3141 ret = gen_unique_name(sctx, pm->ino, 3224 ret = gen_unique_name(sctx, pm->ino,
3142 pm->gen, from_path); 3225 pm->gen, from_path);
3143 } else { 3226 } else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3155 goto out; 3238 goto out;
3156 3239
3157 sctx->send_progress = sctx->cur_ino + 1; 3240 sctx->send_progress = sctx->cur_ino + 1;
3241 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3242 if (ret < 0)
3243 goto out;
3244 if (ret) {
3245 LIST_HEAD(deleted_refs);
3246 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3247 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3248 &pm->update_refs, &deleted_refs,
3249 is_orphan);
3250 if (ret < 0)
3251 goto out;
3252 if (rmdir_ino) {
3253 dm = get_waiting_dir_move(sctx, pm->ino);
3254 ASSERT(dm);
3255 dm->rmdir_ino = rmdir_ino;
3256 }
3257 goto out;
3258 }
3158 fs_path_reset(name); 3259 fs_path_reset(name);
3159 to_path = name; 3260 to_path = name;
3160 name = NULL; 3261 name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3174 /* already deleted */ 3275 /* already deleted */
3175 goto finish; 3276 goto finish;
3176 } 3277 }
3177 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3278 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
3178 if (ret < 0) 3279 if (ret < 0)
3179 goto out; 3280 goto out;
3180 if (!ret) 3281 if (!ret)
@@ -3204,8 +3305,18 @@ finish:
3204 * and old parent(s). 3305 * and old parent(s).
3205 */ 3306 */
3206 list_for_each_entry(cur, &pm->update_refs, list) { 3307 list_for_each_entry(cur, &pm->update_refs, list) {
3207 if (cur->dir == rmdir_ino) 3308 /*
3309 * The parent inode might have been deleted in the send snapshot
3310 */
3311 ret = get_inode_info(sctx->send_root, cur->dir, NULL,
3312 NULL, NULL, NULL, NULL, NULL);
3313 if (ret == -ENOENT) {
3314 ret = 0;
3208 continue; 3315 continue;
3316 }
3317 if (ret < 0)
3318 goto out;
3319
3209 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3320 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3210 if (ret < 0) 3321 if (ret < 0)
3211 goto out; 3322 goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3325 u64 left_gen; 3436 u64 left_gen;
3326 u64 right_gen; 3437 u64 right_gen;
3327 int ret = 0; 3438 int ret = 0;
3439 struct waiting_dir_move *wdm;
3328 3440
3329 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3441 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3330 return 0; 3442 return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
3383 goto out; 3495 goto out;
3384 } 3496 }
3385 3497
3386 if (is_waiting_for_move(sctx, di_key.objectid)) { 3498 wdm = get_waiting_dir_move(sctx, di_key.objectid);
3499 if (wdm && !wdm->orphanized) {
3387 ret = add_pending_dir_move(sctx, 3500 ret = add_pending_dir_move(sctx,
3388 sctx->cur_ino, 3501 sctx->cur_ino,
3389 sctx->cur_inode_gen, 3502 sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
3470 ret = is_ancestor(sctx->parent_root, 3583 ret = is_ancestor(sctx->parent_root,
3471 sctx->cur_ino, sctx->cur_inode_gen, 3584 sctx->cur_ino, sctx->cur_inode_gen,
3472 ino, path_before); 3585 ino, path_before);
3473 break; 3586 if (ret)
3587 break;
3474 } 3588 }
3475 3589
3476 fs_path_reset(path_before); 3590 fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3643 goto out; 3757 goto out;
3644 if (ret) { 3758 if (ret) {
3645 struct name_cache_entry *nce; 3759 struct name_cache_entry *nce;
3760 struct waiting_dir_move *wdm;
3646 3761
3647 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3762 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3648 cur->full_path); 3763 cur->full_path);
3649 if (ret < 0) 3764 if (ret < 0)
3650 goto out; 3765 goto out;
3766
3767 /*
3768 * If ow_inode has its rename operation delayed
3769 * make sure that its orphanized name is used in
3770 * the source path when performing its rename
3771 * operation.
3772 */
3773 if (is_waiting_for_move(sctx, ow_inode)) {
3774 wdm = get_waiting_dir_move(sctx,
3775 ow_inode);
3776 ASSERT(wdm);
3777 wdm->orphanized = true;
3778 }
3779
3651 /* 3780 /*
3652 * Make sure we clear our orphanized inode's 3781 * Make sure we clear our orphanized inode's
3653 * name from the name cache. This is because the 3782 * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3663 name_cache_delete(sctx, nce); 3792 name_cache_delete(sctx, nce);
3664 kfree(nce); 3793 kfree(nce);
3665 } 3794 }
3795
3796 /*
3797 * ow_inode might currently be an ancestor of
3798 * cur_ino, therefore compute valid_path (the
3799 * current path of cur_ino) again because it
3800 * might contain the pre-orphanization name of
3801 * ow_inode, which is no longer valid.
3802 */
3803 fs_path_reset(valid_path);
3804 ret = get_cur_path(sctx, sctx->cur_ino,
3805 sctx->cur_inode_gen, valid_path);
3806 if (ret < 0)
3807 goto out;
3666 } else { 3808 } else {
3667 ret = send_unlink(sctx, cur->full_path); 3809 ret = send_unlink(sctx, cur->full_path);
3668 if (ret < 0) 3810 if (ret < 0)
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
5602{ 5744{
5603 int ret = 0; 5745 int ret = 0;
5604 5746
5605 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5747 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5748 inconsistent_snapshot_error(sctx, result, "reference");
5749 return -EIO;
5750 }
5606 5751
5607 if (!sctx->cur_inode_new_gen && 5752 if (!sctx->cur_inode_new_gen &&
5608 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5753 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
5627{ 5772{
5628 int ret = 0; 5773 int ret = 0;
5629 5774
5630 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5775 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5776 inconsistent_snapshot_error(sctx, result, "xattr");
5777 return -EIO;
5778 }
5631 5779
5632 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5780 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5633 if (result == BTRFS_COMPARE_TREE_NEW) 5781 if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
5651{ 5799{
5652 int ret = 0; 5800 int ret = 0;
5653 5801
5654 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5802 if (sctx->cur_ino != sctx->cmp_key->objectid) {
5803 inconsistent_snapshot_error(sctx, result, "extent");
5804 return -EIO;
5805 }
5655 5806
5656 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5807 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5657 if (result != BTRFS_COMPARE_TREE_DELETED) 5808 if (result != BTRFS_COMPARE_TREE_DELETED)
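
The three changed_* hunks above replace BUG_ON() with a logged inconsistent_snapshot_error() and an -EIO return, so a corrupted or racing snapshot aborts the send instead of crashing the kernel. The generic shape of that "validate, report, return an error" conversion:

#include <errno.h>
#include <stdio.h>

static int process_change(unsigned long long cur_ino,
			  unsigned long long key_objectid)
{
	if (cur_ino != key_objectid) {
		/* was: assert(cur_ino == key_objectid) */
		fprintf(stderr,
			"inconsistent snapshot: change for inode %llu while processing %llu\n",
			key_objectid, cur_ino);
		return -EIO;
	}
	/* ... handle the reference/xattr/extent change ... */
	return 0;
}

int main(void)
{
	printf("ok case: %d\n", process_change(257, 257));
	printf("inconsistent case: %d\n", process_change(257, 300));
	return 0;
}
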
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 864ce334f696..4071fe2bd098 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
2241 struct btrfs_trans_handle *trans; 2241 struct btrfs_trans_handle *trans;
2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root; 2242 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2243 2243
2244 root->fs_info->fs_frozen = 1;
2245 /*
2246 * We don't need a barrier here, we'll wait for any transaction that
2247 * could be in progress on other threads (and do delayed iputs that
2248 * we want to avoid on a frozen filesystem), or do the commit
2249 * ourselves.
2250 */
2244 trans = btrfs_attach_transaction_barrier(root); 2251 trans = btrfs_attach_transaction_barrier(root);
2245 if (IS_ERR(trans)) { 2252 if (IS_ERR(trans)) {
2246 /* no transaction, don't bother */ 2253 /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
2251 return btrfs_commit_transaction(trans, root); 2258 return btrfs_commit_transaction(trans, root);
2252} 2259}
2253 2260
2261static int btrfs_unfreeze(struct super_block *sb)
2262{
2263 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
2264
2265 root->fs_info->fs_frozen = 0;
2266 return 0;
2267}
2268
2254static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 2269static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2255{ 2270{
2256 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 2271 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
2299 .statfs = btrfs_statfs, 2314 .statfs = btrfs_statfs,
2300 .remount_fs = btrfs_remount, 2315 .remount_fs = btrfs_remount,
2301 .freeze_fs = btrfs_freeze, 2316 .freeze_fs = btrfs_freeze,
2317 .unfreeze_fs = btrfs_unfreeze,
2302}; 2318};
2303 2319
2304static const struct file_operations btrfs_ctl_fops = { 2320static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9cca0a721961..95d41919d034 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2278 2278
2279 kmem_cache_free(btrfs_trans_handle_cachep, trans); 2279 kmem_cache_free(btrfs_trans_handle_cachep, trans);
2280 2280
2281 /*
 2282	 * If the fs has been frozen, we cannot handle delayed iputs here,
 2283	 * otherwise it would deadlock on SB_FREEZE_FS.
2284 */
2281 if (current != root->fs_info->transaction_kthread && 2285 if (current != root->fs_info->transaction_kthread &&
2282 current != root->fs_info->cleaner_kthread) 2286 current != root->fs_info->cleaner_kthread &&
2287 !root->fs_info->fs_frozen)
2283 btrfs_run_delayed_iputs(root); 2288 btrfs_run_delayed_iputs(root);
2284 2289
2285 return ret; 2290 return ret;
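
Taken together, the super.c and transaction.c hunks introduce an fs_frozen flag: freeze sets it, unfreeze clears it, and the post-commit path skips delayed iputs while it is set, which avoids deadlocking against SB_FREEZE_FS. A combined userspace sketch of that gate (all names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct fs_info { bool frozen; int pending_iputs; };

static void fs_freeze(struct fs_info *fs)   { fs->frozen = true; }
static void fs_unfreeze(struct fs_info *fs) { fs->frozen = false; }

static void run_delayed_iputs(struct fs_info *fs)
{
	printf("running %d delayed iputs\n", fs->pending_iputs);
	fs->pending_iputs = 0;
}

static void commit_transaction(struct fs_info *fs)
{
	/* ... write out the transaction ... */
	if (!fs->frozen)                /* the new gate from the patch */
		run_delayed_iputs(fs);
	else
		printf("frozen: deferring %d iputs\n", fs->pending_iputs);
}

int main(void)
{
	struct fs_info fs = { .frozen = false, .pending_iputs = 3 };

	fs_freeze(&fs);
	commit_transaction(&fs);        /* skips the deferred work */
	fs_unfreeze(&fs);
	commit_transaction(&fs);        /* now runs it */
	return 0;
}
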
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d31a0c4f56be..e935035ac034 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
27#include "backref.h" 27#include "backref.h"
28#include "hash.h" 28#include "hash.h"
29#include "compression.h" 29#include "compression.h"
30#include "qgroup.h"
30 31
31/* magic values for the inode_only field in btrfs_log_inode: 32/* magic values for the inode_only field in btrfs_log_inode:
32 * 33 *
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
680 ins.type = BTRFS_EXTENT_ITEM_KEY; 681 ins.type = BTRFS_EXTENT_ITEM_KEY;
681 offset = key->offset - btrfs_file_extent_offset(eb, item); 682 offset = key->offset - btrfs_file_extent_offset(eb, item);
682 683
684 /*
 685	 * Manually record the dirty extent: here we did a shallow
 686	 * copy of the file extent item and skipped the normal backref
 687	 * update, modifying the extent tree all by ourselves.
 688	 * So we need to record the dirty extent for qgroup manually,
 689	 * as the owner of the file extent changed from the log tree
 690	 * (which doesn't affect qgroup) to the fs/file tree (which does).
691 */
692 ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
693 btrfs_file_extent_disk_bytenr(eb, item),
694 btrfs_file_extent_disk_num_bytes(eb, item),
695 GFP_NOFS);
696 if (ret < 0)
697 goto out;
698
683 if (ins.objectid > 0) { 699 if (ins.objectid > 0) {
684 u64 csum_start; 700 u64 csum_start;
685 u64 csum_end; 701 u64 csum_end;
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2807 */ 2823 */
2808 mutex_unlock(&root->log_mutex); 2824 mutex_unlock(&root->log_mutex);
2809 2825
2810 btrfs_init_log_ctx(&root_log_ctx); 2826 btrfs_init_log_ctx(&root_log_ctx, NULL);
2811 2827
2812 mutex_lock(&log_root_tree->log_mutex); 2828 mutex_lock(&log_root_tree->log_mutex);
2813 atomic_inc(&log_root_tree->log_batch); 2829 atomic_inc(&log_root_tree->log_batch);
@@ -4469,7 +4485,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
4469static int btrfs_check_ref_name_override(struct extent_buffer *eb, 4485static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4470 const int slot, 4486 const int slot,
4471 const struct btrfs_key *key, 4487 const struct btrfs_key *key,
4472 struct inode *inode) 4488 struct inode *inode,
4489 u64 *other_ino)
4473{ 4490{
4474 int ret; 4491 int ret;
4475 struct btrfs_path *search_path; 4492 struct btrfs_path *search_path;
@@ -4528,7 +4545,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4528 search_path, parent, 4545 search_path, parent,
4529 name, this_name_len, 0); 4546 name, this_name_len, 0);
4530 if (di && !IS_ERR(di)) { 4547 if (di && !IS_ERR(di)) {
4531 ret = 1; 4548 struct btrfs_key di_key;
4549
4550 btrfs_dir_item_key_to_cpu(search_path->nodes[0],
4551 di, &di_key);
4552 if (di_key.type == BTRFS_INODE_ITEM_KEY) {
4553 ret = 1;
4554 *other_ino = di_key.objectid;
4555 } else {
4556 ret = -EAGAIN;
4557 }
4532 goto out; 4558 goto out;
4533 } else if (IS_ERR(di)) { 4559 } else if (IS_ERR(di)) {
4534 ret = PTR_ERR(di); 4560 ret = PTR_ERR(di);
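
btrfs_check_ref_name_override() now distinguishes three outcomes: no conflicting name (0), a conflict with a real inode (1, with the inode number returned through *other_ino), and a directory item it cannot resolve here (-EAGAIN). A generic version of that tri-state return with an out-parameter, using hypothetical values:

#include <errno.h>
#include <stdio.h>

/* returns 0 = no match, 1 = match (id stored in *other), <0 = error */
static int lookup_conflict(const char *name, unsigned long long *other)
{
	if (name[0] == '\0')
		return -EAGAIN;         /* caller must retry another way */
	if (name[0] == 'a') {
		*other = 300;           /* hypothetical conflicting inode */
		return 1;
	}
	return 0;
}

int main(void)
{
	unsigned long long other = 0;
	int ret = lookup_conflict("a_file", &other);

	if (ret < 0)
		printf("error %d\n", ret);
	else if (ret > 0)
		printf("conflicts with inode %llu\n", other);
	else
		printf("no conflict\n");
	return 0;
}
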
@@ -4722,16 +4748,72 @@ again:
4722 if ((min_key.type == BTRFS_INODE_REF_KEY || 4748 if ((min_key.type == BTRFS_INODE_REF_KEY ||
4723 min_key.type == BTRFS_INODE_EXTREF_KEY) && 4749 min_key.type == BTRFS_INODE_EXTREF_KEY) &&
4724 BTRFS_I(inode)->generation == trans->transid) { 4750 BTRFS_I(inode)->generation == trans->transid) {
4751 u64 other_ino = 0;
4752
4725 ret = btrfs_check_ref_name_override(path->nodes[0], 4753 ret = btrfs_check_ref_name_override(path->nodes[0],
4726 path->slots[0], 4754 path->slots[0],
4727 &min_key, inode); 4755 &min_key, inode,
4756 &other_ino);
4728 if (ret < 0) { 4757 if (ret < 0) {
4729 err = ret; 4758 err = ret;
4730 goto out_unlock; 4759 goto out_unlock;
4731 } else if (ret > 0) { 4760 } else if (ret > 0 && ctx &&
4732 err = 1; 4761 other_ino != btrfs_ino(ctx->inode)) {
4733 btrfs_set_log_full_commit(root->fs_info, trans); 4762 struct btrfs_key inode_key;
4734 goto out_unlock; 4763 struct inode *other_inode;
4764
4765 if (ins_nr > 0) {
4766 ins_nr++;
4767 } else {
4768 ins_nr = 1;
4769 ins_start_slot = path->slots[0];
4770 }
4771 ret = copy_items(trans, inode, dst_path, path,
4772 &last_extent, ins_start_slot,
4773 ins_nr, inode_only,
4774 logged_isize);
4775 if (ret < 0) {
4776 err = ret;
4777 goto out_unlock;
4778 }
4779 ins_nr = 0;
4780 btrfs_release_path(path);
4781 inode_key.objectid = other_ino;
4782 inode_key.type = BTRFS_INODE_ITEM_KEY;
4783 inode_key.offset = 0;
4784 other_inode = btrfs_iget(root->fs_info->sb,
4785 &inode_key, root,
4786 NULL);
4787 /*
4788 * If the other inode that had a conflicting dir
4789 * entry was deleted in the current transaction,
4790 * we don't need to do more work nor fallback to
4791 * a transaction commit.
4792 */
4793 if (IS_ERR(other_inode) &&
4794 PTR_ERR(other_inode) == -ENOENT) {
4795 goto next_key;
4796 } else if (IS_ERR(other_inode)) {
4797 err = PTR_ERR(other_inode);
4798 goto out_unlock;
4799 }
4800 /*
4801 * We are safe logging the other inode without
4802 * acquiring its i_mutex as long as we log with
4803 * the LOG_INODE_EXISTS mode. We're safe against
4804 * concurrent renames of the other inode as well
4805 * because during a rename we pin the log and
4806 * update the log with the new name before we
4807 * unpin it.
4808 */
4809 err = btrfs_log_inode(trans, root, other_inode,
4810 LOG_INODE_EXISTS,
4811 0, LLONG_MAX, ctx);
4812 iput(other_inode);
4813 if (err)
4814 goto out_unlock;
4815 else
4816 goto next_key;
4735 } 4817 }
4736 } 4818 }
4737 4819
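
The large hunk above handles a name conflict found while logging: the run of items batched so far is flushed with copy_items(), the conflicting inode is logged separately in LOG_INODE_EXISTS mode, and the scan resumes at the next key instead of forcing a full transaction commit. A rough userspace model of that flush-then-handle-then-continue flow, with everything mocked up:

#include <stdio.h>

struct item { int key; int conflicts_with; };   /* conflicts_with: 0 = none */

static void flush_run(int start, int len)
{
	if (len)
		printf("copy items [%d, %d)\n", start, start + len);
}

int main(void)
{
	struct item items[] = { {1,0}, {2,0}, {3,7}, {4,0}, {5,0} };
	int n = 5, i, run_start = 0, run_len = 0;

	for (i = 0; i < n; i++) {
		if (items[i].conflicts_with) {
			run_len++;                     /* include this item */
			flush_run(run_start, run_len); /* like copy_items() */
			printf("log conflicting inode %d in EXISTS mode\n",
			       items[i].conflicts_with);
			run_start = i + 1;             /* "goto next_key" */
			run_len = 0;
			continue;
		}
		if (!run_len)
			run_start = i;
		run_len++;
	}
	flush_run(run_start, run_len);
	return 0;
}
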
@@ -4799,7 +4881,7 @@ next_slot:
4799 ins_nr = 0; 4881 ins_nr = 0;
4800 } 4882 }
4801 btrfs_release_path(path); 4883 btrfs_release_path(path);
4802 4884next_key:
4803 if (min_key.offset < (u64)-1) { 4885 if (min_key.offset < (u64)-1) {
4804 min_key.offset++; 4886 min_key.offset++;
4805 } else if (min_key.type < max_key.type) { 4887 } else if (min_key.type < max_key.type) {
@@ -4993,8 +5075,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4993 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 5075 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
4994 break; 5076 break;
4995 5077
4996 if (IS_ROOT(parent)) 5078 if (IS_ROOT(parent)) {
5079 inode = d_inode(parent);
5080 if (btrfs_must_commit_transaction(trans, inode))
5081 ret = 1;
4997 break; 5082 break;
5083 }
4998 5084
4999 parent = dget_parent(parent); 5085 parent = dget_parent(parent);
5000 dput(old_parent); 5086 dput(old_parent);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75d080d..ab858e31ccbc 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
30 int log_transid; 30 int log_transid;
31 int io_err; 31 int io_err;
32 bool log_new_dentries; 32 bool log_new_dentries;
33 struct inode *inode;
33 struct list_head list; 34 struct list_head list;
34}; 35};
35 36
36static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) 37static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
38 struct inode *inode)
37{ 39{
38 ctx->log_ret = 0; 40 ctx->log_ret = 0;
39 ctx->log_transid = 0; 41 ctx->log_transid = 0;
40 ctx->io_err = 0; 42 ctx->io_err = 0;
41 ctx->log_new_dentries = false; 43 ctx->log_new_dentries = false;
44 ctx->inode = inode;
42 INIT_LIST_HEAD(&ctx->list); 45 INIT_LIST_HEAD(&ctx->list);
43} 46}
44 47
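
btrfs_log_ctx now carries the inode that started the logging, which lets btrfs_log_inode() recognise "the conflict is with the inode I am already logging" (the other_ino != btrfs_ino(ctx->inode) check above) and avoid recursing into itself. A minimal illustration of a context struct that records its originating object; the types here are stubs, not kernel structures:

#include <stdio.h>

struct inode_stub { unsigned long long ino; };

struct log_ctx {
	int log_ret;
	int log_transid;
	struct inode_stub *inode;       /* new field from the patch */
};

static void init_log_ctx(struct log_ctx *ctx, struct inode_stub *inode)
{
	ctx->log_ret = 0;
	ctx->log_transid = 0;
	ctx->inode = inode;
}

static int is_self_conflict(const struct log_ctx *ctx, unsigned long long other_ino)
{
	return ctx->inode && ctx->inode->ino == other_ino;
}

int main(void)
{
	struct inode_stub inode = { .ino = 257 };
	struct log_ctx ctx;

	init_log_ctx(&ctx, &inode);
	printf("conflict with 257 is self: %d\n", is_self_conflict(&ctx, 257));
	printf("conflict with 300 is self: %d\n", is_self_conflict(&ctx, 300));
	return 0;
}
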
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 51f125508771..035efce603a9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work)
834 struct btrfs_device *device; 834 struct btrfs_device *device;
835 835
836 device = container_of(work, struct btrfs_device, rcu_work); 836 device = container_of(work, struct btrfs_device, rcu_work);
837
838 if (device->bdev)
839 blkdev_put(device->bdev, device->mode);
840
841 rcu_string_free(device->name); 837 rcu_string_free(device->name);
842 kfree(device); 838 kfree(device);
843} 839}
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head)
852 schedule_work(&device->rcu_work); 848 schedule_work(&device->rcu_work);
853} 849}
854 850
851static void btrfs_close_bdev(struct btrfs_device *device)
852{
853 if (device->bdev && device->writeable) {
854 sync_blockdev(device->bdev);
855 invalidate_bdev(device->bdev);
856 }
857
858 if (device->bdev)
859 blkdev_put(device->bdev, device->mode);
860}
861
855static void btrfs_close_one_device(struct btrfs_device *device) 862static void btrfs_close_one_device(struct btrfs_device *device)
856{ 863{
857 struct btrfs_fs_devices *fs_devices = device->fs_devices; 864 struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
870 if (device->missing) 877 if (device->missing)
871 fs_devices->missing_devices--; 878 fs_devices->missing_devices--;
872 879
873 if (device->bdev && device->writeable) { 880 btrfs_close_bdev(device);
874 sync_blockdev(device->bdev);
875 invalidate_bdev(device->bdev);
876 }
877 881
878 new_device = btrfs_alloc_device(NULL, &device->devid, 882 new_device = btrfs_alloc_device(NULL, &device->devid,
879 device->uuid); 883 device->uuid);
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
1932 btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); 1936 btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
1933 } 1937 }
1934 1938
1939 btrfs_close_bdev(device);
1940
1935 call_rcu(&device->rcu, free_device); 1941 call_rcu(&device->rcu, free_device);
1936 1942
1937 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; 1943 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2025 /* zero out the old super if it is writable */ 2031 /* zero out the old super if it is writable */
2026 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); 2032 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
2027 } 2033 }
2034
2035 btrfs_close_bdev(srcdev);
2036
2028 call_rcu(&srcdev->rcu, free_device); 2037 call_rcu(&srcdev->rcu, free_device);
2029 2038
2030 /* 2039 /*
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2080 * the device_list_mutex lock. 2089 * the device_list_mutex lock.
2081 */ 2090 */
2082 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); 2091 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
2092
2093 btrfs_close_bdev(tgtdev);
2083 call_rcu(&tgtdev->rcu, free_device); 2094 call_rcu(&tgtdev->rcu, free_device);
2084} 2095}
2085 2096
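
The volumes.c changes funnel the sync/invalidate/put sequence into a single btrfs_close_bdev() helper and call it from every teardown path before the RCU-deferred free of the device structure, instead of closing the block device from the RCU work item. A toy version of that "close first, then free" consolidation with a fake device type:

#include <stdbool.h>
#include <stdio.h>

struct device_stub { bool has_bdev; bool writeable; const char *name; };

static void close_bdev(struct device_stub *dev)
{
	if (dev->has_bdev && dev->writeable)
		printf("%s: sync and invalidate block device\n", dev->name);
	if (dev->has_bdev)
		printf("%s: release block device\n", dev->name);
}

static void free_device(struct device_stub *dev)
{
	/* in the kernel this runs after an RCU grace period */
	printf("%s: freeing device struct\n", dev->name);
}

static void remove_device(struct device_stub *dev)
{
	close_bdev(dev);        /* always close before scheduling the free */
	free_device(dev);
}

int main(void)
{
	struct device_stub a = { true, true,  "devA" };
	struct device_stub b = { true, false, "devB" };

	remove_device(&a);
	remove_device(&b);
	return 0;
}
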