aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/backref.c14
-rw-r--r--fs/btrfs/btrfs_inode.h6
-rw-r--r--fs/btrfs/ctree.c20
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/extent-tree.c285
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/file.c26
-rw-r--r--fs/btrfs/inode.c59
-rw-r--r--fs/btrfs/ordered-data.c123
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/qgroup.c169
-rw-r--r--fs/btrfs/qgroup.h1
-rw-r--r--fs/btrfs/super.c51
-rw-r--r--fs/btrfs/transaction.c33
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/ulist.h15
17 files changed, 541 insertions, 305 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564bfcb46..54a201dac7f9 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
276 } 276 }
277 if (ret > 0) 277 if (ret > 0)
278 goto next; 278 goto next;
279 ret = ulist_add_merge(parents, eb->start, 279 ret = ulist_add_merge_ptr(parents, eb->start,
280 (uintptr_t)eie, 280 eie, (void **)&old, GFP_NOFS);
281 (u64 *)&old, GFP_NOFS);
282 if (ret < 0) 281 if (ret < 0)
283 break; 282 break;
284 if (!ret && extent_item_pos) { 283 if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@ again:
1001 ret = -EIO; 1000 ret = -EIO;
1002 goto out; 1001 goto out;
1003 } 1002 }
1003 btrfs_tree_read_lock(eb);
1004 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1004 ret = find_extent_in_eb(eb, bytenr, 1005 ret = find_extent_in_eb(eb, bytenr,
1005 *extent_item_pos, &eie); 1006 *extent_item_pos, &eie);
1007 btrfs_tree_read_unlock_blocking(eb);
1006 free_extent_buffer(eb); 1008 free_extent_buffer(eb);
1007 if (ret < 0) 1009 if (ret < 0)
1008 goto out; 1010 goto out;
1009 ref->inode_list = eie; 1011 ref->inode_list = eie;
1010 } 1012 }
1011 ret = ulist_add_merge(refs, ref->parent, 1013 ret = ulist_add_merge_ptr(refs, ref->parent,
1012 (uintptr_t)ref->inode_list, 1014 ref->inode_list,
1013 (u64 *)&eie, GFP_NOFS); 1015 (void **)&eie, GFP_NOFS);
1014 if (ret < 0) 1016 if (ret < 0)
1015 goto out; 1017 goto out;
1016 if (!ret && extent_item_pos) { 1018 if (!ret && extent_item_pos) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923c410c..43527fd78825 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@ struct btrfs_inode {
84 */ 84 */
85 struct list_head delalloc_inodes; 85 struct list_head delalloc_inodes;
86 86
87 /*
88 * list for tracking inodes that must be sent to disk before a
89 * rename or truncate commit
90 */
91 struct list_head ordered_operations;
92
93 /* node for the red-black tree that links inodes in subvolume root */ 87 /* node for the red-black tree that links inodes in subvolume root */
94 struct rb_node rb_node; 88 struct rb_node rb_node;
95 89
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453b8e24..44ee5d2e52a4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
280 280
281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 281 WARN_ON(btrfs_header_generation(buf) > trans->transid);
282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
283 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 283 ret = btrfs_inc_ref(trans, root, cow, 1);
284 else 284 else
285 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 285 ret = btrfs_inc_ref(trans, root, cow, 0);
286 286
287 if (ret) 287 if (ret)
288 return ret; 288 return ret;
@@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1035 if ((owner == root->root_key.objectid || 1035 if ((owner == root->root_key.objectid ||
1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1038 ret = btrfs_inc_ref(trans, root, buf, 1, 1); 1038 ret = btrfs_inc_ref(trans, root, buf, 1);
1039 BUG_ON(ret); /* -ENOMEM */ 1039 BUG_ON(ret); /* -ENOMEM */
1040 1040
1041 if (root->root_key.objectid == 1041 if (root->root_key.objectid ==
1042 BTRFS_TREE_RELOC_OBJECTID) { 1042 BTRFS_TREE_RELOC_OBJECTID) {
1043 ret = btrfs_dec_ref(trans, root, buf, 0, 1); 1043 ret = btrfs_dec_ref(trans, root, buf, 0);
1044 BUG_ON(ret); /* -ENOMEM */ 1044 BUG_ON(ret); /* -ENOMEM */
1045 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1045 ret = btrfs_inc_ref(trans, root, cow, 1);
1046 BUG_ON(ret); /* -ENOMEM */ 1046 BUG_ON(ret); /* -ENOMEM */
1047 } 1047 }
1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1050 1050
1051 if (root->root_key.objectid == 1051 if (root->root_key.objectid ==
1052 BTRFS_TREE_RELOC_OBJECTID) 1052 BTRFS_TREE_RELOC_OBJECTID)
1053 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1053 ret = btrfs_inc_ref(trans, root, cow, 1);
1054 else 1054 else
1055 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1055 ret = btrfs_inc_ref(trans, root, cow, 0);
1056 BUG_ON(ret); /* -ENOMEM */ 1056 BUG_ON(ret); /* -ENOMEM */
1057 } 1057 }
1058 if (new_flags != 0) { 1058 if (new_flags != 0) {
@@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
1070 if (root->root_key.objectid == 1070 if (root->root_key.objectid ==
1071 BTRFS_TREE_RELOC_OBJECTID) 1071 BTRFS_TREE_RELOC_OBJECTID)
1072 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1072 ret = btrfs_inc_ref(trans, root, cow, 1);
1073 else 1073 else
1074 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1074 ret = btrfs_inc_ref(trans, root, cow, 0);
1075 BUG_ON(ret); /* -ENOMEM */ 1075 BUG_ON(ret); /* -ENOMEM */
1076 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 1076 ret = btrfs_dec_ref(trans, root, buf, 1);
1077 BUG_ON(ret); /* -ENOMEM */ 1077 BUG_ON(ret); /* -ENOMEM */
1078 } 1078 }
1079 clean_tree_block(trans, root, buf); 1079 clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397f4e92..8e29b614fe93 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3327 struct btrfs_key *ins, int is_data, int delalloc); 3327 struct btrfs_key *ins, int is_data, int delalloc);
3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3329 struct extent_buffer *buf, int full_backref, int no_quota); 3329 struct extent_buffer *buf, int full_backref);
3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3331 struct extent_buffer *buf, int full_backref, int no_quota); 3331 struct extent_buffer *buf, int full_backref);
3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root, 3333 struct btrfs_root *root,
3334 u64 bytenr, u64 num_bytes, u64 flags, 3334 u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2aa..d0ed9e664f7d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrfs_work *work);
60static void free_fs_root(struct btrfs_root *root); 60static void free_fs_root(struct btrfs_root *root);
61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
62 int read_only); 62 int read_only);
63static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
64 struct btrfs_root *root);
65static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 63static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
66static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 64static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
67 struct btrfs_root *root); 65 struct btrfs_root *root);
@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root)
3829 btrfs_cleanup_transaction(root); 3827 btrfs_cleanup_transaction(root);
3830} 3828}
3831 3829
3832static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3833 struct btrfs_root *root)
3834{
3835 struct btrfs_inode *btrfs_inode;
3836 struct list_head splice;
3837
3838 INIT_LIST_HEAD(&splice);
3839
3840 mutex_lock(&root->fs_info->ordered_operations_mutex);
3841 spin_lock(&root->fs_info->ordered_root_lock);
3842
3843 list_splice_init(&t->ordered_operations, &splice);
3844 while (!list_empty(&splice)) {
3845 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3846 ordered_operations);
3847
3848 list_del_init(&btrfs_inode->ordered_operations);
3849 spin_unlock(&root->fs_info->ordered_root_lock);
3850
3851 btrfs_invalidate_inodes(btrfs_inode->root);
3852
3853 spin_lock(&root->fs_info->ordered_root_lock);
3854 }
3855
3856 spin_unlock(&root->fs_info->ordered_root_lock);
3857 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3858}
3859
3860static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3830static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3861{ 3831{
3862 struct btrfs_ordered_extent *ordered; 3832 struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4063,6 @@ again:
4093void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4063void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4094 struct btrfs_root *root) 4064 struct btrfs_root *root)
4095{ 4065{
4096 btrfs_destroy_ordered_operations(cur_trans, root);
4097
4098 btrfs_destroy_delayed_refs(cur_trans, root); 4066 btrfs_destroy_delayed_refs(cur_trans, root);
4099 4067
4100 cur_trans->state = TRANS_STATE_COMMIT_START; 4068 cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f9..102ed3143976 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3057,7 +3057,7 @@ out:
3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3058 struct btrfs_root *root, 3058 struct btrfs_root *root,
3059 struct extent_buffer *buf, 3059 struct extent_buffer *buf,
3060 int full_backref, int inc, int no_quota) 3060 int full_backref, int inc)
3061{ 3061{
3062 u64 bytenr; 3062 u64 bytenr;
3063 u64 num_bytes; 3063 u64 num_bytes;
@@ -3111,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3111 key.offset -= btrfs_file_extent_offset(buf, fi); 3111 key.offset -= btrfs_file_extent_offset(buf, fi);
3112 ret = process_func(trans, root, bytenr, num_bytes, 3112 ret = process_func(trans, root, bytenr, num_bytes,
3113 parent, ref_root, key.objectid, 3113 parent, ref_root, key.objectid,
3114 key.offset, no_quota); 3114 key.offset, 1);
3115 if (ret) 3115 if (ret)
3116 goto fail; 3116 goto fail;
3117 } else { 3117 } else {
@@ -3119,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3119 num_bytes = btrfs_level_size(root, level - 1); 3119 num_bytes = btrfs_level_size(root, level - 1);
3120 ret = process_func(trans, root, bytenr, num_bytes, 3120 ret = process_func(trans, root, bytenr, num_bytes,
3121 parent, ref_root, level - 1, 0, 3121 parent, ref_root, level - 1, 0,
3122 no_quota); 3122 1);
3123 if (ret) 3123 if (ret)
3124 goto fail; 3124 goto fail;
3125 } 3125 }
@@ -3130,15 +3130,15 @@ fail:
3130} 3130}
3131 3131
3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3133 struct extent_buffer *buf, int full_backref, int no_quota) 3133 struct extent_buffer *buf, int full_backref)
3134{ 3134{
3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3136} 3136}
3137 3137
3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3139 struct extent_buffer *buf, int full_backref, int no_quota) 3139 struct extent_buffer *buf, int full_backref)
3140{ 3140{
3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3142} 3142}
3143 3143
3144static int write_one_cache_group(struct btrfs_trans_handle *trans, 3144static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -7478,6 +7478,220 @@ reada:
7478 wc->reada_slot = slot; 7478 wc->reada_slot = slot;
7479} 7479}
7480 7480
7481static int account_leaf_items(struct btrfs_trans_handle *trans,
7482 struct btrfs_root *root,
7483 struct extent_buffer *eb)
7484{
7485 int nr = btrfs_header_nritems(eb);
7486 int i, extent_type, ret;
7487 struct btrfs_key key;
7488 struct btrfs_file_extent_item *fi;
7489 u64 bytenr, num_bytes;
7490
7491 for (i = 0; i < nr; i++) {
7492 btrfs_item_key_to_cpu(eb, &key, i);
7493
7494 if (key.type != BTRFS_EXTENT_DATA_KEY)
7495 continue;
7496
7497 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7498 /* filter out non qgroup-accountable extents */
7499 extent_type = btrfs_file_extent_type(eb, fi);
7500
7501 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7502 continue;
7503
7504 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7505 if (!bytenr)
7506 continue;
7507
7508 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
7509
7510 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7511 root->objectid,
7512 bytenr, num_bytes,
7513 BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
7514 if (ret)
7515 return ret;
7516 }
7517 return 0;
7518}
7519
7520/*
7521 * Walk up the tree from the bottom, freeing leaves and any interior
7522 * nodes which have had all slots visited. If a node (leaf or
7523 * interior) is freed, the node above it will have it's slot
7524 * incremented. The root node will never be freed.
7525 *
7526 * At the end of this function, we should have a path which has all
7527 * slots incremented to the next position for a search. If we need to
7528 * read a new node it will be NULL and the node above it will have the
7529 * correct slot selected for a later read.
7530 *
7531 * If we increment the root nodes slot counter past the number of
7532 * elements, 1 is returned to signal completion of the search.
7533 */
7534static int adjust_slots_upwards(struct btrfs_root *root,
7535 struct btrfs_path *path, int root_level)
7536{
7537 int level = 0;
7538 int nr, slot;
7539 struct extent_buffer *eb;
7540
7541 if (root_level == 0)
7542 return 1;
7543
7544 while (level <= root_level) {
7545 eb = path->nodes[level];
7546 nr = btrfs_header_nritems(eb);
7547 path->slots[level]++;
7548 slot = path->slots[level];
7549 if (slot >= nr || level == 0) {
7550 /*
7551 * Don't free the root - we will detect this
7552 * condition after our loop and return a
7553 * positive value for caller to stop walking the tree.
7554 */
7555 if (level != root_level) {
7556 btrfs_tree_unlock_rw(eb, path->locks[level]);
7557 path->locks[level] = 0;
7558
7559 free_extent_buffer(eb);
7560 path->nodes[level] = NULL;
7561 path->slots[level] = 0;
7562 }
7563 } else {
7564 /*
7565 * We have a valid slot to walk back down
7566 * from. Stop here so caller can process these
7567 * new nodes.
7568 */
7569 break;
7570 }
7571
7572 level++;
7573 }
7574
7575 eb = path->nodes[root_level];
7576 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7577 return 1;
7578
7579 return 0;
7580}
7581
7582/*
7583 * root_eb is the subtree root and is locked before this function is called.
7584 */
7585static int account_shared_subtree(struct btrfs_trans_handle *trans,
7586 struct btrfs_root *root,
7587 struct extent_buffer *root_eb,
7588 u64 root_gen,
7589 int root_level)
7590{
7591 int ret = 0;
7592 int level;
7593 struct extent_buffer *eb = root_eb;
7594 struct btrfs_path *path = NULL;
7595
7596 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7597 BUG_ON(root_eb == NULL);
7598
7599 if (!root->fs_info->quota_enabled)
7600 return 0;
7601
7602 if (!extent_buffer_uptodate(root_eb)) {
7603 ret = btrfs_read_buffer(root_eb, root_gen);
7604 if (ret)
7605 goto out;
7606 }
7607
7608 if (root_level == 0) {
7609 ret = account_leaf_items(trans, root, root_eb);
7610 goto out;
7611 }
7612
7613 path = btrfs_alloc_path();
7614 if (!path)
7615 return -ENOMEM;
7616
7617 /*
7618 * Walk down the tree. Missing extent blocks are filled in as
7619 * we go. Metadata is accounted every time we read a new
7620 * extent block.
7621 *
7622 * When we reach a leaf, we account for file extent items in it,
7623 * walk back up the tree (adjusting slot pointers as we go)
7624 * and restart the search process.
7625 */
7626 extent_buffer_get(root_eb); /* For path */
7627 path->nodes[root_level] = root_eb;
7628 path->slots[root_level] = 0;
7629 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
7630walk_down:
7631 level = root_level;
7632 while (level >= 0) {
7633 if (path->nodes[level] == NULL) {
7634 int child_bsize = root->nodesize;
7635 int parent_slot;
7636 u64 child_gen;
7637 u64 child_bytenr;
7638
7639 /* We need to get child blockptr/gen from
7640 * parent before we can read it. */
7641 eb = path->nodes[level + 1];
7642 parent_slot = path->slots[level + 1];
7643 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
7644 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
7645
7646 eb = read_tree_block(root, child_bytenr, child_bsize,
7647 child_gen);
7648 if (!eb || !extent_buffer_uptodate(eb)) {
7649 ret = -EIO;
7650 goto out;
7651 }
7652
7653 path->nodes[level] = eb;
7654 path->slots[level] = 0;
7655
7656 btrfs_tree_read_lock(eb);
7657 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
7658 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
7659
7660 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7661 root->objectid,
7662 child_bytenr,
7663 child_bsize,
7664 BTRFS_QGROUP_OPER_SUB_SUBTREE,
7665 0);
7666 if (ret)
7667 goto out;
7668
7669 }
7670
7671 if (level == 0) {
7672 ret = account_leaf_items(trans, root, path->nodes[level]);
7673 if (ret)
7674 goto out;
7675
7676 /* Nonzero return here means we completed our search */
7677 ret = adjust_slots_upwards(root, path, root_level);
7678 if (ret)
7679 break;
7680
7681 /* Restart search with new slots */
7682 goto walk_down;
7683 }
7684
7685 level--;
7686 }
7687
7688 ret = 0;
7689out:
7690 btrfs_free_path(path);
7691
7692 return ret;
7693}
7694
7481/* 7695/*
7482 * helper to process tree block while walking down the tree. 7696 * helper to process tree block while walking down the tree.
7483 * 7697 *
@@ -7532,9 +7746,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7532 /* wc->stage == UPDATE_BACKREF */ 7746 /* wc->stage == UPDATE_BACKREF */
7533 if (!(wc->flags[level] & flag)) { 7747 if (!(wc->flags[level] & flag)) {
7534 BUG_ON(!path->locks[level]); 7748 BUG_ON(!path->locks[level]);
7535 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 7749 ret = btrfs_inc_ref(trans, root, eb, 1);
7536 BUG_ON(ret); /* -ENOMEM */ 7750 BUG_ON(ret); /* -ENOMEM */
7537 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 7751 ret = btrfs_dec_ref(trans, root, eb, 0);
7538 BUG_ON(ret); /* -ENOMEM */ 7752 BUG_ON(ret); /* -ENOMEM */
7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 7753 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
7540 eb->len, flag, 7754 eb->len, flag,
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7581 int level = wc->level; 7795 int level = wc->level;
7582 int reada = 0; 7796 int reada = 0;
7583 int ret = 0; 7797 int ret = 0;
7798 bool need_account = false;
7584 7799
7585 generation = btrfs_node_ptr_generation(path->nodes[level], 7800 generation = btrfs_node_ptr_generation(path->nodes[level],
7586 path->slots[level]); 7801 path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7626 7841
7627 if (wc->stage == DROP_REFERENCE) { 7842 if (wc->stage == DROP_REFERENCE) {
7628 if (wc->refs[level - 1] > 1) { 7843 if (wc->refs[level - 1] > 1) {
7844 need_account = true;
7629 if (level == 1 && 7845 if (level == 1 &&
7630 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 7846 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7631 goto skip; 7847 goto skip;
@@ -7689,6 +7905,16 @@ skip:
7689 parent = 0; 7905 parent = 0;
7690 } 7906 }
7691 7907
7908 if (need_account) {
7909 ret = account_shared_subtree(trans, root, next,
7910 generation, level - 1);
7911 if (ret) {
7912 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
7913 "%d accounting shared subtree. Quota "
7914 "is out of sync, rescan required.\n",
7915 root->fs_info->sb->s_id, ret);
7916 }
7917 }
7692 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 7918 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
7693 root->root_key.objectid, level - 1, 0, 0); 7919 root->root_key.objectid, level - 1, 0, 0);
7694 BUG_ON(ret); /* -ENOMEM */ 7920 BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7995,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7769 if (wc->refs[level] == 1) { 7995 if (wc->refs[level] == 1) {
7770 if (level == 0) { 7996 if (level == 0) {
7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 7997 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7772 ret = btrfs_dec_ref(trans, root, eb, 1, 7998 ret = btrfs_dec_ref(trans, root, eb, 1);
7773 wc->for_reloc);
7774 else 7999 else
7775 ret = btrfs_dec_ref(trans, root, eb, 0, 8000 ret = btrfs_dec_ref(trans, root, eb, 0);
7776 wc->for_reloc);
7777 BUG_ON(ret); /* -ENOMEM */ 8001 BUG_ON(ret); /* -ENOMEM */
8002 ret = account_leaf_items(trans, root, eb);
8003 if (ret) {
8004 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8005 "%d accounting leaf items. Quota "
8006 "is out of sync, rescan required.\n",
8007 root->fs_info->sb->s_id, ret);
8008 }
7778 } 8009 }
7779 /* make block locked assertion in clean_tree_block happy */ 8010 /* make block locked assertion in clean_tree_block happy */
7780 if (!path->locks[level] && 8011 if (!path->locks[level] &&
@@ -7900,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7900 int level; 8131 int level;
7901 bool root_dropped = false; 8132 bool root_dropped = false;
7902 8133
8134 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8135
7903 path = btrfs_alloc_path(); 8136 path = btrfs_alloc_path();
7904 if (!path) { 8137 if (!path) {
7905 err = -ENOMEM; 8138 err = -ENOMEM;
@@ -8025,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8025 goto out_end_trans; 8258 goto out_end_trans;
8026 } 8259 }
8027 8260
8261 /*
8262 * Qgroup update accounting is run from
8263 * delayed ref handling. This usually works
8264 * out because delayed refs are normally the
8265 * only way qgroup updates are added. However,
8266 * we may have added updates during our tree
8267 * walk so run qgroups here to make sure we
8268 * don't lose any updates.
8269 */
8270 ret = btrfs_delayed_qgroup_accounting(trans,
8271 root->fs_info);
8272 if (ret)
8273 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8274 "running qgroup updates "
8275 "during snapshot delete. "
8276 "Quota is out of sync, "
8277 "rescan required.\n", ret);
8278
8028 btrfs_end_transaction_throttle(trans, tree_root); 8279 btrfs_end_transaction_throttle(trans, tree_root);
8029 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 8280 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8030 pr_debug("BTRFS: drop snapshot early exit\n"); 8281 pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8078 } 8329 }
8079 root_dropped = true; 8330 root_dropped = true;
8080out_end_trans: 8331out_end_trans:
8332 ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
8333 if (ret)
8334 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8335 "running qgroup updates "
8336 "during snapshot delete. "
8337 "Quota is out of sync, "
8338 "rescan required.\n", ret);
8339
8081 btrfs_end_transaction_throttle(trans, tree_root); 8340 btrfs_end_transaction_throttle(trans, tree_root);
8082out_free: 8341out_free:
8083 kfree(wc); 8342 kfree(wc);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe45d686..54c84daec9b5 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@ again:
756 found_next = 1; 756 found_next = 1;
757 if (ret != 0) 757 if (ret != 0)
758 goto insert; 758 goto insert;
759 slot = 0; 759 slot = path->slots[0];
760 } 760 }
761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99cb55ea..d3afac292d67 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,33 +1838,9 @@ out:
1838 1838
1839int btrfs_release_file(struct inode *inode, struct file *filp) 1839int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 /*
1842 * ordered_data_close is set by settattr when we are about to truncate
1843 * a file from a non-zero size to a zero size. This tries to
1844 * flush down new bytes that may have been written if the
1845 * application were using truncate to replace a file in place.
1846 */
1847 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1848 &BTRFS_I(inode)->runtime_flags)) {
1849 struct btrfs_trans_handle *trans;
1850 struct btrfs_root *root = BTRFS_I(inode)->root;
1851
1852 /*
1853 * We need to block on a committing transaction to keep us from
1854 * throwing a ordered operation on to the list and causing
1855 * something like sync to deadlock trying to flush out this
1856 * inode.
1857 */
1858 trans = btrfs_start_transaction(root, 0);
1859 if (IS_ERR(trans))
1860 return PTR_ERR(trans);
1861 btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
1862 btrfs_end_transaction(trans, root);
1863 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1864 filemap_flush(inode->i_mapping);
1865 }
1866 if (filp->private_data) 1841 if (filp->private_data)
1867 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping);
1868 return 0; 1844 return 0;
1869} 1845}
1870 1846
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3183742d6f0d..03708ef3deef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@ retry:
709 unlock_extent(io_tree, async_extent->start, 709 unlock_extent(io_tree, async_extent->start,
710 async_extent->start + 710 async_extent->start +
711 async_extent->ram_size - 1); 711 async_extent->ram_size - 1);
712
713 /*
714 * we need to redirty the pages if we decide to
715 * fallback to uncompressed IO, otherwise we
716 * will not submit these pages down to lower
717 * layers.
718 */
719 extent_range_redirty_for_io(inode,
720 async_extent->start,
721 async_extent->start +
722 async_extent->ram_size - 1);
723
712 goto retry; 724 goto retry;
713 } 725 }
714 goto out_free; 726 goto out_free;
@@ -7939,27 +7951,6 @@ static int btrfs_truncate(struct inode *inode)
7939 BUG_ON(ret); 7951 BUG_ON(ret);
7940 7952
7941 /* 7953 /*
7942 * setattr is responsible for setting the ordered_data_close flag,
7943 * but that is only tested during the last file release. That
7944 * could happen well after the next commit, leaving a great big
7945 * window where new writes may get lost if someone chooses to write
7946 * to this file after truncating to zero
7947 *
7948 * The inode doesn't have any dirty data here, and so if we commit
7949 * this is a noop. If someone immediately starts writing to the inode
7950 * it is very likely we'll catch some of their writes in this
7951 * transaction, and the commit will find this file on the ordered
7952 * data list with good things to send down.
7953 *
7954 * This is a best effort solution, there is still a window where
7955 * using truncate to replace the contents of the file will
7956 * end up with a zero length file after a crash.
7957 */
7958 if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
7959 &BTRFS_I(inode)->runtime_flags))
7960 btrfs_add_ordered_operation(trans, root, inode);
7961
7962 /*
7963 * So if we truncate and then write and fsync we normally would just 7954 * So if we truncate and then write and fsync we normally would just
7964 * write the extents that changed, which is a problem if we need to 7955 * write the extents that changed, which is a problem if we need to
7965 * first truncate that entire inode. So set this flag so we write out 7956 * first truncate that entire inode. So set this flag so we write out
@@ -8106,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
8106 mutex_init(&ei->delalloc_mutex); 8097 mutex_init(&ei->delalloc_mutex);
8107 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 8098 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8108 INIT_LIST_HEAD(&ei->delalloc_inodes); 8099 INIT_LIST_HEAD(&ei->delalloc_inodes);
8109 INIT_LIST_HEAD(&ei->ordered_operations);
8110 RB_CLEAR_NODE(&ei->rb_node); 8100 RB_CLEAR_NODE(&ei->rb_node);
8111 8101
8112 return inode; 8102 return inode;
@@ -8146,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *inode)
8146 if (!root) 8136 if (!root)
8147 goto free; 8137 goto free;
8148 8138
8149 /*
8150 * Make sure we're properly removed from the ordered operation
8151 * lists.
8152 */
8153 smp_mb();
8154 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
8155 spin_lock(&root->fs_info->ordered_root_lock);
8156 list_del_init(&BTRFS_I(inode)->ordered_operations);
8157 spin_unlock(&root->fs_info->ordered_root_lock);
8158 }
8159
8160 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 8139 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
8161 &BTRFS_I(inode)->runtime_flags)) { 8140 &BTRFS_I(inode)->runtime_flags)) {
8162 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 8141 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8317,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8338 ret = 0; 8317 ret = 0;
8339 8318
8340 /* 8319 /*
8341 * we're using rename to replace one file with another. 8320 * we're using rename to replace one file with another. Start IO on it
8342 * and the replacement file is large. Start IO on it now so 8321 * now so we don't add too much work to the end of the transaction
8343 * we don't add too much work to the end of the transaction
8344 */ 8322 */
8345 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && 8323 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
8346 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
8347 filemap_flush(old_inode->i_mapping); 8324 filemap_flush(old_inode->i_mapping);
8348 8325
8349 /* close the racy window with snapshot create/destroy ioctl */ 8326 /* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8368,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8391 */ 8368 */
8392 btrfs_pin_log_trans(root); 8369 btrfs_pin_log_trans(root);
8393 } 8370 }
8394 /*
8395 * make sure the inode gets flushed if it is replacing
8396 * something.
8397 */
8398 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
8399 btrfs_add_ordered_operation(trans, root, old_inode);
8400 8371
8401 inode_inc_iversion(old_dir); 8372 inode_inc_iversion(old_dir);
8402 inode_inc_iversion(new_dir); 8373 inode_inc_iversion(new_dir);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14faa6c..963895c1f801 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode,
571 571
572 trace_btrfs_ordered_extent_remove(inode, entry); 572 trace_btrfs_ordered_extent_remove(inode, entry);
573 573
574 /*
575 * we have no more ordered extents for this inode and
576 * no dirty pages. We can safely remove it from the
577 * list of ordered extents
578 */
579 if (RB_EMPTY_ROOT(&tree->tree) &&
580 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
581 spin_lock(&root->fs_info->ordered_root_lock);
582 list_del_init(&BTRFS_I(inode)->ordered_operations);
583 spin_unlock(&root->fs_info->ordered_root_lock);
584 }
585
586 if (!root->nr_ordered_extents) { 574 if (!root->nr_ordered_extents) {
587 spin_lock(&root->fs_info->ordered_root_lock); 575 spin_lock(&root->fs_info->ordered_root_lock);
588 BUG_ON(list_empty(&root->ordered_root)); 576 BUG_ON(list_empty(&root->ordered_root));
@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
687} 675}
688 676
689/* 677/*
690 * this is used during transaction commit to write all the inodes
691 * added to the ordered operation list. These files must be fully on
692 * disk before the transaction commits.
693 *
694 * we have two modes here, one is to just start the IO via filemap_flush
695 * and the other is to wait for all the io. When we wait, we have an
696 * extra check to make sure the ordered operation list really is empty
697 * before we return
698 */
699int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
700 struct btrfs_root *root, int wait)
701{
702 struct btrfs_inode *btrfs_inode;
703 struct inode *inode;
704 struct btrfs_transaction *cur_trans = trans->transaction;
705 struct list_head splice;
706 struct list_head works;
707 struct btrfs_delalloc_work *work, *next;
708 int ret = 0;
709
710 INIT_LIST_HEAD(&splice);
711 INIT_LIST_HEAD(&works);
712
713 mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
714 spin_lock(&root->fs_info->ordered_root_lock);
715 list_splice_init(&cur_trans->ordered_operations, &splice);
716 while (!list_empty(&splice)) {
717 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
718 ordered_operations);
719 inode = &btrfs_inode->vfs_inode;
720
721 list_del_init(&btrfs_inode->ordered_operations);
722
723 /*
724 * the inode may be getting freed (in sys_unlink path).
725 */
726 inode = igrab(inode);
727 if (!inode)
728 continue;
729
730 if (!wait)
731 list_add_tail(&BTRFS_I(inode)->ordered_operations,
732 &cur_trans->ordered_operations);
733 spin_unlock(&root->fs_info->ordered_root_lock);
734
735 work = btrfs_alloc_delalloc_work(inode, wait, 1);
736 if (!work) {
737 spin_lock(&root->fs_info->ordered_root_lock);
738 if (list_empty(&BTRFS_I(inode)->ordered_operations))
739 list_add_tail(&btrfs_inode->ordered_operations,
740 &splice);
741 list_splice_tail(&splice,
742 &cur_trans->ordered_operations);
743 spin_unlock(&root->fs_info->ordered_root_lock);
744 ret = -ENOMEM;
745 goto out;
746 }
747 list_add_tail(&work->list, &works);
748 btrfs_queue_work(root->fs_info->flush_workers,
749 &work->work);
750
751 cond_resched();
752 spin_lock(&root->fs_info->ordered_root_lock);
753 }
754 spin_unlock(&root->fs_info->ordered_root_lock);
755out:
756 list_for_each_entry_safe(work, next, &works, list) {
757 list_del_init(&work->list);
758 btrfs_wait_and_free_delalloc_work(work);
759 }
760 mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
761 return ret;
762}
763
764/*
765 * Used to start IO or wait for a given ordered extent to finish. 678 * Used to start IO or wait for a given ordered extent to finish.
766 * 679 *
767 * If wait is one, this effectively waits on page writeback for all the pages 680 * If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1033,6 @@ out:
1120 return index; 1033 return index;
1121} 1034}
1122 1035
1123
1124/*
1125 * add a given inode to the list of inodes that must be fully on
1126 * disk before a transaction commit finishes.
1127 *
1128 * This basically gives us the ext3 style data=ordered mode, and it is mostly
1129 * used to make sure renamed files are fully on disk.
1130 *
1131 * It is a noop if the inode is already fully on disk.
1132 *
1133 * If trans is not null, we'll do a friendly check for a transaction that
1134 * is already flushing things and force the IO down ourselves.
1135 */
1136void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1137 struct btrfs_root *root, struct inode *inode)
1138{
1139 struct btrfs_transaction *cur_trans = trans->transaction;
1140 u64 last_mod;
1141
1142 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
1143
1144 /*
1145 * if this file hasn't been changed since the last transaction
1146 * commit, we can safely return without doing anything
1147 */
1148 if (last_mod <= root->fs_info->last_trans_committed)
1149 return;
1150
1151 spin_lock(&root->fs_info->ordered_root_lock);
1152 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
1153 list_add_tail(&BTRFS_I(inode)->ordered_operations,
1154 &cur_trans->ordered_operations);
1155 }
1156 spin_unlock(&root->fs_info->ordered_root_lock);
1157}
1158
1159int __init ordered_data_init(void) 1036int __init ordered_data_init(void)
1160{ 1037{
1161 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 1038 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 246897058efb..d81a274d621e 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
190 struct btrfs_ordered_extent *ordered); 190 struct btrfs_ordered_extent *ordered);
191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
192 u32 *sum, int len); 192 u32 *sum, int len);
193int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root, int wait);
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root,
197 struct inode *inode);
198int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); 193int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
199void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); 194void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
200void btrfs_get_logged_extents(struct inode *inode, 195void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9..b497498484be 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
1201 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1201 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1202 return ret; 1202 return ret;
1203} 1203}
1204
1205static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1206 struct btrfs_qgroup_operation *oper2)
1207{
1208 /*
1209 * Ignore seq and type here, we're looking for any operation
1210 * at all related to this extent on that root.
1211 */
1212 if (oper1->bytenr < oper2->bytenr)
1213 return -1;
1214 if (oper1->bytenr > oper2->bytenr)
1215 return 1;
1216 if (oper1->ref_root < oper2->ref_root)
1217 return -1;
1218 if (oper1->ref_root > oper2->ref_root)
1219 return 1;
1220 return 0;
1221}
1222
1223static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1224 struct btrfs_qgroup_operation *oper)
1225{
1226 struct rb_node *n;
1227 struct btrfs_qgroup_operation *cur;
1228 int cmp;
1229
1230 spin_lock(&fs_info->qgroup_op_lock);
1231 n = fs_info->qgroup_op_tree.rb_node;
1232 while (n) {
1233 cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1234 cmp = comp_oper_exist(cur, oper);
1235 if (cmp < 0) {
1236 n = n->rb_right;
1237 } else if (cmp) {
1238 n = n->rb_left;
1239 } else {
1240 spin_unlock(&fs_info->qgroup_op_lock);
1241 return -EEXIST;
1242 }
1243 }
1244 spin_unlock(&fs_info->qgroup_op_lock);
1245 return 0;
1246}
1247
1204static int comp_oper(struct btrfs_qgroup_operation *oper1, 1248static int comp_oper(struct btrfs_qgroup_operation *oper1,
1205 struct btrfs_qgroup_operation *oper2) 1249 struct btrfs_qgroup_operation *oper2)
1206{ 1250{
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1290 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1334 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1291 INIT_LIST_HEAD(&oper->elem.list); 1335 INIT_LIST_HEAD(&oper->elem.list);
1292 oper->elem.seq = 0; 1336 oper->elem.seq = 0;
1337
1338 if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1339 /*
1340 * If any operation for this bytenr/ref_root combo
1341 * exists, then we know it's not exclusively owned and
1342 * shouldn't be queued up.
1343 *
1344 * This also catches the case where we have a cloned
1345 * extent that gets queued up multiple times during
1346 * drop snapshot.
1347 */
1348 if (qgroup_oper_exists(fs_info, oper)) {
1349 kfree(oper);
1350 return 0;
1351 }
1352 }
1353
1293 ret = insert_qgroup_oper(fs_info, oper); 1354 ret = insert_qgroup_oper(fs_info, oper);
1294 if (ret) { 1355 if (ret) {
1295 /* Shouldn't happen so have an assert for developers */ 1356 /* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@ out:
1884} 1945}
1885 1946
1886/* 1947/*
1948 * Process a reference to a shared subtree. This type of operation is
1949 * queued during snapshot removal when we encounter extents which are
1950 * shared between more than one root.
1951 */
1952static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1953 struct btrfs_fs_info *fs_info,
1954 struct btrfs_qgroup_operation *oper)
1955{
1956 struct ulist *roots = NULL;
1957 struct ulist_node *unode;
1958 struct ulist_iterator uiter;
1959 struct btrfs_qgroup_list *glist;
1960 struct ulist *parents;
1961 int ret = 0;
1962 int err;
1963 struct btrfs_qgroup *qg;
1964 u64 root_obj = 0;
1965 struct seq_list elem = {};
1966
1967 parents = ulist_alloc(GFP_NOFS);
1968 if (!parents)
1969 return -ENOMEM;
1970
1971 btrfs_get_tree_mod_seq(fs_info, &elem);
1972 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0)
1976 return ret;
1977
1978 if (roots->nnodes != 1)
1979 goto out;
1980
1981 ULIST_ITER_INIT(&uiter);
1982 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
1983 /*
1984 * If we find our ref root then that means all refs
1985 * this extent has to the root have not yet been
1986 * deleted. In that case, we do nothing and let the
1987 * last ref for this bytenr drive our update.
1988 *
1989 * This can happen for example if an extent is
1990 * referenced multiple times in a snapshot (clone,
1991 * etc). If we are in the middle of snapshot removal,
1992 * queued updates for such an extent will find the
1993 * root if we have not yet finished removing the
1994 * snapshot.
1995 */
1996 if (unode->val == oper->ref_root)
1997 goto out;
1998
1999 root_obj = unode->val;
2000 BUG_ON(!root_obj);
2001
2002 spin_lock(&fs_info->qgroup_lock);
2003 qg = find_qgroup_rb(fs_info, root_obj);
2004 if (!qg)
2005 goto out_unlock;
2006
2007 qg->excl += oper->num_bytes;
2008 qg->excl_cmpr += oper->num_bytes;
2009 qgroup_dirty(fs_info, qg);
2010
2011 /*
2012 * Adjust counts for parent groups. First we find all
2013 * parents, then in the 2nd loop we do the adjustment
2014 * while adding parents of the parents to our ulist.
2015 */
2016 list_for_each_entry(glist, &qg->groups, next_group) {
2017 err = ulist_add(parents, glist->group->qgroupid,
2018 ptr_to_u64(glist->group), GFP_ATOMIC);
2019 if (err < 0) {
2020 ret = err;
2021 goto out_unlock;
2022 }
2023 }
2024
2025 ULIST_ITER_INIT(&uiter);
2026 while ((unode = ulist_next(parents, &uiter))) {
2027 qg = u64_to_ptr(unode->aux);
2028 qg->excl += oper->num_bytes;
2029 qg->excl_cmpr += oper->num_bytes;
2030 qgroup_dirty(fs_info, qg);
2031
2032 /* Add any parents of the parents */
2033 list_for_each_entry(glist, &qg->groups, next_group) {
2034 err = ulist_add(parents, glist->group->qgroupid,
2035 ptr_to_u64(glist->group), GFP_ATOMIC);
2036 if (err < 0) {
2037 ret = err;
2038 goto out_unlock;
2039 }
2040 }
2041 }
2042
2043out_unlock:
2044 spin_unlock(&fs_info->qgroup_lock);
2045
2046out:
2047 ulist_free(roots);
2048 ulist_free(parents);
2049 return ret;
2050}
2051
2052/*
1887 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 2053 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1888 * from the fs. First, all roots referencing the extent are searched, and 2054 * from the fs. First, all roots referencing the extent are searched, and
1889 * then the space is accounted accordingly to the different roots. The 2055 * then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1920 case BTRFS_QGROUP_OPER_SUB_SHARED: 2086 case BTRFS_QGROUP_OPER_SUB_SHARED:
1921 ret = qgroup_shared_accounting(trans, fs_info, oper); 2087 ret = qgroup_shared_accounting(trans, fs_info, oper);
1922 break; 2088 break;
2089 case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2090 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2091 break;
1923 default: 2092 default:
1924 ASSERT(0); 2093 ASSERT(0);
1925 } 2094 }
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7a..18cc68ca3090 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
44 BTRFS_QGROUP_OPER_ADD_SHARED, 44 BTRFS_QGROUP_OPER_ADD_SHARED,
45 BTRFS_QGROUP_OPER_SUB_EXCL, 45 BTRFS_QGROUP_OPER_SUB_EXCL,
46 BTRFS_QGROUP_OPER_SUB_SHARED, 46 BTRFS_QGROUP_OPER_SUB_SHARED,
47 BTRFS_QGROUP_OPER_SUB_SUBTREE,
47}; 48};
48 49
49struct btrfs_qgroup_operation { 50struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67b48b9a03e0..c4124de4435b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1665,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1665 return 0; 1665 return 0;
1666} 1666}
1667 1667
1668/*
1669 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
1670 *
1671 * If there's a redundant raid level at DATA block groups, use the respective
1672 * multiplier to scale the sizes.
1673 *
1674 * Unused device space usage is based on simulating the chunk allocator
1675 * algorithm that respects the device sizes, order of allocations and the
1676 * 'alloc_start' value, this is a close approximation of the actual use but
1677 * there are other factors that may change the result (like a new metadata
1678 * chunk).
1679 *
1680 * FIXME: not accurate for mixed block groups, total and free/used are ok,
1681 * available appears slightly larger.
1682 */
1668static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1683static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1669{ 1684{
1670 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); 1685 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1675,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1675 u64 total_free_data = 0; 1690 u64 total_free_data = 0;
1676 int bits = dentry->d_sb->s_blocksize_bits; 1691 int bits = dentry->d_sb->s_blocksize_bits;
1677 __be32 *fsid = (__be32 *)fs_info->fsid; 1692 __be32 *fsid = (__be32 *)fs_info->fsid;
1693 unsigned factor = 1;
1694 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
1678 int ret; 1695 int ret;
1679 1696
1680 /* holding chunk_mutex to avoid allocating new chunks */ 1697 /* holding chunk_mutex to avoid allocating new chunks */
@@ -1682,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1682 rcu_read_lock(); 1699 rcu_read_lock();
1683 list_for_each_entry_rcu(found, head, list) { 1700 list_for_each_entry_rcu(found, head, list) {
1684 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 1701 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1702 int i;
1703
1685 total_free_data += found->disk_total - found->disk_used; 1704 total_free_data += found->disk_total - found->disk_used;
1686 total_free_data -= 1705 total_free_data -=
1687 btrfs_account_ro_block_groups_free_space(found); 1706 btrfs_account_ro_block_groups_free_space(found);
1707
1708 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1709 if (!list_empty(&found->block_groups[i])) {
1710 switch (i) {
1711 case BTRFS_RAID_DUP:
1712 case BTRFS_RAID_RAID1:
1713 case BTRFS_RAID_RAID10:
1714 factor = 2;
1715 }
1716 }
1717 }
1688 } 1718 }
1689 1719
1690 total_used += found->disk_used; 1720 total_used += found->disk_used;
1691 } 1721 }
1722
1692 rcu_read_unlock(); 1723 rcu_read_unlock();
1693 1724
1694 buf->f_namelen = BTRFS_NAME_LEN; 1725 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
1695 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1726 buf->f_blocks >>= bits;
1696 buf->f_bfree = buf->f_blocks - (total_used >> bits); 1727 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
1697 buf->f_bsize = dentry->d_sb->s_blocksize; 1728
1698 buf->f_type = BTRFS_SUPER_MAGIC; 1729 /* Account global block reserve as used, it's in logical size already */
1730 spin_lock(&block_rsv->lock);
1731 buf->f_bfree -= block_rsv->size >> bits;
1732 spin_unlock(&block_rsv->lock);
1733
1699 buf->f_bavail = total_free_data; 1734 buf->f_bavail = total_free_data;
1700 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); 1735 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
1701 if (ret) { 1736 if (ret) {
1702 mutex_unlock(&fs_info->chunk_mutex); 1737 mutex_unlock(&fs_info->chunk_mutex);
1703 return ret; 1738 return ret;
1704 } 1739 }
1705 buf->f_bavail += total_free_data; 1740 buf->f_bavail += div_u64(total_free_data, factor);
1706 buf->f_bavail = buf->f_bavail >> bits; 1741 buf->f_bavail = buf->f_bavail >> bits;
1707 mutex_unlock(&fs_info->chunk_mutex); 1742 mutex_unlock(&fs_info->chunk_mutex);
1708 1743
1744 buf->f_type = BTRFS_SUPER_MAGIC;
1745 buf->f_bsize = dentry->d_sb->s_blocksize;
1746 buf->f_namelen = BTRFS_NAME_LEN;
1747
1709 /* We treat it as constant endianness (it doesn't matter _which_) 1748 /* We treat it as constant endianness (it doesn't matter _which_)
1710 because we want the fsid to come out the same whether mounted 1749 because we want the fsid to come out the same whether mounted
1711 on a big-endian or little-endian host */ 1750 on a big-endian or little-endian host */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379affdf23..d89c6d3542ca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@ loop:
218 spin_lock_init(&cur_trans->delayed_refs.lock); 218 spin_lock_init(&cur_trans->delayed_refs.lock);
219 219
220 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 220 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
221 INIT_LIST_HEAD(&cur_trans->ordered_operations);
222 INIT_LIST_HEAD(&cur_trans->pending_chunks); 221 INIT_LIST_HEAD(&cur_trans->pending_chunks);
223 INIT_LIST_HEAD(&cur_trans->switch_commits); 222 INIT_LIST_HEAD(&cur_trans->switch_commits);
224 list_add_tail(&cur_trans->list, &fs_info->trans_list); 223 list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1612 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1611 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1613} 1612}
1614 1613
/*
 * Run the delayed items and then the ordered operations (in waiting
 * mode) for this transaction.
 *
 * Returns the first non-zero result from either step, or 0.
 */
static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root)
{
	int err = btrfs_run_delayed_items(trans, root);

	if (err)
		return err;

	/*
	 * rename doesn't use btrfs_join_transaction, so once the
	 * transaction has been set to blocked we aren't going to get
	 * any new ordered operations.  We can safely run them here and
	 * know for sure that nothing new will be added to the list.
	 */
	return btrfs_run_ordered_operations(trans, root, 1);
}
1635
1636static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1614static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1637{ 1615{
1638 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1616 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1653 struct btrfs_transaction *prev_trans = NULL; 1631 struct btrfs_transaction *prev_trans = NULL;
1654 int ret; 1632 int ret;
1655 1633
1656 ret = btrfs_run_ordered_operations(trans, root, 0);
1657 if (ret) {
1658 btrfs_abort_transaction(trans, root, ret);
1659 btrfs_end_transaction(trans, root);
1660 return ret;
1661 }
1662
1663 /* Stop the commit early if ->aborted is set */ 1634 /* Stop the commit early if ->aborted is set */
1664 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { 1635 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1665 ret = cur_trans->aborted; 1636 ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1740 if (ret) 1711 if (ret)
1741 goto cleanup_transaction; 1712 goto cleanup_transaction;
1742 1713
1743 ret = btrfs_flush_all_pending_stuffs(trans, root); 1714 ret = btrfs_run_delayed_items(trans, root);
1744 if (ret) 1715 if (ret)
1745 goto cleanup_transaction; 1716 goto cleanup_transaction;
1746 1717
@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1748 extwriter_counter_read(cur_trans) == 0); 1719 extwriter_counter_read(cur_trans) == 0);
1749 1720
1750 /* some pending stuffs might be added after the previous flush. */ 1721 /* some pending stuffs might be added after the previous flush. */
1751 ret = btrfs_flush_all_pending_stuffs(trans, root); 1722 ret = btrfs_run_delayed_items(trans, root);
1752 if (ret) 1723 if (ret)
1753 goto cleanup_transaction; 1724 goto cleanup_transaction;
1754 1725
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558ed0716..579be51b27e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@ struct btrfs_transaction {
55 wait_queue_head_t writer_wait; 55 wait_queue_head_t writer_wait;
56 wait_queue_head_t commit_wait; 56 wait_queue_head_t commit_wait;
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head ordered_operations;
59 struct list_head pending_chunks; 58 struct list_head pending_chunks;
60 struct list_head switch_commits; 59 struct list_head switch_commits;
61 struct btrfs_delayed_ref_root delayed_refs; 60 struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf5cf41..4c29db604bbe 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); 57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
59 u64 *old_aux, gfp_t gfp_mask); 59 u64 *old_aux, gfp_t gfp_mask);
60
61/* just like ulist_add_merge() but take a pointer for the aux data */
62static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
63 void **old_aux, gfp_t gfp_mask)
64{
65#if BITS_PER_LONG == 32
66 u64 old64 = (uintptr_t)*old_aux;
67 int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
68 *old_aux = (void *)((uintptr_t)old64);
69 return ret;
70#else
71 return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
72#endif
73}
74
60struct ulist_node *ulist_next(struct ulist *ulist, 75struct ulist_node *ulist_next(struct ulist *ulist,
61 struct ulist_iterator *uiter); 76 struct ulist_iterator *uiter);
62 77